diff options
author | Quentin Carbonneaux <quentin@c9x.me> | 2017-04-08 21:06:33 -0400 |
---|---|---|
committer | Quentin Carbonneaux <quentin@c9x.me> | 2017-04-08 21:56:20 -0400 |
commit | 49a4593c335126ba279f47328824abfef379725e (patch) | |
tree | 2f4cb5e9884ec958ea32a494da302a9aae8ca420 /amd64/sysv.c | |
parent | 9d1c38d69547d835f7228651e71e8a7d744c456d (diff) | |
download | roux-49a4593c335126ba279f47328824abfef379725e.tar.gz |
prepare for multi-target
This big diff makes multiple changes to allow the addition of new targets to qbe. The changes are listed below in decreasing order of impact. 1. Add a new Target structure. To add support for a given target, one has to implement all the members of the Target structure. All the source files were changed to use this interface where needed. 2. Single out amd64-specific code. In this commit, the amd64 target T_amd64_sysv is the only target available; it is implemented in the amd64/ directory. All the non-static items in this directory are prefixed with either amd64_ or amd64_sysv (for items that are specific to the System V ABI). 3. Centralize Ops information. There is now a file 'ops.h' that must be used to store all the available operations together with their metadata. The various targets will only select what they need; but it is beneficial that there is only *one* place to change to add a new instruction. One good side effect of this change is that any operation 'xyz' in the IL now has a corresponding 'Oxyz' in the code. 4. Misc fixes. One notable change is that instruction selection now generates generic comparison operations and the lowering to the target's comparisons is done in the emitter. GAS directives for data are the same for many targets, so data emission was extracted into a file 'gas.c'. 5. Modularize the Makefile. The Makefile now has a list of C files that are target-independent (SRC), and one list of C files per target. Each target can also use its own 'all.h' header (for example to define registers).
Diffstat (limited to 'amd64/sysv.c')
-rw-r--r-- | amd64/sysv.c | 701 |
1 files changed, 701 insertions, 0 deletions
diff --git a/amd64/sysv.c b/amd64/sysv.c
new file mode 100644
index 0000000..dcaa812
--- /dev/null
+++ b/amd64/sysv.c
@@ -0,0 +1,701 @@
+#include "all.h"
+/*
+ * Calling-convention lowering for the amd64 System V target.
+ *
+ * Instructions are produced back to front: curi starts at
+ * &insb[NIns] and the number of emitted instructions is computed
+ * as &insb[NIns] - curi, so inside each function below the emit()
+ * calls appear in the reverse of the final instruction order
+ * (compare the pseudo-code comment in selvaarg with its emit()
+ * calls).
+ */
+typedef struct AClass AClass;
+typedef struct RAlloc RAlloc;
+/* Classification of one argument, parameter or return value. */
+struct AClass {
+	int inmem;	/* 0: in registers; 1: aggregate in memory; 2: scalar on the stack (set by argsclass) */
+	int align;	/* log2 of the alignment (copied from Typ.align; 3 for scalars) */
+	uint size;	/* size in bytes; typclass rounds it up to a multiple of max(8, alignment) */
+	int cls[2];	/* class of each eightbyte; Kx until classify() assigns Kd or Kl */
+	Ref ref[2];	/* temporaries holding each eightbyte, filled by selcall/selpar */
+};
+/*
+ * List node carrying an alloc instruction built by selcall for an
+ * aggregate return value; amd64_sysv_abi emits the whole list when
+ * it processes the start block.
+ */
+struct RAlloc {
+	Ins i;
+	RAlloc *link;
+};
+/*
+ * Set a->cls[] for the type t.  *pe is the index of the eightbyte
+ * being classified and *pn the number of bytes already consumed in
+ * it; both are shared with the recursive calls made for nested
+ * types (an STyp segment indexes the global typ[] with seg[s].len).
+ * An eightbyte stays Kx if it only sees padding, becomes Kd if it
+ * sees a float segment while still Kx, and becomes Kl as soon as it
+ * sees an integer segment.
+ * NOTE(review): once SEnd is reached the `*pe<2` loop keeps hitting
+ * SEnd until *pe == 2, and *pe is never reset, so later union
+ * alternatives and segments that follow a nested STyp do not seem
+ * to update the intended eightbyte -- confirm this is intended.
+ */
+static void
+classify(AClass *a, Typ *t, int *pn, int *pe)
+{
+	Seg *seg;
+	int n, s, *cls;
+
+	for (n=0; n<t->nunion; n++) {
+		seg = t->seg[n];
+		for (s=0; *pe<2; (*pe)++) {
+			cls = &a->cls[*pe];
+			for (; *pn<8; s++) {
+				switch (seg[s].type) {
+				case SEnd:
+					goto Done;
+				case SPad:
+					/* don't change anything */
+					break;
+				case SFlt:
+					if (*cls == Kx)
+						*cls = Kd;
+					break;
+				case SInt:
+					*cls = Kl;
+					break;
+				case STyp:
+					classify(a, &typ[seg[s].len], pn, pe);
+					continue; /* skips the *pn update below; the recursive call maintains *pn */
+				}
+				*pn += seg[s].len;
+			}
+		Done:
+			assert(*pn <= 8);
+			*pn = 0;
+		}
+	}
+}
+/*
+ * Fill *a from the type t: size (rounded up), alignment, whether
+ * the value lives in memory and, when it does not, the class of
+ * each eightbyte (computed by classify).
+ */
+static void
+typclass(AClass *a, Typ *t)
+{
+	int e, n;
+	uint sz, al;
+
+	sz = t->size;
+	al = 1u << t->align;
+
+	/* the ABI requires sizes to be rounded
+	 * up to the nearest multiple of 8, moreover
+	 * it makes it easy to load and store structures
+	 * in registers
+	 */
+	if (al < 8)
+		al = 8;
+	sz = (sz + al-1) & -al;
+
+	a->size = sz;
+	a->align = t->align;
+
+	if (t->dark || sz > 16 || sz == 0) {
+		/* large or unaligned structures are
+		 * required to be passed in memory
+		 */
+		a->inmem = 1;
+		return;
+	}
+
+	a->cls[0] = Kx;
+	a->cls[1] = Kx;
+	a->inmem = 0;
+	n = 0;
+	e = 0;
+	classify(a, t, &n, &e);
+}
+/*
+ * Choose the return register of every eightbyte of aret and store
+ * it in reg[]: RAX then RDX for base class 0, XMM0 then XMM0+1 for
+ * base class 1.  Returns the register counts packed as in the low
+ * bits of an RCall (1 per integer register, 1<<2 per sse register).
+ */
+static int
+retr(Ref reg[2], AClass *aret)
+{
+	static int retreg[2][2] = {{RAX, RDX}, {XMM0, XMM0+1}};
+	int n, k, ca, nr[2];
+
+	nr[0] = nr[1] = 0;
+	ca = 0;
+	for (n=0; (uint)n*8<aret->size; n++) {
+		k = KBASE(aret->cls[n]);
+		reg[n] = TMP(retreg[k][nr[k]++]);
+		ca += 1 << (2 * k);
+	}
+	return ca;
+}
+/*
+ * Lower the return jump of block b.  Any return other than Jret0
+ * is rewritten to Jret0 whose argument is an RCall describing the
+ * registers holding the result.  Aggregates (Jretc) are either
+ * copied with blit() to the location in fn->retr, whose address is
+ * then returned in RAX, or loaded eightbyte by eightbyte into the
+ * registers chosen by retr().  Scalars are copied to RAX or XMM0.
+ */
+static void
+selret(Blk *b, Fn *fn)
+{
+	int j, k, ca;
+	Ref r, r0, reg[2];
+	AClass aret;
+
+	j = b->jmp.type;
+
+	if (!isret(j) || j == Jret0)
+		return;
+
+	r0 = b->jmp.arg;
+	b->jmp.type = Jret0;
+
+	if (j == Jretc) {
+		typclass(&aret, &typ[fn->retty]);
+		if (aret.inmem) {
+			assert(rtype(fn->retr) == RTmp);
+			emit(Ocopy, Kl, TMP(RAX), fn->retr, R);
+			blit(fn->retr, 0, r0, aret.size, fn);
+			ca = 1;
+		} else {
+			ca = retr(reg, &aret);
+			if (aret.size > 8) {
+				r = newtmp("abi", Kl, fn);
+				emit(Oload, Kl, reg[1], r, R);
+				emit(Oadd, Kl, r, r0, getcon(8, fn));
+			}
+			emit(Oload, Kl, reg[0], r0, R);
+		}
+	} else {
+		k = j - Jretw; /* class of the returned scalar */
+		if (KBASE(k) == 0) {
+			emit(Ocopy, k, TMP(RAX), r0, R);
+			ca = 1;
+		} else {
+			emit(Ocopy, k, TMP(XMM0), r0, R);
+			ca = 1 << 2;
+		}
+	}
+
+	b->jmp.arg = CALL(ca);
+}
+/*
+ * Classify the instructions in [i0, i1) into ac[], one entry per
+ * instruction.  op is Oarg when lowering a call and Opar when
+ * lowering parameters; `i->op - op + Oarg` rebases the opcode so
+ * that both families share the Oarg/Oargc/Oarge cases.  aret, when
+ * non-null and in memory, reserves one integer register for the
+ * hidden return pointer.  The environment reference, if any, is
+ * stored through env.  Returns the number of integer and sse
+ * registers used, shifted to bits 4 and 8 (the RCall layout).
+ * NOTE(review): the Oarge case leaves its ac[] entry untouched.
+ */
+static int
+argsclass(Ins *i0, Ins *i1, AClass *ac, int op, AClass *aret, Ref *env)
+{
+	int nint, ni, nsse, ns, n, *pn;
+	AClass *a;
+	Ins *i;
+
+	if (aret && aret->inmem)
+		nint = 5; /* hidden argument */
+	else
+		nint = 6;
+	nsse = 8;
+	for (i=i0, a=ac; i<i1; i++, a++)
+		switch (i->op - op + Oarg) {
+		case Oarg:
+			if (KBASE(i->cls) == 0)
+				pn = &nint;
+			else
+				pn = &nsse;
+			if (*pn > 0) {
+				--*pn;
+				a->inmem = 0;
+			} else
+				a->inmem = 2;
+			a->align = 3;
+			a->size = 8;
+			a->cls[0] = i->cls;
+			break;
+		case Oargc:
+			n = i->arg[0].val;
+			typclass(a, &typ[n]);
+			if (a->inmem)
+				continue;
+			ni = ns = 0;
+			for (n=0; (uint)n*8<a->size; n++)
+				if (KBASE(a->cls[n]) == 0)
+					ni++;
+				else
+					ns++;
+			if (nint >= ni && nsse >= ns) { /* an aggregate goes in registers only if all its eightbytes fit */
+				nint -= ni;
+				nsse -= ns;
+			} else
+				a->inmem = 1;
+			break;
+		case Oarge:
+			if (op == Opar)
+				*env = i->to;
+			else
+				*env = i->arg[0];
+			break;
+		}
+
+	return ((6-nint) << 4) | ((8-nsse) << 8);
+}
+/*
+ * Register lists terminated by -1; MAKESURE checks their lengths
+ * against NGPS+NFPS and NCLR.  The first six entries of
+ * amd64_sysv_rsave are used, in order, as the integer argument
+ * registers (see rarg and amd64_sysv_argregs).  Presumably rsave
+ * holds the caller-save and rclob the callee-save registers --
+ * TODO confirm against the Target interface.
+ */
+int amd64_sysv_rsave[] = {
+	RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX,
+	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+	XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, -1
+};
+int amd64_sysv_rclob[] = {RBX, R12, R13, R14, R15, -1};
+
+MAKESURE(sysv_arrays_ok,
+	sizeof amd64_sysv_rsave == (NGPS+NFPS+1) * sizeof(int) &&
+	sizeof amd64_sysv_rclob == (NCLR+1) * sizeof(int)
+);
+
+/* layout of call's second argument (RCall)
+ *
+ *  29     12    8    4  2  0
+ *  |0...00|x|xxxx|xxxx|xx|xx|                  range
+ *          |    |    |  |  ` gp regs returned (0..2)
+ *          |    |    |  ` sse regs returned   (0..2)
+ *          |    |    ` gp regs passed         (0..6)
+ *          |    ` sse regs passed             (0..8)
+ *          ` 1 if rax is used to pass data    (0..1)
+ */
+/*
+ * Return the set of registers in which a call described by the
+ * RCall r returns its value.  When p is non-null, p[0] and p[1]
+ * receive the number of integer and sse return registers.
+ */
+bits
+amd64_sysv_retregs(Ref r, int p[2])
+{
+	bits b;
+	int ni, nf;
+
+	assert(rtype(r) == RCall);
+	b = 0;
+	ni = r.val & 3;
+	nf = (r.val >> 2) & 3;
+	if (ni >= 1)
+		b |= BIT(RAX);
+	if (ni >= 2)
+		b |= BIT(RDX);
+	if (nf >= 1)
+		b |= BIT(XMM0);
+	if (nf >= 2)
+		b |= BIT(XMM1);
+	if (p) {
+		p[0] = ni;
+		p[1] = nf;
+	}
+	return b;
+}
+/*
+ * Return the set of registers used to pass arguments to a call
+ * described by the RCall r, including RAX when bit 12 is set.
+ * When p is non-null, p[0] receives the integer register count
+ * (RAX included) and p[1] the sse register count.
+ */
+bits
+amd64_sysv_argregs(Ref r, int p[2])
+{
+	bits b;
+	int j, ni, nf, ra;
+
+	assert(rtype(r) == RCall);
+	b = 0;
+	ni = (r.val >> 4) & 15;
+	nf = (r.val >> 8) & 15;
+	ra = (r.val >> 12) & 1;
+	for (j=0; j<ni; j++)
+		b |= BIT(amd64_sysv_rsave[j]);
+	for (j=0; j<nf; j++)
+		b |= BIT(XMM0+j);
+	if (p) {
+		p[0] = ni + ra;
+		p[1] = nf;
+	}
+	return b | (ra ? BIT(RAX) : 0);
+}
+/*
+ * Return the next free argument register for class ty and advance
+ * the matching counter: *ni indexes amd64_sysv_rsave for integer
+ * classes, *ns counts from XMM0 for the others.
+ */
+static Ref
+rarg(int ty, int *ni, int *ns)
+{
+	if (KBASE(ty) == 0)
+		return TMP(amd64_sysv_rsave[(*ni)++]);
+	else
+		return TMP(XMM0 + (*ns)++);
+}
+/*
+ * Lower one call.  [i0, i1) are its argument instructions and i1
+ * is the call itself (Ocall or Ovacall); i1->arg[1], when not R,
+ * names the aggregate type returned.  Read bottom-up for the final
+ * instruction order: the stack area is allocated and memory
+ * arguments are stored, register arguments are copied or loaded,
+ * RAX is set for env or variadic calls, the call is made, the
+ * result is moved out of the return registers and the stack area
+ * is released.  For aggregate returns an alloc instruction for the
+ * result is pushed on *rap.
+ * NOTE(review): the two argument loops do not skip Oarge
+ * instructions, whose ac[] entry argsclass does not fill --
+ * confirm env arguments are handled as intended.
+ */
+static void
+selcall(Fn *fn, Ins *i0, Ins *i1, RAlloc **rap)
+{
+	Ins *i;
+	AClass *ac, *a, aret;
+	int ca, ni, ns, al, varc, envc;
+	uint stk, off;
+	Ref r, r1, r2, reg[2], env;
+	RAlloc *ra;
+
+	env = R;
+	ac = alloc((i1-i0) * sizeof ac[0]);
+
+	if (!req(i1->arg[1], R)) {
+		assert(rtype(i1->arg[1]) == RType);
+		typclass(&aret, &typ[i1->arg[1].val]);
+		ca = argsclass(i0, i1, ac, Oarg, &aret, &env);
+	} else
+		ca = argsclass(i0, i1, ac, Oarg, 0, &env);
+
+	for (stk=0, a=&ac[i1-i0]; a>ac;) /* size of the stack argument area, walking the arguments last to first */
+		if ((--a)->inmem) {
+			if (a->align > 4)
+				err("sysv abi requires alignments of 16 or less");
+			stk += a->size;
+			if (a->align == 4)
+				stk += stk & 15;
+		}
+	stk += stk & 15; /* sizes are multiples of 8, so this rounds stk up to a multiple of 16 */
+	if (stk) {
+		r = getcon(-(int64_t)stk, fn);
+		emit(Osalloc, Kl, R, r, R); /* negative size: comes after the call in final order and undoes the Osalloc emitted at the end */
+	}
+
+	if (!req(i1->arg[1], R)) {
+		if (aret.inmem) {
+			/* get the return location from rax
+			 * it saves one callee-save reg */
+			r1 = newtmp("abi", Kl, fn);
+			emit(Ocopy, Kl, i1->to, TMP(RAX), R);
+			ca += 1;
+		} else {
+			if (aret.size > 8) {
+				r = newtmp("abi", Kl, fn);
+				aret.ref[1] = newtmp("abi", aret.cls[1], fn);
+				emit(Ostorel, 0, R, aret.ref[1], r);
+				emit(Oadd, Kl, r, i1->to, getcon(8, fn));
+			}
+			aret.ref[0] = newtmp("abi", aret.cls[0], fn);
+			emit(Ostorel, 0, R, aret.ref[0], i1->to);
+			ca += retr(reg, &aret);
+			if (aret.size > 8)
+				emit(Ocopy, aret.cls[1], aret.ref[1], reg[1], R);
+			emit(Ocopy, aret.cls[0], aret.ref[0], reg[0], R);
+			r1 = i1->to;
+		}
+		/* allocate return pad */
+		ra = alloc(sizeof *ra);
+		/* specific to NAlign == 3 */
+		al = aret.align >= 2 ? aret.align - 2 : 0;
+		ra->i = (Ins){Oalloc+al, r1, {getcon(aret.size, fn)}, Kl};
+		ra->link = (*rap);
+		*rap = ra;
+	} else {
+		ra = 0;
+		if (KBASE(i1->cls) == 0) {
+			emit(Ocopy, i1->cls, i1->to, TMP(RAX), R);
+			ca += 1;
+		} else {
+			emit(Ocopy, i1->cls, i1->to, TMP(XMM0), R);
+			ca += 1 << 2;
+		}
+	}
+	envc = !req(R, env);
+	varc = i1->op == Ovacall;
+	if (varc && envc)
+		err("sysv abi does not support variadic env calls");
+	ca |= (varc | envc) << 12; /* bit 12: rax carries data into the call */
+	emit(Ocall, i1->cls, R, i1->arg[0], CALL(ca));
+	if (envc)
+		emit(Ocopy, Kl, TMP(RAX), env, R);
+	if (varc)
+		emit(Ocopy, Kw, TMP(RAX), getcon((ca >> 8) & 15, fn), R); /* rax = number of sse registers passed */
+
+	ni = ns = 0;
+	if (ra && aret.inmem)
+		emit(Ocopy, Kl, rarg(Kl, &ni, &ns), ra->i.to, R); /* pass hidden argument */
+	for (i=i0, a=ac; i<i1; i++, a++) {
+		if (a->inmem)
+			continue;
+		r1 = rarg(a->cls[0], &ni, &ns);
+		if (i->op == Oargc) {
+			if (a->size > 8) {
+				r2 = rarg(a->cls[1], &ni, &ns);
+				r = newtmp("abi", Kl, fn);
+				emit(Oload, a->cls[1], r2, r, R);
+				emit(Oadd, Kl, r, i->arg[1], getcon(8, fn));
+			}
+			emit(Oload, a->cls[0], r1, i->arg[1], R);
+		} else
+			emit(Ocopy, i->cls, r1, i->arg[0], R);
+	}
+
+	if (!stk)
+		return;
+
+	r = newtmp("abi", Kl, fn); /* base address of the stack argument area */
+	for (i=i0, a=ac, off=0; i<i1; i++, a++) {
+		if (!a->inmem)
+			continue;
+		if (i->op == Oargc) {
+			if (a->align == 4)
+				off += off & 15;
+			blit(r, off, i->arg[1], a->size, fn);
+		} else {
+			r1 = newtmp("abi", Kl, fn);
+			emit(Ostorel, 0, R, i->arg[0], r1);
+			emit(Oadd, Kl, r1, r, getcon(off, fn));
+		}
+		off += a->size;
+	}
+	emit(Osalloc, Kl, r, getcon(stk, fn), R);
+}
+/*
+ * Lower the parameter instructions [i0, i1) of the function.  The
+ * emission buffer is reset first, so on return the instructions
+ * between curi and &insb[NIns] are the lowered parameters.
+ * Aggregates passed in registers get a stack slot (alloc) into
+ * which their eightbytes are stored; aggregates passed in memory
+ * are mapped to a negative slot; scalars passed on the stack are
+ * loaded from a negative SLOT; everything else is copied from its
+ * argument register.  When the function returns an aggregate in
+ * memory, the hidden pointer is saved in fn->retr.
+ * Returns argsclass()'s register counts or-ed with (s*4)<<12,
+ * where s counts 4-byte units of stack arguments starting from 4;
+ * selvastart decodes this value.
+ */
+static int
+selpar(Fn *fn, Ins *i0, Ins *i1)
+{
+	AClass *ac, *a, aret;
+	Ins *i;
+	int ni, ns, s, al, fa;
+	Ref r, env;
+
+	env = R;
+	ac = alloc((i1-i0) * sizeof ac[0]);
+	curi = &insb[NIns];
+	ni = ns = 0;
+
+	if (fn->retty >= 0) {
+		typclass(&aret, &typ[fn->retty]);
+		fa = argsclass(i0, i1, ac, Opar, &aret, &env);
+	} else
+		fa = argsclass(i0, i1, ac, Opar, 0, &env);
+
+	for (i=i0, a=ac; i<i1; i++, a++) {
+		if (i->op != Oparc || a->inmem)
+			continue;
+		if (a->size > 8) {
+			r = newtmp("abi", Kl, fn);
+			a->ref[1] = newtmp("abi", Kl, fn);
+			emit(Ostorel, 0, R, a->ref[1], r);
+			emit(Oadd, Kl, r, i->to, getcon(8, fn));
+		}
+		a->ref[0] = newtmp("abi", Kl, fn);
+		emit(Ostorel, 0, R, a->ref[0], i->to);
+		/* specific to NAlign == 3 */
+		al = a->align >= 2 ? a->align - 2 : 0;
+		emit(Oalloc+al, Kl, i->to, getcon(a->size, fn), R);
+	}
+
+	if (fn->retty >= 0 && aret.inmem) {
+		r = newtmp("abi", Kl, fn);
+		emit(Ocopy, Kl, r, rarg(Kl, &ni, &ns), R); /* the hidden pointer takes the first integer register */
+		fn->retr = r;
+	}
+
+	for (i=i0, a=ac, s=4; i<i1; i++, a++) { /* s starts at 4 units (16 bytes) -- presumably past the saved rbp and return address, TODO confirm */
+		switch (a->inmem) {
+		case 1:
+			if (a->align > 4)
+				err("sysv abi requires alignments of 16 or less");
+			if (a->align == 4)
+				s = (s+3) & -4;
+			fn->tmp[i->to.val].slot = -s;
+			s += a->size / 4;
+			continue;
+		case 2:
+			emit(Oload, i->cls, i->to, SLOT(-s), R);
+			s += 2;
+			continue;
+		}
+		r = rarg(a->cls[0], &ni, &ns);
+		if (i->op == Oparc) {
+			emit(Ocopy, Kl, a->ref[0], r, R);
+			if (a->size > 8) {
+				r = rarg(a->cls[1], &ni, &ns);
+				emit(Ocopy, Kl, a->ref[1], r, R);
+			}
+		} else
+			emit(Ocopy, i->cls, i->to, r, R);
+	}
+
+	if (!req(R, env))
+		emit(Ocopy, Kl, env, TMP(RAX), R);
+
+	return fa | (s*4)<<12;
+}
+/*
+ * Create a new block holding the instructions emitted so far (from
+ * curi to &insb[NIns]), reset the emission buffer and link the new
+ * block right after b.  The block is named "<b's name>.<k>" where k
+ * is b->visit after incrementing; its visit field is set to the
+ * same non-zero value.
+ */
+static Blk *
+split(Fn *fn, Blk *b)
+{
+	Blk *bn;
+
+	++fn->nblk;
+	bn = blknew();
+	bn->nins = &insb[NIns] - curi;
+	idup(&bn->ins, curi, bn->nins);
+	curi = &insb[NIns];
+	bn->visit = ++b->visit;
+	snprintf(bn->name, NString, "%s.%d", b->name, b->visit);
+	bn->loop = b->loop;
+	bn->link = b->link;
+	b->link = bn;
+	return bn;
+}
+/*
+ * In every phi of block b, replace the predecessor bp by bp1.  The
+ * assert fires if a phi has no entry for bp.
+ */
+static void
+chpred(Blk *b, Blk *bp, Blk *bp1)
+{
+	Phi *p;
+	uint a;
+
+	for (p=b->phi; p; p=p->link) {
+		for (a=0; p->blk[a]!=bp; a++)
+			assert(a+1<p->narg);
+		p->blk[a] = bp1;
+	}
+}
+/*
+ * Lower a vaarg instruction i of block b, with ap = i->arg[0] the
+ * address of the variable argument list.  The block is split in
+ * four as sketched in the pseudo-code below: b keeps the code
+ * preceding i and tests the register offset, breg and bstk compute
+ * the address of the argument in the register save area or on the
+ * stack, and b0 receives b's original jump and successors and
+ * loads the value through a phi.
+ */
+static void
+selvaarg(Fn *fn, Blk *b, Ins *i)
+{
+	Ref loc, lreg, lstk, nr, r0, r1, c4, c8, c16, c, ap;
+	Blk *b0, *bstk, *breg;
+	int isint;
+
+	c4 = getcon(4, fn);
+	c8 = getcon(8, fn);
+	c16 = getcon(16, fn);
+	ap = i->arg[0];
+	isint = KBASE(i->cls) == 0;
+
+	/* @b [...]
+	       r0 =l add ap, (0 or 4)
+	       nr =l loadsw r0
+	       r1 =w cultw nr, (48 or 176)
+	       jnz r1, @breg, @bstk
+	   @breg
+	       r0 =l add ap, 16
+	       r1 =l loadl r0
+	       lreg =l add r1, nr
+	       r0 =w add nr, (8 or 16)
+	       r1 =l add ap, (0 or 4)
+	       storew r0, r1
+	   @bstk
+	       r0 =l add ap, 8
+	       lstk =l loadl r0
+	       r1 =l add lstk, 8
+	       storel r1, r0
+	   @b0
+	       %loc =l phi @breg %lreg, @bstk %lstk
+	       i->to =(i->cls) load %loc
+	*/
+
+	loc = newtmp("abi", Kl, fn);
+	emit(Oload, i->cls, i->to, loc, R);
+	b0 = split(fn, b); /* b0 gets this load plus everything already emitted for the tail of b */
+	b0->jmp = b->jmp;
+	b0->s1 = b->s1;
+	b0->s2 = b->s2;
+	if (b->s1)
+		chpred(b->s1, b, b0);
+	if (b->s2 && b->s2 != b->s1)
+		chpred(b->s2, b, b0);
+
+	lreg = newtmp("abi", Kl, fn);
+	nr = newtmp("abi", Kl, fn);
+	r0 = newtmp("abi", Kw, fn);
+	r1 = newtmp("abi", Kl, fn);
+	emit(Ostorew, Kw, R, r0, r1);
+	emit(Oadd, Kl, r1, ap, isint ? CON_Z : c4);
+	emit(Oadd, Kw, r0, nr, isint ? c8 : c16);
+	r0 = newtmp("abi", Kl, fn);
+	r1 = newtmp("abi", Kl, fn);
+	emit(Oadd, Kl, lreg, r1, nr);
+	emit(Oload, Kl, r1, r0, R);
+	emit(Oadd, Kl, r0, ap, c16);
+	breg = split(fn, b);
+	breg->jmp.type = Jjmp;
+	breg->s1 = b0;
+
+	lstk = newtmp("abi", Kl, fn);
+	r0 = newtmp("abi", Kl, fn);
+	r1 = newtmp("abi", Kl, fn);
+	emit(Ostorel, Kw, R, r1, r0);
+	emit(Oadd, Kl, r1, lstk, c8);
+	emit(Oload, Kl, lstk, r0, R);
+	emit(Oadd, Kl, r0, ap, c8);
+	bstk = split(fn, b);
+	bstk->jmp.type = Jjmp;
+	bstk->s1 = b0;
+
+	b0->phi = alloc(sizeof *b0->phi);
+	*b0->phi = (Phi){
+		.cls = Kl, .to = loc,
+		.narg = 2,
+		.blk = {bstk, breg},
+		.arg = {lstk, lreg},
+	};
+	r0 = newtmp("abi", Kl, fn);
+	r1 = newtmp("abi", Kw, fn);
+	b->jmp.type = Jjnz;
+	b->jmp.arg = r1;
+	b->s1 = breg;
+	b->s2 = bstk;
+	c = getcon(isint ? 48 : 176, fn); /* same bounds as selvastart: 48 = 6*8, 176 = 48 + 8*16 */
+	emit(Ocmpw+Ciult, Kw, r1, nr, c);
+	emit(Oloadsw, Kl, nr, r0, R);
+	emit(Oadd, Kl, r0, ap, isint ? CON_Z : c4);
+}
+/*
+ * Lower a vastart instruction: initialize the variable argument
+ * list at address ap from fa, the value returned by selpar.  In
+ * final order the code stores the word gp (8 bytes per integer
+ * register used by named parameters) at ap, the word fp (48 plus
+ * 16 bytes per sse register used) at ap+4, the address RBP+sp of
+ * the first unnamed stack argument at ap+8 and the address RBP-176
+ * at ap+16 (presumably the register save area -- TODO confirm
+ * against the emitter's prologue).
+ */
+static void
+selvastart(Fn *fn, int fa, Ref ap)
+{
+	Ref r0, r1;
+	int gp, fp, sp;
+
+	gp = ((fa >> 4) & 15) * 8;
+	fp = 48 + ((fa >> 8) & 15) * 16;
+	sp = fa >> 12;
+	r0 = newtmp("abi", Kl, fn);
+	r1 = newtmp("abi", Kl, fn);
+	emit(Ostorel, Kw, R, r1, r0);
+	emit(Oadd, Kl, r1, TMP(RBP), getcon(-176, fn));
+	emit(Oadd, Kl, r0, ap, getcon(16, fn));
+	r0 = newtmp("abi", Kl, fn);
+	r1 = newtmp("abi", Kl, fn);
+	emit(Ostorel, Kw, R, r1, r0);
+	emit(Oadd, Kl, r1, TMP(RBP), getcon(sp, fn));
+	emit(Oadd, Kl, r0, ap, getcon(8, fn));
+	r0 = newtmp("abi", Kl, fn);
+	emit(Ostorew, Kw, R, getcon(fp, fn), r0);
+	emit(Oadd, Kl, r0, ap, getcon(4, fn));
+	emit(Ostorew, Kw, R, getcon(gp, fn), ap);
+}
+/*
+ * Entry point of the pass: rewrite fn so that parameters, calls,
+ * returns and vararg instructions follow the System V convention.
+ * Parameters at the head of the start block are lowered first;
+ * then every block is re-emitted back to front, the start block
+ * last so that the alloc instructions collected by selcall can be
+ * emitted into it.  Blocks created by split() have a non-zero
+ * visit field and are skipped.  With debug['A'] set, the function
+ * is printed to stderr afterwards.
+ */
+void
+amd64_sysv_abi(Fn *fn)
+{
+	Blk *b;
+	Ins *i, *i0, *ip;
+	RAlloc *ral;
+	int n, fa;
+
+	for (b=fn->start; b; b=b->link)
+		b->visit = 0;
+
+	/* lower parameters */
+	for (b=fn->start, i=b->ins; i-b->ins<b->nins; i++)
+		if (!ispar(i->op))
+			break;
+	fa = selpar(fn, b->ins, i);
+	n = b->nins - (i - b->ins) + (&insb[NIns] - curi); /* remaining instructions + lowered parameter code */
+	i0 = alloc(n * sizeof(Ins));
+	ip = icpy(ip = i0, curi, &insb[NIns] - curi);
+	ip = icpy(ip, i, &b->ins[b->nins] - i);
+	b->nins = n;
+	b->ins = i0;
+
+	/* lower calls, returns, and vararg instructions */
+	ral = 0;
+	b = fn->start;
+	do {
+		if (!(b = b->link))
+			b = fn->start; /* do it last */
+		if (b->visit)
+			continue;
+		curi = &insb[NIns];
+		selret(b, fn);
+		for (i=&b->ins[b->nins]; i!=b->ins;)
+			switch ((--i)->op) {
+			default:
+				emiti(*i);
+				break;
+			case Ocall:
+			case Ovacall:
+				for (i0=i; i0>b->ins; i0--) /* find the first argument instruction of this call */
+					if (!isarg((i0-1)->op))
+						break;
+				selcall(fn, i0, i, &ral);
+				i = i0;
+				break;
+			case Ovastart:
+				selvastart(fn, fa, i->arg[0]);
+				break;
+			case Ovaarg:
+				selvaarg(fn, b, i);
+				break;
+			case Oarg:
+			case Oargc:
+				die("unreachable"); /* arguments are consumed together with their call above */
+			}
+		if (b == fn->start)
+			for (; ral; ral=ral->link)
+				emiti(ral->i);
+		b->nins = &insb[NIns] - curi;
+		idup(&b->ins, curi, b->nins);
+	} while (b != fn->start);
+
+	if (debug['A']) {
+		fprintf(stderr, "\n> After ABI lowering:\n");
+		printfn(fn, stderr);
+	}
+} |