diff options
-rw-r--r-- | Makefile | 3 | ||||
-rw-r--r-- | all.h | 6 | ||||
-rw-r--r-- | isel.c | 515 | ||||
-rw-r--r-- | main.c | 1 | ||||
-rw-r--r-- | parse.c | 65 | ||||
-rw-r--r-- | sysv.c | 512 | ||||
-rw-r--r-- | util.c | 13 |
7 files changed, 561 insertions, 554 deletions
diff --git a/Makefile b/Makefile index 447954b..f6497bd 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,10 @@ BIN = qbe +ABI = sysv V = @ OBJDIR = obj -SRC = main.c util.c parse.c mem.c ssa.c copy.c live.c isel.c spill.c rega.c emit.c +SRC = main.c util.c parse.c mem.c ssa.c copy.c live.c isel.c spill.c rega.c emit.c $(ABI).c OBJ = $(SRC:%.c=$(OBJDIR)/%.o) CFLAGS += -Wall -Wextra -std=c99 -g -pedantic diff --git a/all.h b/all.h index 51b641a..a140ef4 100644 --- a/all.h +++ b/all.h @@ -495,6 +495,7 @@ void *vnew(ulong, size_t); void vgrow(void *, ulong); int phicls(int, Tmp *); Ref newtmp(char *, int, Fn *); +void chuse(Ref, int, Fn *); Ref getcon(int64_t, Fn *); void addcon(Con *, Con *); void dumpts(BSet *, Tmp *, FILE *); @@ -541,11 +542,14 @@ void copy(Fn *); void liveon(BSet *, Blk *, Blk *); void filllive(Fn *); -/* isel.c */ +/* abi: sysv.c */ extern int rsave[/* NRSave */]; extern int rclob[/* NRClob */]; bits retregs(Ref, int[2]); bits argregs(Ref, int[2]); +void abi(Fn *); + +/* isel.c */ void isel(Fn *); /* spill.c */ diff --git a/isel.c b/isel.c index 6d19ccc..f20efb0 100644 --- a/isel.c +++ b/isel.c @@ -20,8 +20,6 @@ */ typedef struct ANum ANum; -typedef struct AClass AClass; -typedef struct RAlloc RAlloc; struct ANum { char n, l, r; @@ -157,13 +155,6 @@ fixarg(Ref *r, int k, int phi, Fn *fn) } static void -chuse(Ref r, int du, Fn *fn) -{ - if (rtype(r) == RTmp) - fn->tmp[r.val].nuse += du; -} - -static void seladdr(Ref *r, ANum *an, Fn *fn) { Addr a; @@ -357,146 +348,6 @@ flagi(Ins *i0, Ins *i) return 0; } -struct AClass { - int inmem; - int align; - uint size; - int cls[2]; -}; - -static void -aclass(AClass *a, Typ *t) -{ - int e, s, n, cls; - uint sz, al; - - sz = t->size; - al = 1u << t->align; - - /* the ABI requires sizes to be rounded - * up to the nearest multiple of 8, moreover - * it makes it easy load and store structures - * in registers - */ - if (al < 8) - al = 8; - sz = (sz + al-1) & -al; - - a->size = sz; - a->align = t->align; - - if (t->dark || sz > 16) { - /* large or unaligned structures are - * required to be passed in memory - */ - a->inmem = 1; - return; - } - - a->inmem = 0; - for (e=0, s=0; e<2; e++) { - cls = -1; - for (n=0; n<8 && t->seg[s].len; s++) { - if (t->seg[s].ispad) { - /* don't change anything */ - } - else if (t->seg[s].isflt) { - if (cls == -1) - cls = Kd; - } - else - cls = Kl; - n += t->seg[s].len; - } - assert(n <= 8); - a->cls[e] = cls; - } -} - -static void -blit(Ref rstk, uint soff, Ref rsrc, uint sz, Fn *fn) -{ - Ref r, r1; - uint boff; - - /* it's an impolite blit, we might go across the end - * of the source object a little bit... */ - for (boff=0; sz>0; sz-=8, soff+=8, boff+=8) { - r = newtmp("abi", Kl, fn); - r1 = newtmp("abi", Kl, fn); - emit(OStorel, 0, R, r, r1); - emit(OAdd, Kl, r1, rstk, getcon(soff, fn)); - r1 = newtmp("abi", Kl, fn); - emit(OLoad, Kl, r, r1, R); - emit(OAdd, Kl, r1, rsrc, getcon(boff, fn)); - chuse(rsrc, +1, fn); - chuse(rstk, +1, fn); - } -} - -static int -retr(Ref reg[2], AClass *aret) -{ - static int retreg[2][2] = {{RAX, RDX}, {XMM0, XMM0+1}}; - int n, k, ca, nr[2]; - - nr[0] = nr[1] = 0; - ca = 0; - for (n=0; aret->cls[n]>=0 && n<2; n++) { - k = KBASE(aret->cls[n]); - reg[n] = TMP(retreg[k][nr[k]++]); - ca += 1 << (2 * k); - } - return ca; -} - -static void -selret(Blk *b, Fn *fn) -{ - int j, k, ca; - Ref r, r0, reg[2]; - AClass aret; - - j = b->jmp.type; - - if (!isret(j) || j == JRet0) - return; - - r0 = b->jmp.arg; - b->jmp.type = JRet0; - - if (j == JRetc) { - aclass(&aret, &typ[fn->retty]); - if (aret.inmem) { - assert(rtype(fn->retr) == RTmp); - emit(OCopy, Kl, TMP(RAX), fn->retr, R); - chuse(fn->retr, +1, fn); - blit(fn->retr, 0, r0, aret.size, fn); - ca = 1; - } else { - ca = retr(reg, &aret); - if (aret.size > 8) { - r = newtmp("abi", Kl, fn); - emit(OLoad, Kl, reg[1], r, R); - emit(OAdd, Kl, r, r0, getcon(8, fn)); - chuse(r0, +1, fn); - } - emit(OLoad, Kl, reg[0], r0, R); - } - } else { - k = j - JRetw; - if (KBASE(k) == 0) { - emit(OCopy, k, TMP(RAX), r0, R); - ca = 1; - } else { - emit(OCopy, k, TMP(XMM0), r0, R); - ca = 1 << 2; - } - } - - b->jmp.arg = CALL(ca); -} - static void seljmp(Blk *b, Fn *fn) { @@ -557,320 +408,6 @@ seljmp(Blk *b, Fn *fn) } static int -classify(Ins *i0, Ins *i1, AClass *ac, int op, AClass *aret) -{ - int nint, ni, nsse, ns, n, *pn; - AClass *a; - Ins *i; - - if (aret && aret->inmem) - nint = 5; /* hidden argument */ - else - nint = 6; - nsse = 8; - for (i=i0, a=ac; i<i1; i++, a++) { - if (i->op == op) { - if (KBASE(i->cls) == 0) - pn = &nint; - else - pn = &nsse; - if (*pn > 0) { - --*pn; - a->inmem = 0; - } else - a->inmem = 2; - a->align = 3; - a->size = 8; - a->cls[0] = i->cls; - } else { - n = i->arg[0].val & AMask; - aclass(a, &typ[n]); - if (a->inmem) - continue; - ni = ns = 0; - for (n=0; n<2; n++) - if (KBASE(a->cls[n]) == 0) - ni++; - else - ns++; - if (nint >= ni && nsse >= ns) { - nint -= ni; - nsse -= ns; - } else - a->inmem = 1; - } - } - - return ((6-nint) << 4) | ((8-nsse) << 8); -} - -int rsave[] = { - RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX, - XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14 -}; -int rclob[] = {RBX, R12, R13, R14, R15}; - -MAKESURE(rsave_has_correct_size, sizeof rsave == NRSave * sizeof(int)); -MAKESURE(rclob_has_correct_size, sizeof rclob == NRClob * sizeof(int)); - -bits -retregs(Ref r, int p[2]) -{ - bits b; - int ni, nf; - - assert(rtype(r) == RACall); - b = 0; - ni = r.val & 3; - nf = (r.val >> 2) & 3; - if (ni >= 1) - b |= BIT(RAX); - if (ni >= 2) - b |= BIT(RDX); - if (nf >= 1) - b |= BIT(XMM0); - if (nf >= 2) - b |= BIT(XMM1); - if (p) { - p[0] = ni; - p[1] = nf; - } - return b; -} - -bits -argregs(Ref r, int p[2]) -{ - bits b; - int j, ni, nf; - - assert(rtype(r) == RACall); - b = 0; - ni = (r.val >> 4) & 15; - nf = (r.val >> 8) & 15; - for (j=0; j<ni; j++) - b |= BIT(rsave[j]); - for (j=0; j<nf; j++) - b |= BIT(XMM0+j); - if (p) { - p[0] = ni + 1; - p[1] = nf; - } - return b | BIT(RAX); -} - -static Ref -rarg(int ty, int *ni, int *ns) -{ - if (KBASE(ty) == 0) - return TMP(rsave[(*ni)++]); - else - return TMP(XMM0 + (*ns)++); -} - -struct RAlloc { - Ins i; - RAlloc *link; -}; - -static void -selcall(Fn *fn, Ins *i0, Ins *i1, RAlloc **rap) -{ - Ins *i; - AClass *ac, *a, aret; - int ca, ni, ns; - uint stk, off; - Ref r, r1, r2, reg[2], regcp[2]; - RAlloc *ra; - - ac = alloc((i1-i0) * sizeof ac[0]); - if (!req(i1->arg[1], R)) { - assert(rtype(i1->arg[1]) == RAType); - aclass(&aret, &typ[i1->arg[1].val & AMask]); - ca = classify(i0, i1, ac, OArg, &aret); - } else - ca = classify(i0, i1, ac, OArg, 0); - - for (stk=0, a=&ac[i1-i0]; a>ac;) - if ((--a)->inmem) { - assert(a->align <= 4); - stk += a->size; - if (a->align == 4) - stk += stk & 15; - } - stk += stk & 15; - if (stk) { - r = getcon(-(int64_t)stk, fn); - emit(OSAlloc, Kl, R, r, R); - } - - if (!req(i1->arg[1], R)) { - if (aret.inmem) { - /* get the return location from eax - * it saves one callee-save reg */ - r1 = newtmp("abi", Kl, fn); - emit(OCopy, Kl, i1->to, TMP(RAX), R); - ca += 1; - } else { - if (aret.size > 8) { - r = newtmp("abi", Kl, fn); - regcp[1] = newtmp("abi", aret.cls[1], fn); - emit(OStorel, 0, R, regcp[1], r); - emit(OAdd, Kl, r, i1->to, getcon(8, fn)); - chuse(i1->to, +1, fn); - ca += 1 << (2 * KBASE(aret.cls[1])); - } - regcp[0] = newtmp("abi", aret.cls[0], fn); - emit(OStorel, 0, R, regcp[0], i1->to); - ca += 1 << (2 * KBASE(aret.cls[0])); - retr(reg, &aret); - if (aret.size > 8) - emit(OCopy, aret.cls[1], regcp[1], reg[1], R); - emit(OCopy, aret.cls[0], regcp[0], reg[0], R); - r1 = i1->to; - } - /* allocate return pad */ - ra = alloc(sizeof *ra); - /* specific to NAlign == 3 */ - aret.align -= 2; - if (aret.align < 0) - aret.align = 0; - ra->i.op = OAlloc + aret.align; - ra->i.cls = Kl; - ra->i.to = r1; - ra->i.arg[0] = getcon(aret.size, fn); - ra->link = (*rap); - *rap = ra; - } else { - ra = 0; - if (KBASE(i1->cls) == 0) { - emit(OCopy, i1->cls, i1->to, TMP(RAX), R); - ca += 1; - } else { - emit(OCopy, i1->cls, i1->to, TMP(XMM0), R); - ca += 1 << 2; - } - } - emit(OCall, i1->cls, R, i1->arg[0], CALL(ca)); - emit(OCopy, Kw, TMP(RAX), getcon((ca >> 8) & 15, fn), R); - - ni = ns = 0; - if (ra && aret.inmem) - emit(OCopy, Kl, rarg(Kl, &ni, &ns), ra->i.to, R); /* pass hidden argument */ - for (i=i0, a=ac; i<i1; i++, a++) { - if (a->inmem) - continue; - r1 = rarg(a->cls[0], &ni, &ns); - if (i->op == OArgc) { - if (a->size > 8) { - r2 = rarg(a->cls[1], &ni, &ns); - r = newtmp("abi", Kl, fn); - emit(OLoad, a->cls[1], r2, r, R); - emit(OAdd, Kl, r, i->arg[1], getcon(8, fn)); - chuse(i->arg[1], +1, fn); - } - emit(OLoad, a->cls[0], r1, i->arg[1], R); - } else - emit(OCopy, i->cls, r1, i->arg[0], R); - } - - if (!stk) - return; - - r = newtmp("abi", Kl, fn); - chuse(r, -1, fn); - for (i=i0, a=ac, off=0; i<i1; i++, a++) { - if (!a->inmem) - continue; - if (i->op == OArgc) { - if (a->align == 4) - off += off & 15; - blit(r, off, i->arg[1], a->size, fn); - } else { - r1 = newtmp("abi", Kl, fn); - emit(OStorel, 0, R, i->arg[0], r1); - emit(OAdd, Kl, r1, r, getcon(off, fn)); - chuse(r, +1, fn); - } - off += a->size; - } - emit(OSAlloc, Kl, r, getcon(stk, fn), R); -} - -static void -selpar(Fn *fn, Ins *i0, Ins *i1) -{ - AClass *ac, *a, aret; - Ins *i; - int ni, ns, s, al; - Ref r, r1; - - ac = alloc((i1-i0) * sizeof ac[0]); - curi = insb; - ni = ns = 0; - - if (fn->retty >= 0) { - aclass(&aret, &typ[fn->retty]); - if (aret.inmem) { - r = newtmp("abi", Kl, fn); - *curi++ = (Ins){OCopy, r, {rarg(Kl, &ni, &ns)}, Kl}; - fn->retr = r; - } - classify(i0, i1, ac, OPar, &aret); - } else - classify(i0, i1, ac, OPar, 0); - - /* specific to NAlign == 3 */ - - s = 4; - for (i=i0, a=ac; i<i1; i++, a++) { - switch (a->inmem) { - case 1: - assert(a->align <= 4); - if (a->align == 4) - s = (s+3) & -4; - fn->tmp[i->to.val].slot = -s; /* HACK! */ - s += a->size / 4; - continue; - case 2: - *curi++ = (Ins){OLoad, i->to, {SLOT(-s)}, i->cls}; - s += 2; - continue; - } - r1 = rarg(a->cls[0], &ni, &ns); - if (i->op == OParc) { - r = newtmp("abi", Kl, fn); - *curi++ = (Ins){OCopy, r, {r1}, Kl}; - a->cls[0] = r.val; - if (a->size > 8) { - r1 = rarg(a->cls[1], &ni, &ns); - r = newtmp("abi", Kl, fn); - *curi++ = (Ins){OCopy, r, {r1}, Kl}; - a->cls[1] = r.val; - } - } else - *curi++ = (Ins){OCopy, i->to, {r1}, i->cls}; - } - for (i=i0, a=ac; i<i1; i++, a++) { - if (i->op != OParc || a->inmem) - continue; - for (al=0; a->align >> (al+2); al++) - ; - r = TMP(a->cls[0]); - r1 = i->to; - *curi++ = (Ins){OAlloc+al, r1, {getcon(a->size, fn)}, Kl}; - *curi++ = (Ins){OStorel, R, {r, r1}, 0}; - if (a->size > 8) { - r = newtmp("abi", Kl, fn); - *curi++ = (Ins){OAdd, r, {r1, getcon(8, fn)}, Kl}; - r1 = TMP(a->cls[1]); - *curi++ = (Ins){OStorel, R, {r1, r}, 0}; - } - } -} - -static int aref(Ref r, ANum *ai) { switch (rtype(r)) { @@ -1031,62 +568,12 @@ void isel(Fn *fn) { Blk *b, **sb; - Ins *i, *i0, *ip; + Ins *i; Phi *p; uint a; int n, al; int64_t sz; ANum *ainfo; - RAlloc *ral; - - for (n=0; n<fn->ntmp; n++) - fn->tmp[n].slot = -1; - fn->slot = 0; - - /* lower arguments */ - for (b=fn->start, i=b->ins; i-b->ins < b->nins; i++) - if (i->op != OPar && i->op != OParc) - break; - selpar(fn, b->ins, i); - n = b->nins - (i - b->ins) + (curi - insb); - i0 = alloc(n * sizeof(Ins)); - ip = icpy(ip = i0, insb, curi - insb); - ip = icpy(ip, i, &b->ins[b->nins] - i); - b->nins = n; - b->ins = i0; - - /* lower function calls and returns */ - ral = 0; - b = fn->start; - do { - if (!(b = b->link)) - b = fn->start; /* do it last */ - curi = &insb[NIns]; - selret(b, fn); - for (i=&b->ins[b->nins]; i!=b->ins;) { - if ((--i)->op == OCall) { - for (i0=i; i0>b->ins; i0--) - if ((i0-1)->op != OArg) - if ((i0-1)->op != OArgc) - break; - selcall(fn, i0, i, &ral); - i = i0; - continue; - } - assert(i->op != OArg && i->op != OArgc); - emiti(*i); - } - if (b == fn->start) - for (; ral; ral=ral->link) - emiti(ral->i); - b->nins = &insb[NIns] - curi; - idup(&b->ins, curi, b->nins); - } while (b != fn->start); - - if (debug['A']) { - fprintf(stderr, "\n> After call lowering:\n"); - printfn(fn, stderr); - } /* assign slots to fast allocs */ b = fn->start; diff --git a/main.c b/main.c index c1664be..b7f5b3f 100644 --- a/main.c +++ b/main.c @@ -49,6 +49,7 @@ func(Fn *fn) filluse(fn); copy(fn); filluse(fn); + abi(fn); isel(fn); filllive(fn); fillcost(fn); diff --git a/parse.c b/parse.c index 1fd032a..521e2bf 100644 --- a/parse.c +++ b/parse.c @@ -143,10 +143,7 @@ static struct { } tokval; static int lnum; -static Tmp *tmp; -static Con *con; -static int ntmp; -static int ncon; +static Fn *curf; static Phi **plink; static Blk **bmap; static Blk *curb; @@ -360,11 +357,11 @@ tmpref(char *v) { int t; - for (t=Tmp0; t<ntmp; t++) - if (strcmp(v, tmp[t].name) == 0) + for (t=Tmp0; t<curf->ntmp; t++) + if (strcmp(v, curf->tmp[t].name) == 0) return TMP(t); - vgrow(&tmp, ++ntmp); - strcpy(tmp[t].name, v); + newtmp(0, Kw, curf); + strcpy(curf->tmp[t].name, v); return TMP(t); } @@ -396,13 +393,13 @@ parseref() c.type = CAddr; strcpy(c.label, tokval.str); Look: - for (i=0; i<ncon; i++) - if (con[i].type == c.type - && con[i].bits.i == c.bits.i - && strcmp(con[i].label, c.label) == 0) + for (i=0; i<curf->ncon; i++) + if (curf->con[i].type == c.type + && curf->con[i].bits.i == c.bits.i + && strcmp(curf->con[i].label, c.label) == 0) return CON(i); - vgrow(&con, ++ncon); - con[i] = c; + vgrow(&curf->con, ++curf->ncon); + curf->con[i] = c; return CON(i); default: return R; @@ -648,29 +645,31 @@ DoOp: static Fn * parsefn(int export) { + int r; PState ps; - Fn *fn; - ntmp = Tmp0; - ncon = 1; /* first constant must be 0 */ curb = 0; nblk = 0; curi = insb; - tmp = vnew(ntmp, sizeof tmp[0]); - con = vnew(ncon, sizeof con[0]); + curf = alloc(sizeof *curf); + curf->ntmp = 0; + curf->ncon = 1; /* first constant must be 0 */ + curf->tmp = vnew(curf->ntmp, sizeof curf->tmp[0]); + curf->con = vnew(curf->ncon, sizeof curf->con[0]); + for (r=0; r<Tmp0; r++) + newtmp(0, r < XMM0 ? Kl : Kd, curf); bmap = vnew(nblk, sizeof bmap[0]); - con[0].type = CBits; - fn = alloc(sizeof *fn); - fn->export = export; - blink = &fn->start; - fn->retty = -1; + curf->con[0].type = CBits; + curf->export = export; + blink = &curf->start; + curf->retty = -1; if (peek() != TGlo) - rcls = parsecls(&fn->retty); + rcls = parsecls(&curf->retty); else rcls = 5; if (next() != TGlo) err("function name expected"); - strcpy(fn->name, tokval.str); + strcpy(curf->name, tokval.str); parserefl(0); if (nextnl() != TLBrace) err("function body must start with {"); @@ -682,15 +681,11 @@ parsefn(int export) err("empty file"); if (curb->jmp.type == JXXX) err("last block misses jump"); - fn->tmp = tmp; - fn->con = con; - fn->mem = vnew(0, sizeof fn->mem[0]); - fn->ntmp = ntmp; - fn->ncon = ncon; - fn->nmem = 0; - fn->nblk = nblk; - fn->rpo = 0; - return fn; + curf->mem = vnew(0, sizeof curf->mem[0]); + curf->nmem = 0; + curf->nblk = nblk; + curf->rpo = 0; + return curf; } static void diff --git a/sysv.c b/sysv.c new file mode 100644 index 0000000..78c2f07 --- /dev/null +++ b/sysv.c @@ -0,0 +1,512 @@ +#include "all.h" + +typedef struct AClass AClass; +typedef struct RAlloc RAlloc; + +struct AClass { + int inmem; + int align; + uint size; + int cls[2]; +}; + +static void +aclass(AClass *a, Typ *t) +{ + int e, s, n, cls; + uint sz, al; + + sz = t->size; + al = 1u << t->align; + + /* the ABI requires sizes to be rounded + * up to the nearest multiple of 8, moreover + * it makes it easy load and store structures + * in registers + */ + if (al < 8) + al = 8; + sz = (sz + al-1) & -al; + + a->size = sz; + a->align = t->align; + + if (t->dark || sz > 16) { + /* large or unaligned structures are + * required to be passed in memory + */ + a->inmem = 1; + return; + } + + a->inmem = 0; + for (e=0, s=0; e<2; e++) { + cls = -1; + for (n=0; n<8 && t->seg[s].len; s++) { + if (t->seg[s].ispad) { + /* don't change anything */ + } + else if (t->seg[s].isflt) { + if (cls == -1) + cls = Kd; + } + else + cls = Kl; + n += t->seg[s].len; + } + assert(n <= 8); + a->cls[e] = cls; + } +} + +static void +blit(Ref rstk, uint soff, Ref rsrc, uint sz, Fn *fn) +{ + Ref r, r1; + uint boff; + + /* it's an impolite blit, we might go across the end + * of the source object a little bit... */ + for (boff=0; sz>0; sz-=8, soff+=8, boff+=8) { + r = newtmp("abi", Kl, fn); + r1 = newtmp("abi", Kl, fn); + emit(OStorel, 0, R, r, r1); + emit(OAdd, Kl, r1, rstk, getcon(soff, fn)); + r1 = newtmp("abi", Kl, fn); + emit(OLoad, Kl, r, r1, R); + emit(OAdd, Kl, r1, rsrc, getcon(boff, fn)); + chuse(rsrc, +1, fn); + chuse(rstk, +1, fn); + } +} + +static int +retr(Ref reg[2], AClass *aret) +{ + static int retreg[2][2] = {{RAX, RDX}, {XMM0, XMM0+1}}; + int n, k, ca, nr[2]; + + nr[0] = nr[1] = 0; + ca = 0; + for (n=0; aret->cls[n]>=0 && n<2; n++) { + k = KBASE(aret->cls[n]); + reg[n] = TMP(retreg[k][nr[k]++]); + ca += 1 << (2 * k); + } + return ca; +} + +static void +selret(Blk *b, Fn *fn) +{ + int j, k, ca; + Ref r, r0, reg[2]; + AClass aret; + + j = b->jmp.type; + + if (!isret(j) || j == JRet0) + return; + + r0 = b->jmp.arg; + b->jmp.type = JRet0; + + if (j == JRetc) { + aclass(&aret, &typ[fn->retty]); + if (aret.inmem) { + assert(rtype(fn->retr) == RTmp); + emit(OCopy, Kl, TMP(RAX), fn->retr, R); + chuse(fn->retr, +1, fn); + blit(fn->retr, 0, r0, aret.size, fn); + ca = 1; + } else { + ca = retr(reg, &aret); + if (aret.size > 8) { + r = newtmp("abi", Kl, fn); + emit(OLoad, Kl, reg[1], r, R); + emit(OAdd, Kl, r, r0, getcon(8, fn)); + chuse(r0, +1, fn); + } + emit(OLoad, Kl, reg[0], r0, R); + } + } else { + k = j - JRetw; + if (KBASE(k) == 0) { + emit(OCopy, k, TMP(RAX), r0, R); + ca = 1; + } else { + emit(OCopy, k, TMP(XMM0), r0, R); + ca = 1 << 2; + } + } + + b->jmp.arg = CALL(ca); +} + +static int +classify(Ins *i0, Ins *i1, AClass *ac, int op, AClass *aret) +{ + int nint, ni, nsse, ns, n, *pn; + AClass *a; + Ins *i; + + if (aret && aret->inmem) + nint = 5; /* hidden argument */ + else + nint = 6; + nsse = 8; + for (i=i0, a=ac; i<i1; i++, a++) { + if (i->op == op) { + if (KBASE(i->cls) == 0) + pn = &nint; + else + pn = &nsse; + if (*pn > 0) { + --*pn; + a->inmem = 0; + } else + a->inmem = 2; + a->align = 3; + a->size = 8; + a->cls[0] = i->cls; + } else { + n = i->arg[0].val & AMask; + aclass(a, &typ[n]); + if (a->inmem) + continue; + ni = ns = 0; + for (n=0; n<2; n++) + if (KBASE(a->cls[n]) == 0) + ni++; + else + ns++; + if (nint >= ni && nsse >= ns) { + nint -= ni; + nsse -= ns; + } else + a->inmem = 1; + } + } + + return ((6-nint) << 4) | ((8-nsse) << 8); +} + +int rsave[] = { + RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14 +}; +int rclob[] = {RBX, R12, R13, R14, R15}; + +MAKESURE(rsave_has_correct_size, sizeof rsave == NRSave * sizeof(int)); +MAKESURE(rclob_has_correct_size, sizeof rclob == NRClob * sizeof(int)); + +bits +retregs(Ref r, int p[2]) +{ + bits b; + int ni, nf; + + assert(rtype(r) == RACall); + b = 0; + ni = r.val & 3; + nf = (r.val >> 2) & 3; + if (ni >= 1) + b |= BIT(RAX); + if (ni >= 2) + b |= BIT(RDX); + if (nf >= 1) + b |= BIT(XMM0); + if (nf >= 2) + b |= BIT(XMM1); + if (p) { + p[0] = ni; + p[1] = nf; + } + return b; +} + +bits +argregs(Ref r, int p[2]) +{ + bits b; + int j, ni, nf; + + assert(rtype(r) == RACall); + b = 0; + ni = (r.val >> 4) & 15; + nf = (r.val >> 8) & 15; + for (j=0; j<ni; j++) + b |= BIT(rsave[j]); + for (j=0; j<nf; j++) + b |= BIT(XMM0+j); + if (p) { + p[0] = ni + 1; + p[1] = nf; + } + return b | BIT(RAX); +} + +static Ref +rarg(int ty, int *ni, int *ns) +{ + if (KBASE(ty) == 0) + return TMP(rsave[(*ni)++]); + else + return TMP(XMM0 + (*ns)++); +} + +struct RAlloc { + Ins i; + RAlloc *link; +}; + +static void +selcall(Fn *fn, Ins *i0, Ins *i1, RAlloc **rap) +{ + Ins *i; + AClass *ac, *a, aret; + int ca, ni, ns; + uint stk, off; + Ref r, r1, r2, reg[2], regcp[2]; + RAlloc *ra; + + ac = alloc((i1-i0) * sizeof ac[0]); + if (!req(i1->arg[1], R)) { + assert(rtype(i1->arg[1]) == RAType); + aclass(&aret, &typ[i1->arg[1].val & AMask]); + ca = classify(i0, i1, ac, OArg, &aret); + } else + ca = classify(i0, i1, ac, OArg, 0); + + for (stk=0, a=&ac[i1-i0]; a>ac;) + if ((--a)->inmem) { + assert(a->align <= 4); + stk += a->size; + if (a->align == 4) + stk += stk & 15; + } + stk += stk & 15; + if (stk) { + r = getcon(-(int64_t)stk, fn); + emit(OSAlloc, Kl, R, r, R); + } + + if (!req(i1->arg[1], R)) { + if (aret.inmem) { + /* get the return location from eax + * it saves one callee-save reg */ + r1 = newtmp("abi", Kl, fn); + emit(OCopy, Kl, i1->to, TMP(RAX), R); + ca += 1; + } else { + if (aret.size > 8) { + r = newtmp("abi", Kl, fn); + regcp[1] = newtmp("abi", aret.cls[1], fn); + emit(OStorel, 0, R, regcp[1], r); + emit(OAdd, Kl, r, i1->to, getcon(8, fn)); + chuse(i1->to, +1, fn); + ca += 1 << (2 * KBASE(aret.cls[1])); + } + regcp[0] = newtmp("abi", aret.cls[0], fn); + emit(OStorel, 0, R, regcp[0], i1->to); + ca += 1 << (2 * KBASE(aret.cls[0])); + retr(reg, &aret); + if (aret.size > 8) + emit(OCopy, aret.cls[1], regcp[1], reg[1], R); + emit(OCopy, aret.cls[0], regcp[0], reg[0], R); + r1 = i1->to; + } + /* allocate return pad */ + ra = alloc(sizeof *ra); + /* specific to NAlign == 3 */ + aret.align -= 2; + if (aret.align < 0) + aret.align = 0; + ra->i.op = OAlloc + aret.align; + ra->i.cls = Kl; + ra->i.to = r1; + ra->i.arg[0] = getcon(aret.size, fn); + ra->link = (*rap); + *rap = ra; + } else { + ra = 0; + if (KBASE(i1->cls) == 0) { + emit(OCopy, i1->cls, i1->to, TMP(RAX), R); + ca += 1; + } else { + emit(OCopy, i1->cls, i1->to, TMP(XMM0), R); + ca += 1 << 2; + } + } + emit(OCall, i1->cls, R, i1->arg[0], CALL(ca)); + emit(OCopy, Kw, TMP(RAX), getcon((ca >> 8) & 15, fn), R); + + ni = ns = 0; + if (ra && aret.inmem) + emit(OCopy, Kl, rarg(Kl, &ni, &ns), ra->i.to, R); /* pass hidden argument */ + for (i=i0, a=ac; i<i1; i++, a++) { + if (a->inmem) + continue; + r1 = rarg(a->cls[0], &ni, &ns); + if (i->op == OArgc) { + if (a->size > 8) { + r2 = rarg(a->cls[1], &ni, &ns); + r = newtmp("abi", Kl, fn); + emit(OLoad, a->cls[1], r2, r, R); + emit(OAdd, Kl, r, i->arg[1], getcon(8, fn)); + chuse(i->arg[1], +1, fn); + } + emit(OLoad, a->cls[0], r1, i->arg[1], R); + } else + emit(OCopy, i->cls, r1, i->arg[0], R); + } + + if (!stk) + return; + + r = newtmp("abi", Kl, fn); + chuse(r, -1, fn); + for (i=i0, a=ac, off=0; i<i1; i++, a++) { + if (!a->inmem) + continue; + if (i->op == OArgc) { + if (a->align == 4) + off += off & 15; + blit(r, off, i->arg[1], a->size, fn); + } else { + r1 = newtmp("abi", Kl, fn); + emit(OStorel, 0, R, i->arg[0], r1); + emit(OAdd, Kl, r1, r, getcon(off, fn)); + chuse(r, +1, fn); + } + off += a->size; + } + emit(OSAlloc, Kl, r, getcon(stk, fn), R); +} + +static void +selpar(Fn *fn, Ins *i0, Ins *i1) +{ + AClass *ac, *a, aret; + Ins *i; + int ni, ns, s, al; + Ref r, r1; + + ac = alloc((i1-i0) * sizeof ac[0]); + curi = insb; + ni = ns = 0; + + if (fn->retty >= 0) { + aclass(&aret, &typ[fn->retty]); + if (aret.inmem) { + r = newtmp("abi", Kl, fn); + *curi++ = (Ins){OCopy, r, {rarg(Kl, &ni, &ns)}, Kl}; + fn->retr = r; + } + classify(i0, i1, ac, OPar, &aret); + } else + classify(i0, i1, ac, OPar, 0); + + /* specific to NAlign == 3 */ + + s = 4; + for (i=i0, a=ac; i<i1; i++, a++) { + switch (a->inmem) { + case 1: + assert(a->align <= 4); + if (a->align == 4) + s = (s+3) & -4; + fn->tmp[i->to.val].slot = -s; /* HACK! */ + s += a->size / 4; + continue; + case 2: + *curi++ = (Ins){OLoad, i->to, {SLOT(-s)}, i->cls}; + s += 2; + continue; + } + r1 = rarg(a->cls[0], &ni, &ns); + if (i->op == OParc) { + r = newtmp("abi", Kl, fn); + *curi++ = (Ins){OCopy, r, {r1}, Kl}; + a->cls[0] = r.val; + if (a->size > 8) { + r1 = rarg(a->cls[1], &ni, &ns); + r = newtmp("abi", Kl, fn); + *curi++ = (Ins){OCopy, r, {r1}, Kl}; + a->cls[1] = r.val; + } + } else + *curi++ = (Ins){OCopy, i->to, {r1}, i->cls}; + } + for (i=i0, a=ac; i<i1; i++, a++) { + if (i->op != OParc || a->inmem) + continue; + for (al=0; a->align >> (al+2); al++) + ; + r = TMP(a->cls[0]); + r1 = i->to; + *curi++ = (Ins){OAlloc+al, r1, {getcon(a->size, fn)}, Kl}; + *curi++ = (Ins){OStorel, R, {r, r1}, 0}; + if (a->size > 8) { + r = newtmp("abi", Kl, fn); + *curi++ = (Ins){OAdd, r, {r1, getcon(8, fn)}, Kl}; + r1 = TMP(a->cls[1]); + *curi++ = (Ins){OStorel, R, {r1, r}, 0}; + } + } +} + +void +abi(Fn *fn) +{ + Blk *b; + Ins *i, *i0, *ip; + RAlloc *ral; + int n; + + /* lower arguments */ + for (b=fn->start, i=b->ins; i-b->ins < b->nins; i++) + if (i->op != OPar && i->op != OParc) + break; + selpar(fn, b->ins, i); + n = b->nins - (i - b->ins) + (curi - insb); + i0 = alloc(n * sizeof(Ins)); + ip = icpy(ip = i0, insb, curi - insb); + ip = icpy(ip, i, &b->ins[b->nins] - i); + b->nins = n; + b->ins = i0; + + /* lower calls and returns */ + ral = 0; + b = fn->start; + do { + if (!(b = b->link)) + b = fn->start; /* do it last */ + curi = &insb[NIns]; + selret(b, fn); + for (i=&b->ins[b->nins]; i!=b->ins;) { + if ((--i)->op == OCall) { + for (i0=i; i0>b->ins; i0--) + if ((i0-1)->op != OArg) + if ((i0-1)->op != OArgc) + break; + selcall(fn, i0, i, &ral); + i = i0; + continue; + } + assert(i->op != OArg && i->op != OArgc); + emiti(*i); + } + if (b == fn->start) + for (; ral; ral=ral->link) + emiti(ral->i); + b->nins = &insb[NIns] - curi; + idup(&b->ins, curi, b->nins); + } while (b != fn->start); + + if (debug['A']) { + fprintf(stderr, "\n> After ABI lowering:\n"); + printfn(fn, stderr); + } +} diff --git a/util.c b/util.c index 1654491..238c996 100644 --- a/util.c +++ b/util.c @@ -187,7 +187,8 @@ newtmp(char *prfx, int k, Fn *fn) t = fn->ntmp++; vgrow(&fn->tmp, fn->ntmp); - sprintf(fn->tmp[t].name, "%s%d", prfx, ++n); + if (prfx) + sprintf(fn->tmp[t].name, "%s%d", prfx, ++n); fn->tmp[t].cls = k; fn->tmp[t].slot = -1; fn->tmp[t].nuse = +1; @@ -195,6 +196,13 @@ newtmp(char *prfx, int k, Fn *fn) return TMP(t); } +void +chuse(Ref r, int du, Fn *fn) +{ + if (rtype(r) == RTmp) + fn->tmp[r.val].nuse += du; +} + Ref getcon(int64_t val, Fn *fn) { @@ -203,8 +211,7 @@ getcon(int64_t val, Fn *fn) for (c=0; c<fn->ncon; c++) if (fn->con[c].type == CBits && fn->con[c].bits.i == val) return CON(c); - fn->ncon++; - vgrow(&fn->con, fn->ncon); + vgrow(&fn->con, ++fn->ncon); fn->con[c] = (Con){.type = CBits, .bits.i = val}; return CON(c); } |