diff options
author | Quentin Carbonneaux <quentin.carbonneaux@yale.edu> | 2015-09-10 22:55:03 -0400 |
---|---|---|
committer | Quentin Carbonneaux <quentin.carbonneaux@yale.edu> | 2015-09-15 23:01:33 -0400 |
commit | 8201c6161e215e4716f3305e3dcb1e6b3de15ea9 (patch) | |
tree | 92596cdc92b02bb1039d41ff46002f855c8e607c | |
parent | 7e86fc39fcee98595a723daac39aa8a4ee3dfc96 (diff) | |
download | roux-8201c6161e215e4716f3305e3dcb1e6b3de15ea9.tar.gz |
start function call lowering (wip)
-rw-r--r-- | lisc/emit.c | 20 | ||||
-rw-r--r-- | lisc/isel.c | 206 | ||||
-rw-r--r-- | lisc/lisc.h | 11 | ||||
-rw-r--r-- | lisc/parse.c | 45 |
4 files changed, 249 insertions, 33 deletions
diff --git a/lisc/emit.c b/lisc/emit.c index 449824c..8f370d9 100644 --- a/lisc/emit.c +++ b/lisc/emit.c @@ -221,11 +221,8 @@ eins(Ins i, Fn *fn, FILE *f) emitf(fn, f, "\t%s%w %M, %R\n", otoa[i.op], i.wide, i.arg[0], i.to); break; - case OAlloc: - emitf(fn, f, "\tsub%w %R, %R\n", - 1, i.arg[0], TMP(RSP)); - emitf(fn, f, "\tmov%w %R, %R\n", - 1, TMP(RSP), i.to); + case OCall: + emitf(fn, f, "\tcall%w %R\n", 1, i.arg[0]); break; case OAddr: emitf(fn, f, "\tlea%w %M, %R\n", @@ -244,6 +241,19 @@ eins(Ins i, Fn *fn, FILE *f) } else diag("emit: unhandled instruction (2)"); break; + case OSAlloc: + emitf(fn, f, "\tsub%w %R, %R\n", + 1, i.arg[0], TMP(RSP)); + if (!req(i.to, R)) + emitf(fn, f, "\tmov%w %R, %R\n", + 1, TMP(RSP), i.to); + break; + case OXPush: + emitf(fn, f, "\tpush%w %R\n", 1, i.arg[0]); + break; + case OXMovs: + emitf(fn, f, "\trep movsb\n"); + break; case OXDiv: emitf(fn, f, "\tidiv%w %R\n", i.wide, i.arg[0]); break; diff --git a/lisc/isel.c b/lisc/isel.c index 6d7e7af..6efbd70 100644 --- a/lisc/isel.c +++ b/lisc/isel.c @@ -3,6 +3,7 @@ /* For x86_64, do the following: * + * - lower calls * - check that constants are used only in * places allowed * - ensure immediates always fit in 32b @@ -10,11 +11,6 @@ * on instructions like division. * - implement fast locals (the streak of * constant allocX in the first basic block) - * - * - lower calls (future, I have to think - * about their representation and the - * way I deal with structs/unions in the - * ABI) */ extern Ins insb[NIns], *curi; /* shared work buffer */ @@ -155,6 +151,13 @@ sel(Ins i, Fn *fn) break; case ONop: break; + case OXPush: + n = 1; + goto Emit; + case OCall: + case OXMovs: + case OSAlloc: + case OCopy: case OSext: case OZext: n = 0; @@ -163,7 +166,6 @@ sel(Ins i, Fn *fn) case OSub: case OMul: case OAnd: - case OCopy: case OXTest: n = w ? 2 : 0; goto Emit; @@ -221,7 +223,7 @@ Emit: /* r0 = (i.arg[0] + 15) & -16 */ r0 = newtmp(fn); r1 = newtmp(fn); - emit(OAlloc, 0, i.to, r0, R); + emit(OSAlloc, 0, i.to, r0, R); emit(OAnd, 1, r0, r1, newcon(-16, fn)); emit(OAdd, 1, r1, i.arg[0], newcon(15, fn)); } @@ -381,6 +383,170 @@ slota(int sz, int al /* log2 */, int *sa) return ret; } +typedef struct AInfo AInfo; + +enum { + RNone, + RInt, + RSse, +}; + +struct AInfo { + int inmem; + int align; + uint size; + int rty[2]; +}; + +static void +classify(AInfo *a, Typ *t) +{ + int e, s, n, rty; + uint sz, al; + + sz = t->size; + al = 1u << t->align; + + /* the ABI requires sizes to be rounded + * up to the nearest multiple of 8, moreover + * it makes it easy load and store structures + * in registers + */ + if (al < 8) + al = 8; + if (sz & (al-1)) + sz = (sz + al-1) & -al; + + a->size = sz; + a->align = t->align; + + if (t->dark || sz > 16) { + /* large or unaligned structures are + * required to be passed in memory + */ + a->inmem = 1; + return; + } + + for (e=0, s=0; e<2; e++) { + rty = RNone; + for (n=0; n<8 && t->seg[s].len; s++) { + if (t->seg[s].flt) { + if (rty == RNone) + rty = RSse; + } else + rty = RInt; + n += t->seg[s].len; + } + assert(n <= 8); + a->rty[e] = rty; + } +} + +static void +selcall(Fn *fn, Ins *i0, Ins *i1) +{ + static int ireg[8] = { RDI, RSI, RDX, RCX, R8, R9, R10, R11 }; + Ins *i; + AInfo *ai, *a; + int nint, nsse, ni, ns, n; + uint stk, sz; + Ref r; + + ai = alloc((i1-i0) * sizeof ai[0]); + + nint = 6; + nsse = 8; + stk = 0; + for (i=i0, a=ai; i<i1; i++, a++) { + if (i->op == OArgc) { + classify(a, &typ[i->arg[0].val]); + if (a->inmem) + goto Mem; + ni = ns = 0; + for (n=0; n<2; n++) + switch (a->rty[n]) { + case RInt: + ni++; + break; + case RSse: + ns++; + break; + } + if (nint > ni && nsse > ns) { + nint -= ni; + nsse -= ns; + } else { + a->inmem = 1; + Mem: + stk += a->size; + if (a->align == 4 && stk % 16) + stk += 8; + } + } else { + if (nint > 0) { + nint--; + a->inmem = 0; + } else { + stk += 8; + a->inmem = 1; + } + a->align = 3; + a->size = 8; + a->rty[0] = RInt; + } + } + + if (!req(i1->arg[1], R)) + diag("struct-returning function not implemented"); + if (stk) + emit(OSAlloc, 0, R, newcon(-(int64_t)stk, fn), R); + for (n=0; n<2; n++) { + r = TMP(ireg[n]); + emit(OCopy, 0, R, r, R); + } + emit(OCopy, i1->wide, i1->to, TMP(RAX), R); + emit(OCall, 0, R, i->arg[0], R); + emit(OCopy, 0, TMP(RAX), CON_Z, R); + if (stk % 16) + emit(OXPush, 1, R, TMP(RAX), R); + + for (i=i0, a=ai, ni=0; i<i1; i++, a++) { + if (a->inmem) + continue; + if (i->op == OArgc) { + diag("aggregate in registers not implemented"); + } else { + r = TMP(ireg[ni++]); + emit(OCopy, i->wide, r, i->arg[0], R); + } + } + + stk = 0; + for (i=i0, a=ai; i<i1; i++, a++) { + if (!a->inmem) + continue; + sz = a->size; + if (a->align == 4 && stk % 16) + sz += 8; + stk += sz; + if (i->op == OArgc) { + emit(OCopy, 0, R, TMP(RCX), R); + emit(OCopy, 0, R, TMP(RDI), R); + emit(OCopy, 0, R, TMP(RSI), R); + emit(OXMovs, 0, R, R, R); + emit(OCopy, 1, TMP(RCX), newcon(a->size, fn), R); + emit(OCopy, 1, TMP(RDI), TMP(RSP), R); + emit(OCopy, 1, TMP(RSI), i->arg[1], R); + emit(OSAlloc, 0, R, newcon(sz, fn), R); + } else { + emit(OXPush, 1, R, i->arg[0], R); + } + } + + free(ai); +} + /* instruction selection * requires use counts (as given by parsing) */ @@ -388,12 +554,36 @@ void isel(Fn *fn) { Blk *b, **sb; - Ins *i; + Ins *i, *i0; Phi *p; uint a; int n, al, s; int64_t sz; + /* lower function calls */ + for (b=fn->start; b; b=b->link) { + curi = &insb[NIns]; + for (i=&b->ins[b->nins]; i>b->ins;) { + if ((--i)->op == OCall) { + i0 = i; + for (i0=i; i0>b->ins; i0--) + if ((i0-1)->op != OArg) + if ((i0-1)->op != OArgc) + break; + selcall(fn, i0, i); + i = i0; + continue; + } + assert(i->op != OArg && i->op != OArgc); + emit(i->op, i->wide, i->to, i->arg[0], i->arg[1]); + } + n = &insb[NIns] - curi; + free(b->ins); + b->ins = alloc(n * sizeof b->ins[0]); + memcpy(b->ins, curi, n * sizeof b->ins[0]); + b->nins = n; + } + /* assign slots to fast allocs */ for (n=Tmp0; n<fn->ntmp; n++) fn->tmp[n].spill = 0; diff --git a/lisc/lisc.h b/lisc/lisc.h index 0ae7c53..a33b649 100644 --- a/lisc/lisc.h +++ b/lisc/lisc.h @@ -43,7 +43,7 @@ enum Reg { Tmp0, /* first non-reg temporary */ - NReg = RDX - RAX + 1 + NReg = R11 - RAX + 1 }; enum { @@ -53,6 +53,7 @@ enum { NIns = 256, NAlign = 3, NSeg = 32, + NTyp = 128, BITS = 4, NBit = 64, @@ -144,6 +145,9 @@ enum Op { OAddr, OSwap, OSign, + OSAlloc, + OXPush, + OXMovs, OXDiv, OXCmp, OXSet, @@ -246,7 +250,7 @@ struct Typ { struct { uint flt:1; uint len:31; - } seg[NSeg]; + } seg[NSeg+1]; }; @@ -255,7 +259,8 @@ extern char debug['Z'+1]; void dumpts(Bits *, Tmp *, FILE *); /* parse.c */ -extern OpDesc opdesc[]; +extern Typ typ[NTyp]; +extern OpDesc opdesc[NOp]; void diag(char *); void *alloc(size_t); Blk *blocka(void); diff --git a/lisc/parse.c b/lisc/parse.c index e23cbb2..b0e480a 100644 --- a/lisc/parse.c +++ b/lisc/parse.c @@ -33,6 +33,9 @@ OpDesc opdesc[NOp] = { [ONop] = { "nop", 0, 0 }, [OSwap] = { "swap", 2, 2 }, [OSign] = { "sign", 1, 0 }, + [OXMovs] = { "xmovs", 0, 0 }, + [OXPush] = { "xpush", 1, 1 }, + [OSAlloc] = { "salloc", 1, 0 }, [OXDiv] = { "xdiv", 1, 1 }, [OXCmp] = { "xcmp", 2, 1 }, [OXTest] = { "xtest", 2, 1 }, @@ -51,6 +54,9 @@ OpDesc opdesc[NOp] = { #undef X }; +Typ typ[NTyp]; +static int ntyp; + typedef enum { PXXX, PLbl, @@ -367,26 +373,31 @@ parseref() } static int -parsecls(char *ty) +parsecls(int *tyn) { + int i; + switch (next()) { default: err("invalid class specifier"); + case TTyp: + for (i=0; i<ntyp; i++) + if (strcmp(tokval.str, typ[i].name) == 0) { + *tyn = i; + return 2; + } + err("undefined type"); case TW: return 0; case TL: return 1; - case TTyp: - strcpy(ty, tokval.str); - return 2; } } static void parseargl() { - char ty[NString]; - int w, t; + int w, t, ty; Ref r; expect(TLParen); @@ -397,12 +408,12 @@ parseargl() for (;;) { if (curi - insb >= NIns) err("too many instructions (1)"); - w = parsecls(ty); + w = parsecls(&ty); r = parseref(); if (req(r, R)) err("invalid reference argument"); if (w == 2) - *curi = (Ins){OArgc, 0, R, {TYP(0), r}}; + *curi = (Ins){OArgc, 0, R, {TYP(ty), r}}; else *curi = (Ins){OArg, w, R, {r, R}}; curi++; @@ -450,8 +461,7 @@ parseline(PState ps) Phi *phi; Ref r; Blk *b; - int t, op, i, w; - char ty[NString]; + int t, op, i, w, ty; t = nextnl(); if (ps == PLbl && t != TLbl && t != TRBrace) @@ -512,7 +522,7 @@ parseline(PState ps) } r = tmpref(tokval.str, 0); expect(TEq); - w = parsecls(ty); + w = parsecls(&ty); op = next(); DoOp: if (op == TPhi) { @@ -521,14 +531,13 @@ DoOp: op = -1; } if (op == TCall) { - /* do eet! */ arg[0] = parseref(); parseargl(); expect(TNL); op = OCall; if (w == 2) { w = 0; - arg[1] = TYP(0); + arg[1] = TYP(ty); } else arg[1] = R; goto Ins; @@ -632,7 +641,9 @@ parsetyp() Typ *ty; int t, n, sz, al, s, a, c, flt; - ty = alloc(sizeof *ty); + if (ntyp >= NTyp) + err("too many type definitions"); + ty = &typ[ntyp++]; ty->align = -1; if (nextnl() != TTyp || nextnl() != TEq) err("type name, then = expected"); @@ -722,6 +733,7 @@ parse(FILE *f) inf = f; lnum = 1; thead = TXXX; + ntyp = 0; for (;;) switch (nextnl()) { case TFunc: @@ -771,7 +783,7 @@ printref(Ref r, Fn *fn, FILE *f) fprintf(f, "S%d", r.val); break; case RTyp: - fprintf(f, ":%d", r.val); + fprintf(f, ":%s", typ[r.val].name); break; } } @@ -818,8 +830,7 @@ printfn(Fn *fn, FILE *f) assert(opdesc[i->op].name); fprintf(f, "%s", opdesc[i->op].name); if (req(i->to, R)) - if (i->op < OStorel || i->op > OStoreb) - if (i->op != OArgc) + if (i->op == OXCmp || i->op == OXTest) fprintf(f, i->wide ? "l" : "w"); if (!req(i->arg[0], R)) { fprintf(f, " "); |