diff options
author | Quentin Carbonneaux <quentin.carbonneaux@yale.edu> | 2015-08-14 15:54:25 -0400 |
---|---|---|
committer | Quentin Carbonneaux <quentin.carbonneaux@yale.edu> | 2015-09-15 23:01:31 -0400 |
commit | 67d3c2834d61026d5556cf9846f707844109cf33 (patch) | |
tree | 854a0846ccacfe2ba8ab39f9033757bba8e97960 | |
parent | 0f3846e261afd21be1f66bad151772349349583b (diff) | |
download | roux-67d3c2834d61026d5556cf9846f707844109cf33.tar.gz |
tentative support for fast allocs
It seems that the MEM reference type is meaningless in too many positions. Because of this, it is unclear if we should keep it or just introduce a OAddr instruction that only accepts slots. Regardless of the above, the spilling module needs to use the new slot_() function, also, the emit function needs to fetch the size of the stack frame from the slot[] array. The naming is still very transitional, here is a list of all bogus names I can think of: - SLOT() - Tmp.spill - slot_
-rw-r--r-- | lisc/emit.c | 23 | ||||
-rw-r--r-- | lisc/isel.c | 158 | ||||
-rw-r--r-- | lisc/lisc.h | 6 | ||||
-rw-r--r-- | lisc/parse.c | 9 | ||||
-rw-r--r-- | lisc/spill.c | 1 |
5 files changed, 170 insertions, 27 deletions
diff --git a/lisc/emit.c b/lisc/emit.c index 76ce82c..d5b1e80 100644 --- a/lisc/emit.c +++ b/lisc/emit.c @@ -76,13 +76,14 @@ eref(Ref r, Fn *fn, FILE *f) { switch (rtype(r)) { default: - diag("emitref: invalid reference"); + diag("emit: invalid reference"); case RTmp: assert(r.val < Tmp0); fprintf(f, "%%%s", rtoa(r.val)); break; + case RMem: case RSlot: - fprintf(f, "-%d(%%rbp)", 8 * r.val); + fprintf(f, "-%d(%%rbp)", 4 * r.val); break; case RCon: fprintf(f, "$"); @@ -94,9 +95,18 @@ eref(Ref r, Fn *fn, FILE *f) static void emem(Ref r, Fn *fn, FILE *f) { + /* this function is now a hack + * when full memory references + * are supported, constants and + * temporaries will not have + * multiple meanings as they do + * now + */ + switch (rtype(r)) { default: - diag("emem: invalid memory reference"); + diag("emit: invalid memory reference"); + case RMem: case RSlot: eref(r, fn, f); break; @@ -171,7 +181,10 @@ eins(Ins i, Fn *fn, FILE *f) break; } } - if (!req(i.arg[0], i.to)) + if (rtype(i.arg[0]) == RMem) { + assert(rtype(i.to)==RTmp && i.to.val<EAX); + eop("lea", i.arg[0], i.to, fn, f); + } else if (!req(i.arg[0], i.to)) eop("mov", i.arg[0], i.to, fn, f); break; case OStorel: @@ -258,8 +271,6 @@ emitfn(Fn *fn, FILE *f) "liscf:\n" "\tpush %%rbp\n" "\tmov %%rsp, %%rbp\n" - "\tsub $%u, %%rsp\n", - fn->nspill * 8 ); for (b=fn->start; b; b=b->link) { fprintf(f, ".L%s:\n", b->name); diff --git a/lisc/isel.c b/lisc/isel.c index a9e3ee2..7081eea 100644 --- a/lisc/isel.c +++ b/lisc/isel.c @@ -1,15 +1,15 @@ #include "lisc.h" #include <limits.h> -/* For x86_64, we have to: +/* For x86_64, do the following: * * - check that constants are used only in * places allowed - * * - ensure immediates always fit in 32b - * * - explicit machine register contraints * on instructions like division. + * - implement fast locals (the streak of + * constant allocX in the first basic block) * * - lower calls (future, I have to think * about their representation and the @@ -111,8 +111,24 @@ static void sel(Ins i, Fn *fn) { Ref r0, r1, ra, rd; - int n, ty, c; + int n, ty, c, s; int64_t val; + struct { + Ref r; + int s; + } cpy[2]; + + for (n=0; n<2; n++) { + r0 = i.arg[n]; + cpy[n].s = 0; + if (rtype(r0) == RTmp) + if ((s = fn->tmp[r0.val].spill)) { + r0 = newtmp(TLong, fn); + i.arg[n] = r0; + cpy[n].r = r0; + cpy[n].s = s; + } + } switch (i.op) { case ODiv: @@ -158,16 +174,24 @@ sel(Ins i, Fn *fn) n = 0; goto Emit; case OStorel: - n = 1; - goto Emit; case OStorew: case OStoreb: case OStores: + if (cpy[1].s) { + i.arg[1] = MEM(cpy[1].s); + cpy[1].s = 0; + } + n = i.op == OStorel; + goto Emit; case OLoad: case OLoadss: case OLoadus: case OLoadsb: case OLoadub: + if (cpy[0].s) { + i.arg[0] = MEM(cpy[0].s); + cpy[0].s = 0; + } n = 0; Emit: emit(i.op, i.to, i.arg[0], i.arg[1]); @@ -184,27 +208,31 @@ Emit: } break; case OAlloc: - /* we need to make sure + case OAlloc+1: + case OAlloc+2: /* == OAlloc1 */ + /* if this is not a fast alloc + * we need to make sure * the stack remains aligned * (rsp + 8 = 0) mod 16 - * as a consequence, the alloc - * alignment is 8, we can change - * that in the future */ + if (req(i.to, R)) + break; if (rtype(i.arg[0]) == RCon) { val = fn->con[i.arg[0].val].val; val = (val + 15) & ~INT64_C(15); if (val < 0 || val > INT32_MAX) diag("isel: alloc too large"); emit(OAlloc, i.to, newcon(val, fn), R); - } else { + } else if (i.op < OAlloc1) { /* r0 = (i.arg[0] + 15) & -16 */ r0 = newtmp(TLong, fn); r1 = newtmp(TLong, fn); emit(OAlloc, i.to, r0, R); emit(OAnd, r0, r1, newcon(-16, fn)); emit(OAdd, r1, i.arg[0], newcon(15, fn)); - } + } else + /* todo, support it! */ + diag("isel: unsupported alloc alignment"); break; default: if (OCmp <= i.op && i.op <= OCmp1) { @@ -217,6 +245,10 @@ Emit: } diag("isel: non-exhaustive implementation"); } + + for (n=0; n<2; n++) + if (cpy[n].s) + emit(OCopy, cpy[n].r, MEM(cpy[n].s), R); } static Ins * @@ -285,6 +317,63 @@ seljmp(Blk *b, Fn *fn) } } +int +slot_(int sz, int al /* log2 */, Fn *fn) +{ + enum { N = sizeof fn->slot / sizeof fn->slot[0] }; + int j, k, s, l, a, ret; + int *sa; + + sa = fn->slot; + a = 1 << al; + l = sz; + + if (l > a) { + /* for large slots, we just + * tack them on the next max + * alignment slot available + * todo, could sophisticate + */ + l = (l + a-1) & ~(a-1); + s = sa[N-1] + l; + ret = s; + for (j=0, k=1; j<N; j++, k*=2) { + l = (l + k-1) & ~(k-1); + sa[j] = sa[N-1] + l; + } + } else { + j = al; + s = sa[j] + a; + ret = s; + Shift: + if (j < N-1 && s < sa[j+1]) + /* ........-----------... + * ^ ^ ^ + * sa[j] sa[j]+a sa[j+1] + * + * we have to skip to the + * next large whole + */ + s = sa[j+1]; + + for (k=0; k<=j; k++) + /* move all smaller holes + * that we contain with us + */ + if (sa[k] == sa[j]) + sa[k] = s; + + if (j < N-1 && s > sa[j+1]) { + /* we were in a bigger hole, + * it needs to shift further + */ + s = sa[++j] + (a *= 2); + goto Shift; + } + } + return ret; +} + /* instruction selection * requires use counts (as given by parsing) */ @@ -293,18 +382,53 @@ isel(Fn *fn) { Blk *b; Ins *i; - uint nins; + Phi *p; + Ref *a; + int n, al, s; + int64_t sz; + + /* 1. assign slots to fast allocs */ + for (n=Tmp0; n<fn->ntmp; n++) + fn->tmp[n].spill = 0; + fn->slot[0] = 0; + fn->slot[1] = 0; + fn->slot[2] = 2; + for (b=fn->start, i=b->ins; i-b->ins < b->nins; i++) + if (OAlloc <= i->op && i->op <= OAlloc1) { + if (rtype(i->arg[0]) != RCon) + break; + sz = fn->con[i->arg[0].val].val; + if (sz < 0 || sz >= INT_MAX-3) + diag("isel: invalid alloc size"); + sz = (sz + 3) / 4; + al = i->op - OAlloc; + s = slot_(sz, al, fn); + assert(s > 0); + fn->tmp[i->to.val].spill = s; + i->to = R; + } + + /* 2. select machine instructions */ for (b=fn->start; b; b=b->link) { + + /* 2.1. process block instructions */ curi = &insb[NIns]; seljmp(b, fn); for (i=&b->ins[b->nins]; i>b->ins;) { sel(*--i, fn); } - nins = &insb[NIns] - curi; + n = &insb[NIns] - curi; free(b->ins); - b->ins = alloc(nins * sizeof b->ins[0]); - memcpy(b->ins, curi, nins * sizeof b->ins[0]); - b->nins = nins; + b->ins = alloc(n * sizeof b->ins[0]); + memcpy(b->ins, curi, n * sizeof b->ins[0]); + b->nins = n; + + /* 2.2. fix fast local references in phis */ + for (p=b->phi; p; p=p->link) + for (a=p->arg; a-p->arg < p->narg; a++) + if (rtype(*a) == RTmp) + if ((s = fn->tmp[a->val].spill)) + *a = MEM(s); } } diff --git a/lisc/lisc.h b/lisc/lisc.h index 3ae5c56..50c6504 100644 --- a/lisc/lisc.h +++ b/lisc/lisc.h @@ -90,6 +90,7 @@ struct Ref { enum { RTmp, RCon, + RMem, RSlot, NRef = (1<<14) - 1 }; @@ -98,6 +99,7 @@ enum { #define TMP(x) (Ref){RTmp, x} #define CON(x) (Ref){RCon, x} #define CON_Z CON(0) /* reserved zero constant */ +#define MEM(x) (Ref){RMem, x} #define SLOT(x) (Ref){RSlot, x} static inline int req(Ref a, Ref b) @@ -139,6 +141,7 @@ enum Op { OLoadub, OCopy, OAlloc, + OAlloc1 = OAlloc + 2, NPubOp, /* reserved instructions */ @@ -237,7 +240,7 @@ struct Fn { int ncon; int nblk; Blk **rpo; - uint nspill; + int slot[3]; }; @@ -262,6 +265,7 @@ void ssafix(Fn *, int); void filllive(Fn *); /* isel.c */ +int slot_(int, int, Fn *); void isel(Fn *); /* spill.c */ diff --git a/lisc/parse.c b/lisc/parse.c index 5a06911..f51d5a5 100644 --- a/lisc/parse.c +++ b/lisc/parse.c @@ -26,7 +26,6 @@ OpDesc opdesc[NOp] = { [OLoadus] = { "loadus", 1, 0 }, [OLoadsb] = { "loadsb", 1, 0 }, [OLoadub] = { "loadub", 1, 0 }, - [OAlloc] = { "alloc", 1, 1 }, [OCopy] = { "copy", 1, 1 }, [ONop] = { "nop", 0, 0 }, [OSwap] = { "swap", 2, 2 }, @@ -34,6 +33,9 @@ OpDesc opdesc[NOp] = { [OXDiv] = { "xdiv", 1, 1 }, [OXCmpw] = { "xcmpw", 2, 1 }, [OXCmpl] = { "xcmpl", 2, 1 }, + [OAlloc] = { "alloc4", 1, 1 }, + [OAlloc+1] = { "alloc8", 1, 1 }, + [OAlloc+2] = { "alloc16", 1, 1 }, #define X(c) \ [OCmp+C##c] = { "c" #c, 2, 0 }, \ @@ -559,7 +561,10 @@ printref(Ref r, Fn *fn, FILE *f) } break; case RSlot: - fprintf(f, "$%d", r.val); + fprintf(f, "S%d", r.val); + break; + case RMem: + fprintf(f, "M%d", r.val); break; } return ""; diff --git a/lisc/spill.c b/lisc/spill.c index ad461f0..a0f6f38 100644 --- a/lisc/spill.c +++ b/lisc/spill.c @@ -375,5 +375,4 @@ spill(Fn *fn) b->ins = alloc(b->nins * sizeof(Ins)); memcpy(b->ins, curi, b->nins * sizeof(Ins)); } - fn->nspill = ns; } |