diff options
-rw-r--r-- | lisc/emit.c | 23 | ||||
-rw-r--r-- | lisc/isel.c | 158 | ||||
-rw-r--r-- | lisc/lisc.h | 6 | ||||
-rw-r--r-- | lisc/parse.c | 9 | ||||
-rw-r--r-- | lisc/spill.c | 1 |
5 files changed, 170 insertions, 27 deletions
diff --git a/lisc/emit.c b/lisc/emit.c index 76ce82c..d5b1e80 100644 --- a/lisc/emit.c +++ b/lisc/emit.c @@ -76,13 +76,14 @@ eref(Ref r, Fn *fn, FILE *f) { switch (rtype(r)) { default: - diag("emitref: invalid reference"); + diag("emit: invalid reference"); case RTmp: assert(r.val < Tmp0); fprintf(f, "%%%s", rtoa(r.val)); break; + case RMem: case RSlot: - fprintf(f, "-%d(%%rbp)", 8 * r.val); + fprintf(f, "-%d(%%rbp)", 4 * r.val); break; case RCon: fprintf(f, "$"); @@ -94,9 +95,18 @@ eref(Ref r, Fn *fn, FILE *f) static void emem(Ref r, Fn *fn, FILE *f) { + /* this function is now a hack + * when full memory references + * are supported, constants and + * temporaries will not have + * multiple meanings as they do + * now + */ + switch (rtype(r)) { default: - diag("emem: invalid memory reference"); + diag("emit: invalid memory reference"); + case RMem: case RSlot: eref(r, fn, f); break; @@ -171,7 +181,10 @@ eins(Ins i, Fn *fn, FILE *f) break; } } - if (!req(i.arg[0], i.to)) + if (rtype(i.arg[0]) == RMem) { + assert(rtype(i.to)==RTmp && i.to.val<EAX); + eop("lea", i.arg[0], i.to, fn, f); + } else if (!req(i.arg[0], i.to)) eop("mov", i.arg[0], i.to, fn, f); break; case OStorel: @@ -258,8 +271,6 @@ emitfn(Fn *fn, FILE *f) "liscf:\n" "\tpush %%rbp\n" "\tmov %%rsp, %%rbp\n" - "\tsub $%u, %%rsp\n", - fn->nspill * 8 ); for (b=fn->start; b; b=b->link) { fprintf(f, ".L%s:\n", b->name); diff --git a/lisc/isel.c b/lisc/isel.c index a9e3ee2..7081eea 100644 --- a/lisc/isel.c +++ b/lisc/isel.c @@ -1,15 +1,15 @@ #include "lisc.h" #include <limits.h> -/* For x86_64, we have to: +/* For x86_64, do the following: * * - check that constants are used only in * places allowed - * * - ensure immediates always fit in 32b - * * - explicit machine register contraints * on instructions like division. + * - implement fast locals (the streak of + * constant allocX in the first basic block) * * - lower calls (future, I have to think * about their representation and the @@ -111,8 +111,24 @@ static void sel(Ins i, Fn *fn) { Ref r0, r1, ra, rd; - int n, ty, c; + int n, ty, c, s; int64_t val; + struct { + Ref r; + int s; + } cpy[2]; + + for (n=0; n<2; n++) { + r0 = i.arg[n]; + cpy[n].s = 0; + if (rtype(r0) == RTmp) + if ((s = fn->tmp[r0.val].spill)) { + r0 = newtmp(TLong, fn); + i.arg[n] = r0; + cpy[n].r = r0; + cpy[n].s = s; + } + } switch (i.op) { case ODiv: @@ -158,16 +174,24 @@ sel(Ins i, Fn *fn) n = 0; goto Emit; case OStorel: - n = 1; - goto Emit; case OStorew: case OStoreb: case OStores: + if (cpy[1].s) { + i.arg[1] = MEM(cpy[1].s); + cpy[1].s = 0; + } + n = i.op == OStorel; + goto Emit; case OLoad: case OLoadss: case OLoadus: case OLoadsb: case OLoadub: + if (cpy[0].s) { + i.arg[0] = MEM(cpy[0].s); + cpy[0].s = 0; + } n = 0; Emit: emit(i.op, i.to, i.arg[0], i.arg[1]); @@ -184,27 +208,31 @@ Emit: } break; case OAlloc: - /* we need to make sure + case OAlloc+1: + case OAlloc+2: /* == OAlloc1 */ + /* if this is not a fast alloc + * we need to make sure * the stack remains aligned * (rsp + 8 = 0) mod 16 - * as a consequence, the alloc - * alignment is 8, we can change - * that in the future */ + if (req(i.to, R)) + break; if (rtype(i.arg[0]) == RCon) { val = fn->con[i.arg[0].val].val; val = (val + 15) & ~INT64_C(15); if (val < 0 || val > INT32_MAX) diag("isel: alloc too large"); emit(OAlloc, i.to, newcon(val, fn), R); - } else { + } else if (i.op < OAlloc1) { /* r0 = (i.arg[0] + 15) & -16 */ r0 = newtmp(TLong, fn); r1 = newtmp(TLong, fn); emit(OAlloc, i.to, r0, R); emit(OAnd, r0, r1, newcon(-16, fn)); emit(OAdd, r1, i.arg[0], newcon(15, fn)); - } + } else + /* todo, support it! */ + diag("isel: unsupported alloc alignment"); break; default: if (OCmp <= i.op && i.op <= OCmp1) { @@ -217,6 +245,10 @@ Emit: } diag("isel: non-exhaustive implementation"); } + + for (n=0; n<2; n++) + if (cpy[n].s) + emit(OCopy, cpy[n].r, MEM(cpy[n].s), R); } static Ins * @@ -285,6 +317,63 @@ seljmp(Blk *b, Fn *fn) } } +int +slot_(int sz, int al /* log2 */, Fn *fn) +{ + enum { N = sizeof fn->slot / sizeof fn->slot[0] }; + int j, k, s, l, a, ret; + int *sa; + + sa = fn->slot; + a = 1 << al; + l = sz; + + if (l > a) { + /* for large slots, we just + * tack them on the next max + * alignment slot available + * todo, could sophisticate + */ + l = (l + a-1) & ~(a-1); + s = sa[N-1] + l; + ret = s; + for (j=0, k=1; j<N; j++, k*=2) { + l = (l + k-1) & ~(k-1); + sa[j] = sa[N-1] + l; + } + } else { + j = al; + s = sa[j] + a; + ret = s; + Shift: + if (j < N-1 && s < sa[j+1]) + /* ........-----------... + * ^ ^ ^ + * sa[j] sa[j]+a sa[j+1] + * + * we have to skip to the + * next large whole + */ + s = sa[j+1]; + + for (k=0; k<=j; k++) + /* move all smaller holes + * that we contain with us + */ + if (sa[k] == sa[j]) + sa[k] = s; + + if (j < N-1 && s > sa[j+1]) { + /* we were in a bigger hole, + * it needs to shift further + */ + s = sa[++j] + (a *= 2); + goto Shift; + } + } + return ret; +} + /* instruction selection * requires use counts (as given by parsing) */ @@ -293,18 +382,53 @@ isel(Fn *fn) { Blk *b; Ins *i; - uint nins; + Phi *p; + Ref *a; + int n, al, s; + int64_t sz; + + /* 1. assign slots to fast allocs */ + for (n=Tmp0; n<fn->ntmp; n++) + fn->tmp[n].spill = 0; + fn->slot[0] = 0; + fn->slot[1] = 0; + fn->slot[2] = 2; + for (b=fn->start, i=b->ins; i-b->ins < b->nins; i++) + if (OAlloc <= i->op && i->op <= OAlloc1) { + if (rtype(i->arg[0]) != RCon) + break; + sz = fn->con[i->arg[0].val].val; + if (sz < 0 || sz >= INT_MAX-3) + diag("isel: invalid alloc size"); + sz = (sz + 3) / 4; + al = i->op - OAlloc; + s = slot_(sz, al, fn); + assert(s > 0); + fn->tmp[i->to.val].spill = s; + i->to = R; + } + + /* 2. select machine instructions */ for (b=fn->start; b; b=b->link) { + + /* 2.1. process block instructions */ curi = &insb[NIns]; seljmp(b, fn); for (i=&b->ins[b->nins]; i>b->ins;) { sel(*--i, fn); } - nins = &insb[NIns] - curi; + n = &insb[NIns] - curi; free(b->ins); - b->ins = alloc(nins * sizeof b->ins[0]); - memcpy(b->ins, curi, nins * sizeof b->ins[0]); - b->nins = nins; + b->ins = alloc(n * sizeof b->ins[0]); + memcpy(b->ins, curi, n * sizeof b->ins[0]); + b->nins = n; + + /* 2.2. fix fast local references in phis */ + for (p=b->phi; p; p=p->link) + for (a=p->arg; a-p->arg < p->narg; a++) + if (rtype(*a) == RTmp) + if ((s = fn->tmp[a->val].spill)) + *a = MEM(s); } } diff --git a/lisc/lisc.h b/lisc/lisc.h index 3ae5c56..50c6504 100644 --- a/lisc/lisc.h +++ b/lisc/lisc.h @@ -90,6 +90,7 @@ struct Ref { enum { RTmp, RCon, + RMem, RSlot, NRef = (1<<14) - 1 }; @@ -98,6 +99,7 @@ enum { #define TMP(x) (Ref){RTmp, x} #define CON(x) (Ref){RCon, x} #define CON_Z CON(0) /* reserved zero constant */ +#define MEM(x) (Ref){RMem, x} #define SLOT(x) (Ref){RSlot, x} static inline int req(Ref a, Ref b) @@ -139,6 +141,7 @@ enum Op { OLoadub, OCopy, OAlloc, + OAlloc1 = OAlloc + 2, NPubOp, /* reserved instructions */ @@ -237,7 +240,7 @@ struct Fn { int ncon; int nblk; Blk **rpo; - uint nspill; + int slot[3]; }; @@ -262,6 +265,7 @@ void ssafix(Fn *, int); void filllive(Fn *); /* isel.c */ +int slot_(int, int, Fn *); void isel(Fn *); /* spill.c */ diff --git a/lisc/parse.c b/lisc/parse.c index 5a06911..f51d5a5 100644 --- a/lisc/parse.c +++ b/lisc/parse.c @@ -26,7 +26,6 @@ OpDesc opdesc[NOp] = { [OLoadus] = { "loadus", 1, 0 }, [OLoadsb] = { "loadsb", 1, 0 }, [OLoadub] = { "loadub", 1, 0 }, - [OAlloc] = { "alloc", 1, 1 }, [OCopy] = { "copy", 1, 1 }, [ONop] = { "nop", 0, 0 }, [OSwap] = { "swap", 2, 2 }, @@ -34,6 +33,9 @@ OpDesc opdesc[NOp] = { [OXDiv] = { "xdiv", 1, 1 }, [OXCmpw] = { "xcmpw", 2, 1 }, [OXCmpl] = { "xcmpl", 2, 1 }, + [OAlloc] = { "alloc4", 1, 1 }, + [OAlloc+1] = { "alloc8", 1, 1 }, + [OAlloc+2] = { "alloc16", 1, 1 }, #define X(c) \ [OCmp+C##c] = { "c" #c, 2, 0 }, \ @@ -559,7 +561,10 @@ printref(Ref r, Fn *fn, FILE *f) } break; case RSlot: - fprintf(f, "$%d", r.val); + fprintf(f, "S%d", r.val); + break; + case RMem: + fprintf(f, "M%d", r.val); break; } return ""; diff --git a/lisc/spill.c b/lisc/spill.c index ad461f0..a0f6f38 100644 --- a/lisc/spill.c +++ b/lisc/spill.c @@ -375,5 +375,4 @@ spill(Fn *fn) b->ins = alloc(b->nins * sizeof(Ins)); memcpy(b->ins, curi, b->nins * sizeof(Ins)); } - fn->nspill = ns; } |