diff options
author | Quentin Carbonneaux <quentin.carbonneaux@yale.edu> | 2015-09-14 23:36:26 -0400 |
---|---|---|
committer | Quentin Carbonneaux <quentin.carbonneaux@yale.edu> | 2015-09-15 23:01:33 -0400 |
commit | f7bfa2e435c78917bd6df0c80e7e488751dac58c (patch) | |
tree | ff55a42ecc560ccde7af547b59c71b94afe6844b /lisc | |
parent | bad74e6dce897df9f21cea5bf0a32df298856351 (diff) | |
download | roux-f7bfa2e435c78917bd6df0c80e7e488751dac58c.tar.gz |
heavy modification of call handling
The IR generated by calls was very bulky because two instructions were used for marking the live range of a clobber. This patch attempts to store the information of what registers are use/def/clobber in the call instruction itself, this leads to more compact code (even more when we'll have SSE registers). However, I find that the amount of extra code needed is not really easonable. Fortunately it is not too invasive, thus if the complexity creeps in, it should be easy to revert.
Diffstat (limited to 'lisc')
-rw-r--r-- | lisc/isel.c | 60 | ||||
-rw-r--r-- | lisc/lisc.h | 13 | ||||
-rw-r--r-- | lisc/live.c | 12 | ||||
-rw-r--r-- | lisc/main.c | 1 | ||||
-rw-r--r-- | lisc/parse.c | 7 | ||||
-rw-r--r-- | lisc/rega.c | 55 | ||||
-rw-r--r-- | lisc/spill.c | 16 |
7 files changed, 125 insertions, 39 deletions
diff --git a/lisc/isel.c b/lisc/isel.c index 6928a1a..1712333 100644 --- a/lisc/isel.c +++ b/lisc/isel.c @@ -442,10 +442,51 @@ classify(AInfo *a, Typ *t) } } +int rsave[NRSave] = {RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX}; + +uint64_t calldef(Ins i, int *p) +{ + uint64_t b; + int n; + + n = i.arg[1].val & 15; + b = 1ll << RAX; + if (n == 2) + b |= 1ll << RDX; + if (p) + *p = n; + return b; +} + +uint64_t calluse(Ins i, int *p) +{ + uint64_t b; + int j, n; + + n = (i.arg[1].val >> 4) & 15; + for (j = 0, b = 0; j < n; j++) + b |= 1ll << rsave[j]; + if (p) + *p = n; + return b; +} + +uint64_t callclb(Ins i, int *p) +{ + uint64_t b; + int j, n; + + n = (i.arg[1].val >> 4) & 15; + for (j = n, b = 0; j < NRSave; j++) + b |= 1ll << rsave[j]; + if (p) + *p = n; + return b; +} + static void selcall(Fn *fn, Ins *i0, Ins *i1) { - static int ireg[8] = {RDI, RSI, RDX, RCX, R8, R9, R10, R11}; Ins *i; AInfo *ai, *a; int nint, nsse, ni, ns, n; @@ -497,20 +538,15 @@ selcall(Fn *fn, Ins *i0, Ins *i1) } stk += stk & 15; - if (rtype(i1->arg[1]) == RTyp) + if (!req(i1->arg[1], R)) diag("struct-returning function not implemented"); if (stk) { r = newcon(-(int64_t)stk, fn); emit(OSAlloc, 0, R, r, R); } - for (n=0; n<8; n++) - emit(OCopy, 0, R, TMP(ireg[n]), R); emit(OCopy, i1->wide, i1->to, TMP(RAX), R); - emit(OCall, 0, R, i->arg[0], R); - emit(OCopy, 1, TMP(RAX), R, R); - for (n=6-nint; n<8; n++) - emit(OCopy, 1, TMP(ireg[n]), R, R); + emit(OCall, 0, R, i->arg[0], CALL(1 + ((6-nint) << 4))); for (i=i0, a=ai, ni=0; i<i1; i++, a++) { if (a->inmem) @@ -519,18 +555,18 @@ selcall(Fn *fn, Ins *i0, Ins *i1) if (a->rty[0] == RSse || a->rty[1] == RSse) diag("isel: unsupported float struct"); if (a->size > 8) { - r = TMP(ireg[ni+1]); + r = TMP(rsave[ni+1]); r1 = newtmp(fn); emit(OLoad, 1, r, r1, R); r = newcon(8, fn); emit(OAdd, 1, r1, i->arg[1], r); - r = TMP(ireg[ni]); + r = TMP(rsave[ni]); ni += 2; } else - r = TMP(ireg[ni++]); + r = TMP(rsave[ni++]); emit(OLoad, 1, r, i->arg[1], R); } else { - r = TMP(ireg[ni++]); + r = TMP(rsave[ni++]); emit(OCopy, i->wide, r, i->arg[0], R); } } diff --git a/lisc/lisc.h b/lisc/lisc.h index c85c521..ff80900 100644 --- a/lisc/lisc.h +++ b/lisc/lisc.h @@ -43,7 +43,8 @@ enum Reg { Tmp0, /* first non-reg temporary */ - NReg = R12 - RAX + 1 + NReg = R12 - RAX + 1, + NRSave = 9, }; enum { @@ -76,7 +77,8 @@ enum { RTmp, RCon, RSlot, - RTyp, + RAlt, + RCallm = 0x1000, NRef = (1<<14) - 1 }; @@ -85,7 +87,8 @@ enum { #define CON(x) (Ref){RCon, x} #define CON_Z CON(0) /* reserved zero constant */ #define SLOT(x) (Ref){RSlot, x} -#define TYP(x) (Ref){RTyp, x} +#define TYP(x) (Ref){RAlt, x} +#define CALL(x) (Ref){RAlt, x|RCallm} static inline int req(Ref a, Ref b) { return a.type == b.type && a.val == b.val; } @@ -274,6 +277,10 @@ void ssafix(Fn *, int); void filllive(Fn *); /* isel.c */ +extern int rsave[NRSave]; +uint64_t calldef(Ins, int *); +uint64_t calluse(Ins, int *); +uint64_t callclb(Ins, int *); int slota(int, int, int *); void isel(Fn *); diff --git a/lisc/live.c b/lisc/live.c index 64fcf6a..c3aae86 100644 --- a/lisc/live.c +++ b/lisc/live.c @@ -21,7 +21,7 @@ filllive(Fn *f) Blk *b; Ins *i; Phi *p; - int z, n, chg, nlv; + int z, m, n, chg, nlv; uint a; Bits tb; @@ -50,7 +50,15 @@ Again: bset(b->jmp.arg, b, &nlv); b->nlive = nlv; for (i=&b->ins[b->nins]; i!=b->ins;) { - i--; + if ((--i)->op == OCall) { + b->in.t[0] &= ~calldef(*i, &m); + nlv -= m; + if (nlv + NRSave > b->nlive) + b->nlive = nlv + NRSave; + b->in.t[0] |= calluse(*i, &m); + nlv += m; + bset(i->arg[0], b, &nlv); + } if (!req(i->to, R)) { assert(rtype(i->to) == RTmp); nlv -= BGET(b->in, i->to.val); diff --git a/lisc/main.c b/lisc/main.c index cb2e82e..1452ff6 100644 --- a/lisc/main.c +++ b/lisc/main.c @@ -63,6 +63,7 @@ main(int ac, char *av[]) Blk *b; fprintf(stderr, "[Testing Liveness]\n"); + isel(fn); fillrpo(fn); filllive(fn); for (b=fn->start; b; b=b->link) { diff --git a/lisc/parse.c b/lisc/parse.c index 957fae1..29f5653 100644 --- a/lisc/parse.c +++ b/lisc/parse.c @@ -779,8 +779,11 @@ printref(Ref r, Fn *fn, FILE *f) case RSlot: fprintf(f, "S%d", r.val); break; - case RTyp: - fprintf(f, ":%s", typ[r.val].name); + case RAlt: + if (r.val & RCallm) + fprintf(f, "%x", r.val & (RCallm-1)); + else + fprintf(f, ":%s", typ[r.val].name); break; } } diff --git a/lisc/rega.c b/lisc/rega.c index b4e4572..1263c6a 100644 --- a/lisc/rega.c +++ b/lisc/rega.c @@ -192,36 +192,50 @@ pmgen() free(status); } +static void +move(int r, Ref to, RMap *m) +{ + int n, t, r1; + + r1 = req(to, R) ? -1 : rfree(m, to.val); + if (BGET(m->b, r) && r1 != r) { + /* r is used and not by to */ + for (n=0; m->r[n] != r; n++) + assert(n+1 < m->n); + t = m->t[n]; + rfree(m, t); + BSET(m->b, r); + ralloc(m, t); + BCLR(m->b, r); + } + t = r1 == -1 ? r : to.val; + radd(m, t, r); +} + static Ins * dopm(Blk *b, Ins *i, RMap *m) { RMap m0; int n, r, r1, t, nins; Ins *i1, *ib, *ip, *ir; + uint64_t def; m0 = *m; i1 = i+1; for (;; i--) { - r = i->arg[0].val; - r1 = req(i->to, R) ? -1 : rfree(m, i->to.val); - if (BGET(m->b, r) && r1 != r) { - /* r is used and not by i->to, */ - for (n=0; m->r[n] != r; n++) - assert(n+1 < m->n); - t = m->t[n]; - rfree(m, t); - BSET(m->b, r); - ralloc(m, t); - BCLR(m->b, r); - } - t = r1 == -1 ? r : i->to.val; - radd(m, t, r); + move(i->arg[0].val, i->to, m); if (i == b->ins || (i-1)->op != OCopy || !isreg((i-1)->arg[0])) break; } assert(m0.n <= m->n); + if (i > b->ins && (i-1)->op == OCall) { + def = calldef(*i, 0); + for (r=0; r<NRSave; r++) + if (!((1ll << rsave[r]) & def)) + move(rsave[r], R, m); + } for (npm=0, n=0; n<m->n; n++) if ((t = m->t[n]) >= Tmp0) { r1 = m->r[n]; @@ -266,6 +280,7 @@ rega(Fn *fn) Phi *p; uint a; Ref src, dst; + uint64_t rs; tmp = fn->tmp; end = alloc(fn->nblk * sizeof end[0]); @@ -303,8 +318,16 @@ rega(Fn *fn) if (rtype(b->jmp.arg) == RTmp) b->jmp.arg = ralloc(&cur, b->jmp.arg.val); for (i=&b->ins[b->nins]; i!=b->ins;) { - i--; - if (i->op == OCopy && isreg(i->arg[0])) { + switch ((--i)->op) { + case OCall: + rs = calldef(*i, 0) | callclb(*i, 0); + for (r=0; r<NReg; r++) + if ((1ll << r) & rs) + rfree(&cur, r); + continue; + case OCopy: + if (!isreg(i->arg[0])) + break; i = dopm(b, i, &cur); continue; } diff --git a/lisc/spill.c b/lisc/spill.c index 2625ceb..7b2476e 100644 --- a/lisc/spill.c +++ b/lisc/spill.c @@ -277,6 +277,7 @@ inregs(Blk *b, Blk *s) /* todo, move to live.c */ static Ins * dopm(Blk *b, Ins *i, Bits *v) { + int j; Ins *i1; /* consecutive moves from @@ -293,7 +294,14 @@ dopm(Blk *b, Ins *i, Bits *v) || !isreg((i-1)->arg[0])) break; } - limit(v, NReg, 0); + if (i > b->ins && (i-1)->op == OCall) { + calldef(*--i, &j); + limit(v, NReg - NRSave + j, 0); + v->t[0] &= ~calldef(*i, 0); + v->t[0] |= calluse(*i, 0); + setloc(&i->arg[0], v, &(Bits){{0}}); + } else + limit(v, NReg, 0); do emit(*--i1); while (i1 != i); @@ -388,21 +396,21 @@ spill(Fn *fn) for (i=&b->ins[b->nins]; i!=b->ins;) { assert(bcnt(&v) <= NReg); i--; - s = 0; if (i->op == OCopy && isreg(i->arg[0])) { i = dopm(b, i, &v); continue; } + s = 0; + w = (Bits){{0}}; if (!req(i->to, R)) { assert(rtype(i->to) == RTmp); t = i->to.val; if (BGET(v, t)) BCLR(v, t); else - limit(&v, NReg-1, &w); + limit(&v, NReg-1, 0); s = tmp[t].spill; } - w = (Bits){{0}}; j = opdesc[i->op].nmem; if (!j && rtype(i->arg[0]) == RTmp) BSET(w, i->arg[0].val); |