diff options
Diffstat (limited to 'src/rega.c')
-rw-r--r-- | src/rega.c | 598 |
1 files changed, 0 insertions, 598 deletions
diff --git a/src/rega.c b/src/rega.c deleted file mode 100644 index 7f8edcf..0000000 --- a/src/rega.c +++ /dev/null @@ -1,598 +0,0 @@ -#include "all.h" - -#ifdef TEST_PMOV - #undef assert - #define assert(x) assert_test(#x, x) -#endif - -typedef struct RMap RMap; - -struct RMap { - int t[NIReg+NFReg]; - int r[NIReg+NFReg]; - BSet b[1]; - int n; -}; - -static bits regu; /* registers used */ -static Tmp *tmp; /* function temporaries */ -static Mem *mem; /* function mem references */ -static struct { - Ref src, dst; - int cls; -} *pm; /* parallel move constructed */ -static int cpm, npm; /* capacity and size of pm */ - -static int * -hint(int t) -{ - return &tmp[phicls(t, tmp)].hint.r; -} - -static void -sethint(int t, int r) -{ - bits m; - - m = tmp[phicls(t, tmp)].hint.m; - if (*hint(t) == -1) - if (!(BIT(r) & m)) - *hint(t) = r; -} - -static void -rcopy(RMap *ma, RMap *mb) -{ - memcpy(ma->t, mb->t, sizeof ma->t); - memcpy(ma->r, mb->r, sizeof ma->r); - bscopy(ma->b, mb->b); - ma->n = mb->n; -} - -static int -rfind(RMap *m, int t) -{ - int i; - - for (i=0; i<m->n; i++) - if (m->t[i] == t) - return m->r[i]; - return -1; -} - -static Ref -rref(RMap *m, int t) -{ - int r, s; - - r = rfind(m, t); - if (r == -1) { - s = tmp[t].slot; - assert(s != -1 && "should have spilled"); - return SLOT(s); - } else - return TMP(r); -} - -static void -radd(RMap *m, int t, int r) -{ - assert((t >= Tmp0 || t == r) && "invalid temporary"); - assert(((RAX <= r && r < RAX + NIReg) || (XMM0 <= r && r < XMM0 + NFReg)) && "invalid register"); - assert(!bshas(m->b, t) && "temporary has mapping"); - assert(!bshas(m->b, r) && "register already allocated"); - assert(m->n <= NIReg+NFReg && "too many mappings"); - bsset(m->b, t); - bsset(m->b, r); - m->t[m->n] = t; - m->r[m->n] = r; - m->n++; - regu |= BIT(r); -} - -static Ref -ralloc(RMap *m, int t) -{ - bits regs; - int r, r0, r1; - - if (t < Tmp0) { - assert(bshas(m->b, t)); - return TMP(t); - } - if (bshas(m->b, t)) { - r = rfind(m, t); - assert(r != -1); - return TMP(r); - } - r = *hint(t); - if (r == -1 || bshas(m->b, r)) { - regs = tmp[phicls(t, tmp)].hint.m; - regs |= m->b->t[0]; - switch (KBASE(tmp[t].cls)) { - case 0: - r0 = RAX; - r1 = RAX + NIReg; - break; - case 1: - r0 = XMM0; - r1 = XMM0 + NFReg; - break; - } - for (r=r0; r<r1; r++) - if (!(regs & BIT(r))) - goto Found; - for (r=r0; r<r1; r++) - if (!bshas(m->b, r)) - goto Found; - diag("rega: no more regs"); - } -Found: - radd(m, t, r); - sethint(t, r); - return TMP(r); -} - -static int -rfree(RMap *m, int t) -{ - int i, r; - - if (!bshas(m->b, t)) - return -1; - for (i=0; m->t[i] != t; i++) - assert(i+1 < m->n); - r = m->r[i]; - bsclr(m->b, t); - bsclr(m->b, r); - m->n--; - memmove(&m->t[i], &m->t[i+1], (m->n-i) * sizeof m->t[0]); - memmove(&m->r[i], &m->r[i+1], (m->n-i) * sizeof m->r[0]); - return r; -} - -static void -mdump(RMap *m) -{ - int i; - - for (i=0; i<m->n; i++) - fprintf(stderr, " (%s, R%d)", - tmp[m->t[i]].name, - m->r[i]); - fprintf(stderr, "\n"); -} - -static void -pmadd(Ref src, Ref dst, int k) -{ - if (npm == cpm) { - cpm = cpm * 2 + 16; - pm = realloc(pm, cpm * sizeof pm[0]); - if (!pm) - diag("pmadd: out of memory"); - } - pm[npm].src = src; - pm[npm].dst = dst; - pm[npm].cls = k; - npm++; -} - -enum PMStat { ToMove, Moving, Moved }; - -static Ref -pmrec(enum PMStat *status, int i, int *k) -{ - Ref swp, swp1; - int j, k1; - - /* note, this routine might emit - * too many large instructions: - * - * , x -- x - * x -- x -- x | - * ` x -- x - * - * if only the first move is wide - * the whole cycle will be wide, - * this is safe but not necessary - */ - - if (req(pm[i].src, pm[i].dst)) - return R; - status[i] = Moving; - assert(KBASE(*k) == KBASE(pm[i].cls)); - assert((Kw|1) == Kl && (Ks|1) == Kd); - *k |= KWIDE(pm[i].cls); /* see above */ - swp = R; - for (j=0; j<npm; j++) { - if (req(pm[j].src, pm[i].dst)) - switch (status[j]) { - case ToMove: - k1 = *k; - swp1 = pmrec(status, j, &k1); - if (!req(swp1, R)) { - assert(req(swp, R)); - swp = swp1; - *k = k1; - } - break; - case Moving: - assert(req(swp, R)); - swp = pm[i].dst; - break; - case Moved: - break; - } - } - status[i] = Moved; - if (req(swp, R)) { - *curi++ = (Ins){OCopy, pm[i].dst, {pm[i].src}, pm[i].cls}; - return R; - } else if (!req(swp, pm[i].src)) { - *curi++ = (Ins){OSwap, R, {pm[i].src, pm[i].dst}, *k}; - return swp; - } else - return R; - -} - -static void -pmgen() -{ - int i, k; - enum PMStat *status; - - status = alloc(npm * sizeof status[0]); - assert(!npm || status[npm-1] == ToMove); - curi = insb; - for (i=0; i<npm; i++) - if (status[i] == ToMove) { - k = pm[i].cls; - pmrec(status, i, &k); - } -} - -static void -move(int r, Ref to, RMap *m) -{ - int n, t, r1; - - r1 = req(to, R) ? -1 : rfree(m, to.val); - if (bshas(m->b, r) && r1 != r) { - /* r is used and not by to */ - for (n=0; m->r[n] != r; n++) - assert(n+1 < m->n); - t = m->t[n]; - rfree(m, t); - bsset(m->b, r); - ralloc(m, t); - bsclr(m->b, r); - } - t = req(to, R) ? r : to.val; - radd(m, t, r); -} - -static int -regcpy(Ins *i) -{ - return i->op == OCopy && isreg(i->arg[0]); -} - -static Ins * -dopm(Blk *b, Ins *i, RMap *m) -{ - RMap m0; - int n, r, r1, t, s; - Ins *i0, *i1, *ip, *ir; - bits def; - - m0 = *m; - i1 = ++i; - do { - i--; - move(i->arg[0].val, i->to, m); - } while (i != b->ins && regcpy(i-1)); - assert(m0.n <= m->n); - if (i != b->ins && (i-1)->op == OCall) { - def = retregs((i-1)->arg[1], 0); - for (r=0; r<NRSave; r++) - if (!(BIT(rsave[r]) & def)) - move(rsave[r], R, m); - } - for (npm=0, n=0; n<m->n; n++) { - t = m->t[n]; - s = tmp[t].slot; - r1 = m->r[n]; - r = rfind(&m0, t); - if (r != -1) - pmadd(TMP(r1), TMP(r), tmp[t].cls); - else if (s != -1) - pmadd(TMP(r1), SLOT(s), tmp[t].cls); - } - for (ip=i; ip<i1; ip++) { - if (!req(ip->to, R)) - rfree(m, ip->to.val); - r = ip->arg[0].val; - if (rfind(m, r) == -1) - radd(m, r, r); - } - pmgen(); -#ifdef TEST_PMOV - return 0; -#endif - n = b->nins - (i1 - i) + (curi - insb); - i0 = alloc(n * sizeof(Ins)); - ip = icpy(ip = i0, b->ins, i - b->ins); - ip = icpy(ir = ip, insb, curi - insb); - ip = icpy(ip, i1, &b->ins[b->nins] - i1); - b->nins = n; - b->ins = i0; - return ir; -} - -static int -prio(Ref r1, Ref r2) -{ - /* trivial heuristic to begin with, - * later we can use the distance to - * the definition instruction - */ - (void) r2; - return *hint(r1.val) != -1; -} - -static void -insert(Ref *r, Ref **rs, int p) -{ - int i; - - rs[i = p] = r; - while (i-- > 0 && prio(*r, *rs[i])) { - rs[i+1] = rs[i]; - rs[i] = r; - } -} - -static void -doblk(Blk *b, RMap *cur) -{ - int x, r, nr; - bits rs; - Ins *i; - Mem *m; - Ref *ra[4]; - - if (rtype(b->jmp.arg) == RTmp) - b->jmp.arg = ralloc(cur, b->jmp.arg.val); - else if (rtype(b->jmp.arg) == RACall) { - /* add return registers */ - rs = retregs(b->jmp.arg, 0); - for (r=0; rs; rs/=2, r++) - if (rs & 1) - radd(cur, r, r); - } - for (i=&b->ins[b->nins]; i!=b->ins;) { - switch ((--i)->op) { - case OCall: - rs = argregs(i->arg[1], 0); - for (r=0; r<NRSave; r++) - if (!(BIT(rsave[r]) & rs)) - rfree(cur, rsave[r]); - break; - case OCopy: - if (isreg(i->arg[0])) { - i = dopm(b, i, cur); - continue; - } - if (isreg(i->to)) - if (rtype(i->arg[0]) == RTmp) - sethint(i->arg[0].val, i->to.val); - /* fall through */ - default: - if (!req(i->to, R)) { - assert(rtype(i->to) == RTmp); - r = rfree(cur, i->to.val); - if (r == -1 && !isreg(i->to)) { - *i = (Ins){.op = ONop}; - continue; - } - if (i->to.val >= Tmp0) - i->to = TMP(r); - } - break; - } - for (x=0, nr=0; x<2; x++) - switch (rtype(i->arg[x])) { - case RAMem: - m = &mem[i->arg[x].val & AMask]; - if (rtype(m->base) == RTmp) - insert(&m->base, ra, nr++); - if (rtype(m->index) == RTmp) - insert(&m->index, ra, nr++); - break; - case RTmp: - insert(&i->arg[x], ra, nr++); - break; - } - for (r=0; r<nr; r++) - *ra[r] = ralloc(cur, ra[r]->val); - } -} - -/* register allocation - * depends on rpo, phi, cost, (and obviously spill) - */ -void -rega(Fn *fn) -{ - int j, n, t, r, r1, x, rl[Tmp0]; - Blk *b, *b1, *s, ***ps, *blist; - RMap *end, *beg, cur, old; - Ins *i; - Phi *p; - uint u; - Ref src, dst; - - /* 1. setup */ - regu = 0; - tmp = fn->tmp; - mem = fn->mem; - end = alloc(fn->nblk * sizeof end[0]); - beg = alloc(fn->nblk * sizeof beg[0]); - for (n=0; n<fn->nblk; n++) { - bsinit(end[n].b, fn->ntmp); - bsinit(beg[n].b, fn->ntmp); - } - bsinit(cur.b, fn->ntmp); - bsinit(old.b, fn->ntmp); - - for (t=Tmp0; t<fn->ntmp; t++) - *hint(t) = -1; - for (b=fn->start, i=b->ins; i-b->ins < b->nins; i++) - if (i->op != OCopy || !isreg(i->arg[0])) - break; - else { - assert(rtype(i->to) == RTmp); - sethint(i->to.val, i->arg[0].val); - } - - /* 2. assign registers following post-order */ - for (n=fn->nblk-1; n>=0; n--) { - b = fn->rpo[n]; - cur.n = 0; - bszero(cur.b); - for (x=0; x<2; x++) - for (t=Tmp0; t<fn->ntmp; t++) { - assert(bshas(b->out, t) || - !bshas(cur.b, t)); - if (bshas(b->out, t)) - if (!bshas(cur.b, t)) - if (x || (r=*hint(t)) != -1) - if (x || !bshas(cur.b, r)) - ralloc(&cur, t); - } - rcopy(&end[n], &cur); - doblk(b, &cur); - bscopy(b->in, cur.b); - for (p=b->phi; p; p=p->link) - if (rtype(p->to) == RTmp) { - bsclr(b->in, p->to.val); - /* heuristic 0: - * if the phi destination has an - * argument from a frequent block - * that was already allocated to - * 'r', use 'r' as the new hint - */ - memset(rl, 0, sizeof rl); - for (u=0; u<p->narg; u++) { - t = p->arg[u].val; - b1 = p->blk[u]; - if (rtype(p->arg[u]) == RTmp) - if ((r=rfind(&end[b1->id], t)) != -1) - rl[r] += b1->loop; - } - for (x=0, j=0; j<Tmp0; j++) - if (rl[j] > rl[x]) - x = j; - if (rl[x] >= b->loop) - *hint(p->to.val) = x; - } - if (b->npred > 1) { - /* heuristic 1: - * attempt to satisfy hints - * when it's simple and we have - * multiple predecessors - */ - rcopy(&old, &cur); - curi = &insb[NIns]; - for (j=0; j<old.n; j++) { - t = old.t[j]; - r = *hint(t); - r1 = rfind(&cur, t); - if (r != -1 && r != r1) - if (!bshas(cur.b, r)) { - rfree(&cur, t); - radd(&cur, t, r); - x = tmp[t].cls; - emit(OCopy, x, TMP(r1), TMP(r), R); - } - } - if ((j = &insb[NIns] - curi)) { - b->nins += j; - i = alloc(b->nins * sizeof(Ins)); - icpy(icpy(i, curi, j), b->ins, b->nins-j); - b->ins = i; - } - } - rcopy(&beg[n], &cur); - } - if (debug['R']) { - fprintf(stderr, "\n> Register mappings:\n"); - for (n=0; n<fn->nblk; n++) { - b = fn->rpo[n]; - fprintf(stderr, "\t%-10s beg", b->name); - mdump(&beg[n]); - fprintf(stderr, "\t end"); - mdump(&end[n]); - } - fprintf(stderr, "\n"); - } - - /* 3. compose glue code */ - blist = 0; - for (b=fn->start;; b=b->link) { - ps = (Blk**[3]){&b->s1, &b->s2, (Blk*[1]){0}}; - for (; (s=**ps); ps++) { - npm = 0; - for (p=s->phi; p; p=p->link) { - dst = p->to; - assert(rtype(dst)==RSlot || rtype(dst)==RTmp); - if (rtype(dst) == RTmp) { - r = rfind(&beg[s->id], dst.val); - if (r == -1) - continue; - dst = TMP(r); - } - for (u=0; p->blk[u]!=b; u++) - assert(u+1 < p->narg); - src = p->arg[u]; - if (rtype(src) == RTmp) - src = rref(&end[b->id], src.val); - pmadd(src, dst, p->cls); - } - for (t=Tmp0; t<fn->ntmp; t++) - if (bshas(s->in, t)) { - src = rref(&end[b->id], t); - dst = rref(&beg[s->id], t); - pmadd(src, dst, tmp[t].cls); - } - pmgen(); - if (curi == insb) - continue; - b1 = blknew(); - b1->loop = (b->loop+s->loop) / 2; - b1->link = blist; - blist = b1; - fn->nblk++; - sprintf(b1->name, "%s_%s", b->name, s->name); - b1->nins = curi - insb; - idup(&b1->ins, insb, b1->nins); - b1->jmp.type = JJmp; - b1->s1 = s; - **ps = b1; - } - if (!b->link) { - b->link = blist; - break; - } - } - for (b=fn->start; b; b=b->link) - b->phi = 0; - fn->reg = regu; - - if (debug['R']) { - fprintf(stderr, "\n> After register allocation:\n"); - printfn(fn, stderr); - } -} |