diff options
author | Quentin Carbonneaux <quentin@c9x.me> | 2019-02-21 22:35:12 +0100 |
---|---|---|
committer | Quentin Carbonneaux <quentin@c9x.me> | 2019-02-26 08:51:59 +0100 |
commit | b2ea8c11b61014cb90e2a025d605ac77a1c7d6bc (patch) | |
tree | c3d3c5c7372ea7c2a29bec69110e8e70246258a9 /copy.c | |
parent | dadf6d69d8ef24ada3461ddc81cf56418cfdc91e (diff) | |
download | roux-b2ea8c11b61014cb90e2a025d605ac77a1c7d6bc.tar.gz |
new copy elimination pass
The sparse data-flow analysis used for copy elimination before this patch could sometimes diverge. The core reason for this behavior is that the visitphi() function was not monotonic in the following copy-of lattice: top (represented as the temp / | \ itself) x y z ... \ | / bot (represented as R) This monotonicity defect could be fixed by reverting 2f41ff03, but then the pass would end up missing some redundant phis. This patch re-implements the pass from scratch using a different approach. The new algorithm should get rid of all redundant copies. On the other hand, it can run slower than the monotonic sparse data-flow analysis because, in the worst case, an instruction in a phi cluster can be visited as many times as there are phis in the input program. Thanks to Michael Forney for reviewing and testing the new pass.
Diffstat (limited to 'copy.c')
-rw-r--r-- | copy.c | 220 |
1 files changed, 112 insertions, 108 deletions
diff --git a/copy.c b/copy.c index 4432986..b92c5f4 100644 --- a/copy.c +++ b/copy.c @@ -1,58 +1,7 @@ #include "all.h" -typedef struct RList RList; -struct RList { - int t; - RList *l; -}; - -static Ref -copyof(Ref r, Ref *cp) -{ - if (rtype(r) == RTmp) - return cp[r.val]; - else - return r; -} - -static void -update(Ref r, Ref rcp, Ref *cp, RList ***pw) -{ - RList *l; - - if (!req(cp[r.val], rcp)) { - cp[r.val] = rcp; - l = emalloc(sizeof *l); - l->t = r.val; - l->l = 0; - **pw = l; - *pw = &l->l; - } -} - -static void -visitphi(Phi *p, Ref *cp, RList ***pw) -{ - uint a; - Ref r, r1; - - r = R; - for (a=0; a<p->narg; a++) { - r1 = copyof(p->arg[a], cp); - if (req(r1, R) || req(r1, p->to)) - continue; - if (req(r, R) || req(r, r1)) - r = r1; - else { - r = p->to; - break; - } - } - update(p->to, r, cp, pw); -} - static int -iscopy(Ins *i, Ref r, Fn *fn) +iscopy(Ins *i, Ref r, Tmp *tmp) { static bits extcpy[] = { [WFull] = 0, @@ -74,7 +23,7 @@ iscopy(Ins *i, Ref r, Fn *fn) if (i->cls == Kw) return 1; - t = &fn->tmp[r.val]; + t = &tmp[r.val]; assert(KBASE(t->cls) == 0); if (i->cls == Kl && t->cls == Kw) return 0; @@ -82,105 +31,160 @@ iscopy(Ins *i, Ref r, Fn *fn) return (BIT(Wsb + (i->op-Oextsb)) & b) != 0; } +static Ref +copyof(Ref r, Ref *cpy) +{ + if (rtype(r) == RTmp && !req(cpy[r.val], R)) + return cpy[r.val]; + return r; +} + +/* detects a cluster of phis/copies redundant with 'r'; + * the algorithm is inspired by Section 3.2 of "Simple + * and Efficient SSA Construction" by Braun M. et al. + */ static void -visitins(Ins *i, Ref *cp, RList ***pw, Fn *fn) +phisimpl(Phi *p, Ref r, Ref *cpy, Use ***pstk, BSet *ts, BSet *as, Tmp *tmp) { - Ref r; + Use **stk, *u, *u1; + uint nstk, a; + int t; + Ref r1; - r = copyof(i->arg[0], cp); - if (iscopy(i, r, fn)) { - update(i->to, r, cp, pw); - } else if (!req(i->to, R)) { - assert(rtype(i->to) == RTmp); - update(i->to, i->to, cp, pw); + bszero(ts); + bszero(as); + stk = *pstk; + nstk = 1; + stk[0] = &(Use){.type = UPhi, .u.phi = p}; + while (nstk) { + u = stk[--nstk]; + if (u->type == UIns && iscopy(u->u.ins, r, tmp)) { + p = &(Phi){.narg = 0}; + t = u->u.ins->to.val; + } + else if (u->type == UPhi) { + p = u->u.phi; + t = p->to.val; + } + else + continue; + if (bshas(ts, t)) + continue; + bsset(ts, t); + for (a=0; a<p->narg; a++) { + r1 = copyof(p->arg[a], cpy); + if (req(r1, r)) + continue; + if (rtype(r1) != RTmp) + return; + bsset(as, r1.val); + } + u = tmp[t].use; + u1 = &u[tmp[t].nuse]; + vgrow(pstk, nstk+(u1-u)); + stk = *pstk; + for (; u<u1; u++) + stk[nstk++] = u; } + bsdiff(as, ts); + if (!bscount(as)) + for (t=0; bsiter(ts, &t); t++) + cpy[t] = r; } static void -subst(Ref *r, Ref *cp) +subst(Ref *pr, Ref *cpy) { - assert((rtype(*r) != RTmp || !req(copyof(*r, cp), R)) && "ssa invariant broken"); - *r = copyof(*r, cp); + assert(rtype(*pr) != RTmp || !req(cpy[pr->val], R)); + *pr = copyof(*pr, cpy); } +/* requires use and rpo, breaks use */ void copy(Fn *fn) { - Blk *b; - Ref *cp, r; - RList *w, *w1, **pw; - Use *u, *u1; - Ins *i; + BSet ts[1], as[1]; + Use **stk; Phi *p, **pp; - uint a; + Ins *i; + Blk *b; + uint n, a; + Ref *cpy, r; int t; - w = 0; - pw = &w; - cp = emalloc(fn->ntmp * sizeof cp[0]); - for (b=fn->start; b; b=b->link) { - for (p=b->phi; p; p=p->link) - visitphi(p, cp, &pw); - for (i=b->ins; i<&b->ins[b->nins]; i++) - visitins(i, cp, &pw, fn); - } - while ((w1=w)) { - t = w->t; - u = fn->tmp[t].use; - u1 = u + fn->tmp[t].nuse; - for (; u<u1; u++) - switch (u->type) { - case UPhi: - visitphi(u->u.phi, cp, &pw); - break; - case UIns: - visitins(u->u.ins, cp, &pw, fn); - break; - case UJmp: - break; - default: - die("invalid use %d", u->type); - } - w = w->l; - free(w1); + bsinit(ts, fn->ntmp); + bsinit(as, fn->ntmp); + cpy = emalloc(fn->ntmp * sizeof cpy[0]); + stk = vnew(10, sizeof stk[0], Pheap); + + /* 1. build the copy-of map */ + for (n=0; n<fn->nblk; n++) { + b = fn->rpo[n]; + for (p=b->phi; p; p=p->link) { + assert(rtype(p->to) == RTmp); + if (!req(cpy[p->to.val], R)) + continue; + r = R; + for (a=0; a<p->narg; a++) + if (p->blk[a]->id < n) + r = copyof(p->arg[a], cpy); + assert(!req(r, R)); + cpy[p->to.val] = p->to; + phisimpl(p, r, cpy, &stk, ts, as, fn->tmp); + } + for (i=b->ins; i<&b->ins[b->nins]; i++) { + assert(rtype(i->to) <= RTmp); + if (!req(cpy[i->to.val], R)) + continue; + r = copyof(i->arg[0], cpy); + if (iscopy(i, r, fn->tmp)) + cpy[i->to.val] = r; + else + cpy[i->to.val] = i->to; + } } + + /* 2. remove redundant phis/copies + * and rewrite their uses */ for (b=fn->start; b; b=b->link) { for (pp=&b->phi; (p=*pp);) { - r = cp[p->to.val]; + r = cpy[p->to.val]; if (!req(r, p->to)) { *pp = p->link; continue; } for (a=0; a<p->narg; a++) - subst(&p->arg[a], cp); + subst(&p->arg[a], cpy); pp=&p->link; } for (i=b->ins; i<&b->ins[b->nins]; i++) { - r = copyof(i->to, cp); + r = cpy[i->to.val]; if (!req(r, i->to)) { *i = (Ins){.op = Onop}; continue; } - for (a=0; a<2; a++) - subst(&i->arg[a], cp); + subst(&i->arg[0], cpy); + subst(&i->arg[1], cpy); } - subst(&b->jmp.arg, cp); + subst(&b->jmp.arg, cpy); } + if (debug['C']) { fprintf(stderr, "\n> Copy information:"); for (t=Tmp0; t<fn->ntmp; t++) { - if (req(cp[t], R)) { + if (req(cpy[t], R)) { fprintf(stderr, "\n%10s not seen!", fn->tmp[t].name); } - else if (!req(cp[t], TMP(t))) { + else if (!req(cpy[t], TMP(t))) { fprintf(stderr, "\n%10s copy of ", fn->tmp[t].name); - printref(cp[t], fn, stderr); + printref(cpy[t], fn, stderr); } } fprintf(stderr, "\n\n> After copy elimination:\n"); printfn(fn, stderr); } - free(cp); + vfree(stk); + free(cpy); } |