diff options
author | Quentin Carbonneaux <quentin@c9x.me> | 2022-12-10 23:16:21 +0100 |
---|---|---|
committer | Quentin Carbonneaux <quentin@c9x.me> | 2022-12-14 23:18:26 +0100 |
commit | 26c1c30b7d96d2170195970a8cdb3b024ba7421a (patch) | |
tree | 79c45ec28d63619fbe2a88ec2195f8fe4a95a8a5 | |
parent | 15e25a61b38b250c7543437a093a9efe076cce0a (diff) | |
download | roux-26c1c30b7d96d2170195970a8cdb3b024ba7421a.tar.gz |
new blit instruction
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | alias.c | 54 | ||||
-rw-r--r-- | all.h | 11 | ||||
-rw-r--r-- | amd64/sysv.c | 13 | ||||
-rw-r--r-- | arm64/abi.c | 19 | ||||
-rw-r--r-- | load.c | 41 | ||||
-rw-r--r-- | main.c | 1 | ||||
-rw-r--r-- | mem.c | 40 | ||||
-rw-r--r-- | ops.h | 2 | ||||
-rw-r--r-- | parse.c | 69 | ||||
-rw-r--r-- | rv64/abi.c | 25 | ||||
-rw-r--r-- | simpl.c | 82 | ||||
-rw-r--r-- | test/load2.ssa | 75 | ||||
-rw-r--r-- | test/mem1.ssa | 35 | ||||
-rw-r--r-- | tools/lexh.c | 2 | ||||
-rw-r--r-- | util.c | 30 |
16 files changed, 398 insertions, 103 deletions
diff --git a/Makefile b/Makefile index 674f850..5fadadc 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ PREFIX = /usr/local BINDIR = $(PREFIX)/bin COMMOBJ = main.o util.o parse.o abi.o cfg.o mem.o ssa.o alias.o load.o \ - copy.o fold.o live.o spill.o rega.o emit.o + copy.o fold.o simpl.o live.o spill.o rega.o emit.o AMD64OBJ = amd64/targ.o amd64/sysv.o amd64/isel.o amd64/emit.o ARM64OBJ = arm64/targ.o arm64/abi.o arm64/isel.o arm64/emit.o RV64OBJ = rv64/targ.o rv64/abi.o rv64/isel.o rv64/emit.o diff --git a/alias.c b/alias.c index 50e659a..3885115 100644 --- a/alias.c +++ b/alias.c @@ -28,13 +28,17 @@ getalias(Alias *a, Ref r, Fn *fn) } int -alias(Ref p, int sp, Ref q, int sq, int *delta, Fn *fn) +alias(Ref p, int op, int sp, Ref q, int sq, int *delta, Fn *fn) { Alias ap, aq; int ovlap; getalias(&ap, p, fn); getalias(&aq, q, fn); + ap.offset += op; + /* when delta is meaningful (ovlap == 1), + * we do not overflow int because sp and + * sq are bounded by 2^28 */ *delta = ap.offset - aq.offset; ovlap = ap.offset < aq.offset + sq && aq.offset < ap.offset + sp; @@ -103,13 +107,34 @@ esc(Ref r, Fn *fn) } } +static void +store(Ref r, int sz, Fn *fn) +{ + Alias *a; + int64_t off; + bits m; + + if (rtype(r) == RTmp) { + a = &fn->tmp[r.val].alias; + if (a->slot) { + assert(astack(a->type)); + off = a->offset; + if (sz >= NBit + || (off < 0 || off >= NBit)) + m = -1; + else + m = (BIT(sz) - 1) << off; + a->slot->u.loc.m |= m; + } + } +} + void fillalias(Fn *fn) { uint n, m; - int t; + int t, sz; int64_t x; - bits w; Blk *b; Phi *p; Ins *i; @@ -171,26 +196,23 @@ fillalias(Fn *fn) a->offset += a1.offset; } } - if (req(i->to, R) || a->type == AUnk) { + if (req(i->to, R) || a->type == AUnk) + if (i->op != Oblit0) { if (!isload(i->op)) esc(i->arg[0], fn); if (!isstore(i->op)) if (i->op != Oargc) esc(i->arg[1], fn); } - if (isstore(i->op)) - if (rtype(i->arg[1]) == RTmp) { - a = &fn->tmp[i->arg[1].val].alias; - if (a->slot) { - assert(astack(a->type)); - x = a->offset; - if (0 <= x && x < NBit) { - w = BIT(storesz(i)) - 1; - a->slot->u.loc.m |= w << x; - } else - a->slot->u.loc.sz = -1; - } + if (i->op == Oblit0) { + ++i; + assert(i->op == Oblit1); + assert(rtype(i->arg[0]) == RInt); + sz = abs(rsval(i->arg[0])); + store((i-1)->arg[1], sz, fn); } + if (isstore(i->op)) + store(i->arg[1], storesz(i), fn); } if (b->jmp.type != Jretc) esc(b->jmp.arg, fn); diff --git a/all.h b/all.h index fe2b56b..47a61d8 100644 --- a/all.h +++ b/all.h @@ -83,7 +83,8 @@ struct Ref { enum { RTmp, RCon, - RType, + RInt, + RType, /* last kind to come out of the parser */ RSlot, RCall, RMem, @@ -97,6 +98,7 @@ enum { #define TYPE(x) (Ref){RType, x} #define CALL(x) (Ref){RCall, x} #define MEM(x) (Ref){RMem, x} +#define INT(x) (Ref){RInt, (x)&0x1fffffff} static inline int req(Ref a, Ref b) { @@ -474,8 +476,6 @@ int symeq(Sym, Sym); Ref newcon(Con *, Fn *); Ref getcon(int64_t, Fn *); int addcon(Con *, Con *); -void blit(Ref, uint, Ref, uint, uint, Fn *); -void blit0(Ref, Ref, uint, Fn *); void salloc(Ref, Ref, Fn *); void dumpts(BSet *, Tmp *, FILE *); @@ -528,7 +528,7 @@ void coalesce(Fn *); /* alias.c */ void fillalias(Fn *); void getalias(Alias *, Ref, Fn *); -int alias(Ref, int, Ref, int, int *, Fn *); +int alias(Ref, int, int, Ref, int, int *, Fn *); int escapes(Ref, Fn *); /* load.c */ @@ -549,6 +549,9 @@ void copy(Fn *); /* fold.c */ void fold(Fn *); +/* simpl.c */ +void simpl(Fn *); + /* live.c */ void liveon(BSet *, Blk *, Blk *); void filllive(Fn *); diff --git a/amd64/sysv.c b/amd64/sysv.c index f4e0416..04dfd83 100644 --- a/amd64/sysv.c +++ b/amd64/sysv.c @@ -127,7 +127,8 @@ selret(Blk *b, Fn *fn) if (aret.inmem) { assert(rtype(fn->retr) == RTmp); emit(Ocopy, Kl, TMP(RAX), fn->retr, R); - blit0(fn->retr, r0, aret.type->size, fn); + emit(Oblit1, 0, R, INT(aret.type->size), R); + emit(Oblit0, 0, R, r0, fn->retr); ca = 1; } else { ca = retr(reg, &aret); @@ -410,15 +411,15 @@ selcall(Fn *fn, Ins *i0, Ins *i1, RAlloc **rap) for (i=i0, a=ac, off=0; i<i1; i++, a++) { if (i->op >= Oarge || !a->inmem) continue; + r1 = newtmp("abi", Kl, fn); if (i->op == Oargc) { if (a->align == 4) off += off & 15; - blit(r, off, i->arg[1], 0, a->type->size, fn); - } else { - r1 = newtmp("abi", Kl, fn); + emit(Oblit1, 0, R, INT(a->type->size), R); + emit(Oblit0, 0, R, i->arg[1], r1); + } else emit(Ostorel, 0, R, i->arg[0], r1); - emit(Oadd, Kl, r1, r, getcon(off, fn)); - } + emit(Oadd, Kl, r1, r, getcon(off, fn)); off += a->size; } emit(Osalloc, Kl, r, getcon(stk, fn), R); diff --git a/arm64/abi.c b/arm64/abi.c index 7282031..8ba4ffc 100644 --- a/arm64/abi.c +++ b/arm64/abi.c @@ -188,7 +188,8 @@ selret(Blk *b, Fn *fn) typclass(&cr, &typ[fn->retty], gpreg, fpreg); if (cr.class & Cptr) { assert(rtype(fn->retr) == RTmp); - blit0(fn->retr, r, cr.t->size, fn); + emit(Oblit1, 0, R, INT(cr.t->size), R); + emit(Oblit0, 0, R, r, fn->retr); cty = 0; } else { ldregs(cr.reg, cr.cls, cr.nreg, r, fn); @@ -438,8 +439,8 @@ selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp) if ((c->class & Cstk) == 0) continue; off = align(off, c->align); + r = newtmp("abi", Kl, fn); if (i->op == Oarg || isargbh(i->op)) { - r = newtmp("abi", Kl, fn); switch (c->size) { case 1: op = Ostoreb; break; case 2: op = Ostoreh; break; @@ -447,18 +448,22 @@ selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp) case 8: op = store[*c->cls]; break; } emit(op, 0, R, i->arg[0], r); - emit(Oadd, Kl, r, TMP(SP), getcon(off, fn)); + } else { + assert(i->op == Oargc); + emit(Oblit1, 0, R, INT(c->size), R); + emit(Oblit0, 0, R, i->arg[1], r); } - if (i->op == Oargc) - blit(TMP(SP), off, i->arg[1], 0, c->size, fn); + emit(Oadd, Kl, r, TMP(SP), getcon(off, fn)); off += c->size; } if (stk) emit(Osub, Kl, TMP(SP), TMP(SP), rstk); for (i=i0, c=ca; i<i1; i++, c++) - if (c->class & Cptr) - blit0(i->arg[0], i->arg[1], c->t->size, fn); + if (c->class & Cptr) { + emit(Oblit1, 0, R, INT(c->t->size), R); + emit(Oblit0, 0, R, i->arg[1], i->arg[0]); + } } static Params diff --git a/load.c b/load.c index b16edd2..551d02e 100644 --- a/load.c +++ b/load.c @@ -6,7 +6,6 @@ typedef struct Loc Loc; typedef struct Slice Slice; typedef struct Insert Insert; - struct Loc { enum { LRoot, /* right above the original load */ @@ -19,6 +18,7 @@ struct Loc { struct Slice { Ref ref; + int off; short sz; short cls; /* load class */ }; @@ -194,6 +194,7 @@ killsl(Ref r, Slice sl) static Ref def(Slice sl, bits msk, Blk *b, Ins *i, Loc *il) { + Slice sl1; Blk *bp; bits msk1, msks; int off, cls, cls1, op, sz, ld; @@ -244,10 +245,33 @@ def(Slice sl, bits msk, Blk *b, Ins *i, Loc *il) sz = storesz(i); r1 = i->arg[1]; r = i->arg[0]; + } else if (i->op == Oblit1) { + assert(rtype(i->arg[0]) == RInt); + sz = abs(rsval(i->arg[0])); + --i; + assert(i->op == Oblit0); + r1 = i->arg[1]; } else continue; - switch (alias(sl.ref, sl.sz, r1, sz, &off, curf)) { + switch (alias(sl.ref, sl.off, sl.sz, r1, sz, &off, curf)) { case MustAlias: + if (i->op == Oblit0) { + sl1 = sl; + sl1.ref = i->arg[0]; + if (off >= 0) { + assert(off < sz); + sl1.off = off; + sz -= off; + off = 0; + } else { + sl1.off = 0; + sl1.sz += off; + } + if (sz > sl1.sz) + sz = sl1.sz; + assert(sz <= 8); + sl1.sz = sz; + } if (off < 0) { off = -off; msk1 = (MASK(sz) << 8*off) & msks; @@ -257,7 +281,12 @@ def(Slice sl, bits msk, Blk *b, Ins *i, Loc *il) op = Oshr; } if ((msk1 & msk) == 0) - break; + continue; + if (i->op == Oblit0) { + r = def(sl1, MASK(sz), b, i, il); + if (req(r, R)) + goto Load; + } if (off) { cls1 = cls; if (op == Oshr && off + sl.sz > 4) @@ -279,11 +308,11 @@ def(Slice sl, bits msk, Blk *b, Ins *i, Loc *il) return r; case MayAlias: if (ld) - break; + continue; else goto Load; case NoAlias: - break; + continue; default: die("unreachable"); } @@ -397,7 +426,7 @@ loadopt(Fn *fn) if (!isload(i->op)) continue; sz = loadsz(i); - sl = (Slice){i->arg[0], sz, i->cls}; + sl = (Slice){i->arg[0], 0, sz, i->cls}; l = (Loc){LRoot, i-b->ins, b}; i->arg[1] = def(sl, MASK(sz), b, i, &l); } diff --git a/main.c b/main.c index 3fcfd7f..abfe03e 100644 --- a/main.c +++ b/main.c @@ -78,6 +78,7 @@ func(Fn *fn) filluse(fn); fold(fn); T.abi1(fn); + simpl(fn); fillpreds(fn); filluse(fn); T.isel(fn); diff --git a/mem.c b/mem.c index 7570f19..5d59b96 100644 --- a/mem.c +++ b/mem.c @@ -195,12 +195,13 @@ coalesce(Fn *fn) Range r, *br; Slot *s, *s0, *sl; Blk *b, **ps, *succ[3]; - Ins *i; + Ins *i, **bl; Use *u; Tmp *t, *ts; Ref *arg; bits x; - int n, m, nsl, ip, *stk; + int64_t off0, off1; + int n, m, sz, nsl, nbl, ip, *stk; uint total, freed, fused; /* minimize the stack usage @@ -229,6 +230,8 @@ coalesce(Fn *fn) for (b=fn->start; b; b=b->link) b->loop = -1; loopiter(fn, maxrpo); + nbl = 0; + bl = vnew(0, sizeof bl[0], PHeap); br = emalloc(fn->nblk * sizeof br[0]); ip = INT_MAX - 1; for (n=fn->nblk-1; n>=0; n--) { @@ -247,8 +250,11 @@ coalesce(Fn *fn) } } } + if (b->jmp.type == Jretc) + load(b->jmp.arg, -1, --ip, fn, sl); for (i=&b->ins[b->nins]; i!=b->ins;) { - arg = (--i)->arg; + --i; + arg = i->arg; if (i->op == Oargc) { load(arg[1], -1, --ip, fn, sl); } @@ -260,6 +266,16 @@ coalesce(Fn *fn) x = BIT(storesz(i)) - 1; store(arg[1], x, ip--, fn, sl); } + if (i->op == Oblit0) { + assert((i+1)->op == Oblit1); + assert(rtype((i+1)->arg[0]) == RInt); + sz = abs(rsval((i+1)->arg[0])); + x = sz >= NBit ? (bits)-1 : BIT(sz) - 1; + store(arg[1], x, ip--, fn, sl); + load(arg[0], x, ip, fn, sl); + vgrow(&bl, ++nbl); + bl[nbl-1] = i; + } } for (s=sl; s<&sl[nsl]; s++) if (s->l) { @@ -321,6 +337,8 @@ coalesce(Fn *fn) stk[n-1] = i->to.val; } else { assert(!isarg(i->op)); + if (i->op == Oblit0) + *(i+1) = (Ins){.op = Onop}; *i = (Ins){.op = Onop}; } } @@ -340,7 +358,7 @@ coalesce(Fn *fn) if (s->s || !s->r.b) goto Skip; if (rovlap(r, s->r)) - /* O(n) can be approximated + /* O(n); can be approximated * by 'goto Skip;' if need be */ for (m=n; &sl[m]<s; m++) @@ -387,6 +405,20 @@ coalesce(Fn *fn) } } + /* fix newly overlapping blits */ + for (n=0; n<nbl; n++) { + i = bl[n]; + if (i->op == Oblit0) + if (slot(&s, &off0, i->arg[0], fn, sl)) + if (slot(&s0, &off1, i->arg[1], fn, sl)) + if (s->s == s0->s && off0 < off1) { + sz = rsval((i+1)->arg[0]); + assert(sz >= 0); + (i+1)->arg[0] = INT(-sz); + } + } + vfree(bl); + if (debug['M']) { for (s0=sl; s0<&sl[nsl]; s0++) { if (s0->s != s0) diff --git a/ops.h b/ops.h index 3d65081..fbcc2a8 100644 --- a/ops.h +++ b/ops.h @@ -129,6 +129,8 @@ O(copy, T(w,l,s,d, x,x,x,x), 0) X(0, 0, 1) V(0) /* Miscellaneous and Architecture-Specific Operations */ O(nop, T(x,x,x,x, x,x,x,x), 0) X(0, 0, 1) V(0) O(addr, T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1) V(0) +O(blit0, T(m,e,e,e, m,e,e,e), 0) X(0, 1, 0) V(0) +O(blit1, T(w,e,e,e, x,e,e,e), 0) X(0, 1, 0) V(0) O(swap, T(w,l,s,d, w,l,s,d), 0) X(1, 0, 0) V(0) O(sign, T(w,l,e,e, x,x,e,e), 0) X(0, 0, 0) V(0) O(salloc, T(e,l,e,e, e,x,e,e), 0) X(0, 0, 0) V(0) diff --git a/parse.c b/parse.c index 0836b9a..68488a2 100644 --- a/parse.c +++ b/parse.c @@ -27,7 +27,7 @@ typedef enum { PEnd, } PState; -enum { +enum Token { Txxx = 0, /* aliases */ @@ -38,6 +38,7 @@ enum { Talloc1, Talloc2, + Tblit, Tcall, Tenv, Tphi, @@ -94,6 +95,7 @@ static char *kwmap[Ntok] = { [Tloadd] = "loadd", [Talloc1] = "alloc1", [Talloc2] = "alloc2", + [Tblit] = "blit", [Tcall] = "call", [Tenv] = "env", [Tphi] = "phi", @@ -481,7 +483,7 @@ parserefl(int arg) expect(Tlparen); while (peek() != Trparen) { if (curi - insb >= NIns) - err("too many instructions (1)"); + err("too many instructions"); if (!arg && vararg) err("no parameters allowed after '...'"); switch (peek()) { @@ -578,6 +580,7 @@ parseline(PState ps) Phi *phi; Ref r; Blk *b; + Con *c; int t, op, i, k, ty; t = nextnl(); @@ -586,6 +589,7 @@ parseline(PState ps) switch (t) { default: if (isstore(t)) { + case Tblit: case Tcall: case Ovastart: /* operations without result */ @@ -657,11 +661,6 @@ parseline(PState ps) k = parsecls(&ty); op = next(); DoOp: - if (op == Tphi) { - if (ps != PPhi || curb == curf->start) - err("unexpected phi instruction"); - op = -1; - } if (op == Tcall) { arg[0] = parseref(); parserefl(1); @@ -686,14 +685,12 @@ DoOp: err("cannot use vastart in non-variadic function"); if (k >= Ksb) err("size class must be w, l, s, or d"); - if (op >= NPubOp) - err("invalid instruction"); i = 0; if (peek() != Tnl) for (;;) { if (i == NPred) err("too many arguments"); - if (op == -1) { + if (op == Tphi) { expect(Tlbl); blk[i] = findblk(tokval.str); } @@ -709,18 +706,10 @@ DoOp: next(); } next(); -Ins: - if (op != -1) { - if (curi - insb >= NIns) - err("too many instructions (2)"); - curi->op = op; - curi->cls = k; - curi->to = r; - curi->arg[0] = arg[0]; - curi->arg[1] = arg[1]; - curi++; - return PIns; - } else { + switch (op) { + case Tphi: + if (ps != PPhi || curb == curf->start) + err("unexpected phi instruction"); phi = alloc(sizeof *phi); phi->to = r; phi->cls = k; @@ -732,6 +721,39 @@ Ins: *plink = phi; plink = &phi->link; return PPhi; + case Tblit: + if (curi - insb >= NIns-1) + err("too many instructions"); + memset(curi, 0, 2 * sizeof(Ins)); + curi->op = Oblit0; + curi->arg[0] = arg[0]; + curi->arg[1] = arg[1]; + curi++; + if (rtype(arg[2]) != RCon) + err("blit size must be constant"); + c = &curf->con[arg[2].val]; + r = INT(c->bits.i); + if (c->type != CBits + || rsval(r) < 0 + || rsval(r) != c->bits.i) + err("invalid blit size"); + curi->op = Oblit1; + curi->arg[0] = r; + curi++; + return PIns; + default: + if (op >= NPubOp) + err("invalid instruction"); + Ins: + if (curi - insb >= NIns) + err("too many instructions"); + curi->op = op; + curi->cls = k; + curi->to = r; + curi->arg[0] = arg[0]; + curi->arg[1] = arg[1]; + curi++; + return PIns; } } @@ -1241,6 +1263,9 @@ printref(Ref r, Fn *fn, FILE *f) } fputc(']', f); break; + case RInt: + fprintf(f, "%d", rsval(r)); + break; } } diff --git a/rv64/abi.c b/rv64/abi.c index 3a97a6a..e31425c 100644 --- a/rv64/abi.c +++ b/rv64/abi.c @@ -222,7 +222,8 @@ selret(Blk *b, Fn *fn) typclass(&cr, &typ[fn->retty], 1, gpreg, fpreg); if (cr.class & Cptr) { assert(rtype(fn->retr) == RTmp); - blit0(fn->retr, r, cr.type->size, fn); + emit(Oblit1, 0, R, INT(cr.type->size), R); + emit(Oblit0, 0, R, r, fn->retr); cty = 0; } else { ldregs(&cr, r, fn); @@ -341,7 +342,7 @@ selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp) Class *ca, *c, cr; int j, k, cty; uint64_t stk, off; - Ref r, r1, tmp[2]; + Ref r, r1, r2, tmp[2]; ca = alloc((i1-i0) * sizeof ca[0]); cr.class = 0; @@ -419,8 +420,10 @@ selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp) k = KWIDE(*c->cls) ? Kl : Kw; emit(Ocast, k, TMP(*c->reg), i->arg[0], R); } - if (c->class & Cptr) - blit0(i->arg[0], i->arg[1], c->type->size, fn); + if (c->class & Cptr) { + emit(Oblit1, 0, R, INT(c->type->size), R); + emit(Oblit0, 0, R, i->arg[1], i->arg[0]); + } } if (!stk) @@ -450,11 +453,21 @@ selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp) } if (i->op == Oargc) { if (c->class & Cstk1) { - blit(r, off, i->arg[1], 0, 8, fn); + r1 = newtmp("abi", Kl, fn); + r2 = newtmp("abi", Kl, fn); + emit(Ostorel, 0, R, r2, r1); + emit(Oadd, Kl, r1, r, getcon(off, fn)); + emit(Oload, Kl, r2, i->arg[1], R); off += 8; } if (c->class & Cstk2) { - blit(r, off, i->arg[1], 8, 8, fn); + r1 = newtmp("abi", Kl, fn); + r2 = newtmp("abi", Kl, fn); + emit(Ostorel, 0, R, r2, r1); + emit(Oadd, Kl, r1, r, getcon(off, fn)); + r1 = newtmp("abi", Kl, fn); + emit(Oload, Kl, r2, r1, R); + emit(Oadd, Kl, r1, i->arg[1], getcon(8, fn)); off += 8; } } diff --git a/simpl.c b/simpl.c new file mode 100644 index 0000000..7001301 --- /dev/null +++ b/simpl.c @@ -0,0 +1,82 @@ +#include "all.h" + +static void +blit(Ref sd[2], int sz, Fn *fn) +{ + struct { int st, ld, cls, size; } *p, tbl[] = { + { Ostorel, Oload, Kl, 8 }, + { Ostorew, Oload, Kw, 4 }, + { Ostoreh, Oloaduh, Kw, 2 }, + { Ostoreb, Oloadub, Kw, 1 } + }; + Ref r, r1, ro; + int off, fwd, n; + + fwd = sz >= 0; + sz = abs(sz); + off = fwd ? sz : 0; + for (p=tbl; sz; p++) + for (n=p->size; sz>=n; sz-=n) { + off -= fwd ? n : 0; + r = newtmp("blt", Kl, fn); + r1 = newtmp("blt", Kl, fn); + ro = getcon(off, fn); + emit(p->st, 0, R, r, r1); + emit(Oadd, Kl, r1, sd[1], ro); + r1 = newtmp("blt", Kl, fn); + emit(p->ld, p->cls, r, r1, R); + emit(Oadd, Kl, r1, sd[0], ro); + off += fwd ? 0 : n; + } +} + +static void +ins(Ins **pi, int *new, Blk *b, Fn *fn) +{ + ulong ni; + Ins *i; + + i = *pi; + /* simplify more instructions here; + * copy 0 into xor, mul 2^n into shift, + * bit rotations, ... */ + switch (i->op) { + case Oblit1: + assert(i > b->ins); + assert((i-1)->op == Oblit0); + if (!*new) { + curi = &insb[NIns]; + ni = &b->ins[b->nins] - (i+1); + curi -= ni; + icpy(curi, i+1, ni); + *new = 1; + } + blit((i-1)->arg, rsval(i->arg[0]), fn); + *pi = i-1; + break; + default: + if (*new) + emiti(*i); + break; + } +} + +void +simpl(Fn *fn) +{ + Blk *b; + Ins *i; + int new; + + for (b=fn->start; b; b=b->link) { + new = 0; + for (i=&b->ins[b->nins]; i!=b->ins;) { + --i; + ins(&i, &new, b, fn); + } + if (new) { + b->nins = &insb[NIns] - curi; + idup(&b->ins, curi, b->nins); + } + } +} diff --git a/test/load2.ssa b/test/load2.ssa new file mode 100644 index 0000000..05c12a6 --- /dev/null +++ b/test/load2.ssa @@ -0,0 +1,75 @@ +# blit & load elimination + +export +function $f() { +@start + %x =l alloc4 12 + %y =l alloc4 12 + + %x1 =l add 1, %x + %x2 =l add 1, %x1 + %x3 =l add 1, %x2 + %x4 =l add 1, %x3 + %x5 =l add 1, %x4 + %x6 =l add 1, %x5 + %x7 =l add 1, %x6 + %x8 =l add 1, %x7 + %x9 =l add 1, %x8 + %xa =l add 1, %x9 + %xb =l add 1, %xa + + %y1 =l add 1, %y + %y4 =l add 4, %y + + storew 287454020, %x4 # 0x11223344 + storew 1432778632, %y # 0x55667788 + blit %y, %x5, 1 + %n =w load %x4 + call $px(w %n) # 0x11228844 + + storew 287454020, %x4 # 0x11223344 + storew 1432778632, %y # 0x55667788 + blit %y, %x5, 2 + %n =w load %x4 + call $px(w %n) # 0x11778844 + + storew 287454020, %x4 # 0x11223344 + storew 1432778632, %y # 0x55667788 + blit %y, %x5, 4 + %n =w load %x4 + call $px(w %n) # 0x66778844 + + storew 287454020, %x4 # 0x11223344 + storew 1432778632, %y # 0x55667788 + blit %y, %x2, 4 + %n =w load %x4 + call $px(w %n) # 0x11225566 + + storew 287454020, %x4 # 0x11223344 + storew 0, %y + storew 1432778632, %y4 # 0x55667788 + blit %y1, %x2, 7 + %n =w load %x4 + call $px(w %n) # 0x66778800 + + ret +} + +# >>> driver +# #include <stdio.h> +# void px(unsigned n) { +# printf("0x%08x\n", n); +# } +# int main() { +# extern void f(void); +# f(); +# } +# <<< + +# >>> output +# 0x11228844 +# 0x11778844 +# 0x66778844 +# 0x11225566 +# 0x66778800 +# <<< diff --git a/test/mem1.ssa b/test/mem1.ssa new file mode 100644 index 0000000..b7045a6 --- /dev/null +++ b/test/mem1.ssa @@ -0,0 +1,35 @@ +type :i3 = { w 3 } + +export +function :i3 $blit() { +@start + %l0 =l alloc4 12 + %l1 =l alloc4 12 + + storew 287454020, %l0 + %l04 =l add %l0, 4 + storew 1432778632, %l04 + %l08 =l add %l0, 8 + storew 2578103244, %l08 + + # we expect that %l0 and %l1 + # are coalesced and the blit + # goes backwards + %l11 =l add %l1, 1 + blit %l0, %l11, 11 + + storeb 221, %l1 + + ret %l1 +} + +# >>> driver +# struct i3 { int a, b, c; }; +# extern struct i3 blit(); +# int main() { +# struct i3 s = blit(); +# return !(s.a == 0x223344dd +# && s.b == 0x66778811 +# && s.c == 0xaabbcc55); +# } +# <<< diff --git a/tools/lexh.c b/tools/lexh.c index a07514e..5ceb4ee 100644 --- a/tools/lexh.c +++ b/tools/lexh.c @@ -26,7 +26,7 @@ char *tok[] = { "vaarg", "vastart", "...", "env", "call", "phi", "jmp", "jnz", "ret", "hlt", "export", - "function", "type", "data", "section", "align", + "function", "type", "data", "section", "align", "blit", "l", "w", "sh", "uh", "h", "sb", "ub", "b", "d", "s", "z", "loadw", "loadl", "loads", "loadd", "alloc1", "alloc2", diff --git a/util.c b/util.c index 41b2625..8432b5a 100644 --- a/util.c +++ b/util.c @@ -404,36 +404,6 @@ addcon(Con *c0, Con *c1) } void -blit(Ref rdst, uint doff, Ref rsrc, uint boff, uint sz, Fn *fn) -{ - struct { int st, ld, cls, size; } *p, tbl[] = { - { Ostorel, Oload, Kl, 8 }, - { Ostorew, Oload, Kw, 4 }, - { Ostoreh, Oloaduh, Kw, 2 }, - { Ostoreb, Oloadub, Kw, 1 } - }; - Ref r, r1; - uint s; - - for (p=tbl; sz; p++) - for (s=p->size; sz>=s; sz-=s, doff+=s, boff+=s) { - r = newtmp("blt", Kl, fn); - r1 = newtmp("blt", Kl, fn); - emit(p->st, 0, R, r, r1); - emit(Oadd, Kl, r1, rdst, getcon(doff, fn)); - r1 = newtmp("blt", Kl, fn); - emit(p->ld, p->cls, r, r1, R); - emit(Oadd, Kl, r1, rsrc, getcon(boff, fn)); - } -} - -void -blit0(Ref rdst, Ref rsrc, uint sz, Fn *fn) -{ - blit(rdst, 0, rsrc, 0, sz, fn); -} - -void salloc(Ref rt, Ref rs, Fn *fn) { Ref r0, r1; |