From 4e93eeaa3b63b6ae50954a29662cc3ea6be48b23 Mon Sep 17 00:00:00 2001 From: Michael Forney Date: Sat, 12 Feb 2022 02:27:50 -0800 Subject: add rv64 backend It is mostly complete, but still has a few ABI bugs when passing floats in structs, or when structs are passed partly in register, and partly on stack. --- rv64/isel.c | 278 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 278 insertions(+) create mode 100644 rv64/isel.c (limited to 'rv64/isel.c') diff --git a/rv64/isel.c b/rv64/isel.c new file mode 100644 index 0000000..bb6fb02 --- /dev/null +++ b/rv64/isel.c @@ -0,0 +1,278 @@ +#include "all.h" + +static int +memarg(Ref *r, int op, Ins *i) +{ + return ((isload(op) || op == Ocall) && r == &i->arg[0]) + || (isstore(op) && r == &i->arg[1]); +} + +static int +immarg(Ref *r, int op, Ins *i) +{ + return rv64_op[op].imm && r == &i->arg[1]; +} + +static void +fixarg(Ref *r, int k, Ins *i, Fn *fn) +{ + char buf[32]; + Ref r0, r1; + int s, n, op; + Con *c; + + r0 = r1 = *r; + op = i ? i->op : Ocopy; + switch (rtype(r0)) { + case RCon: + c = &fn->con[r0.val]; + if (c->type == CAddr && memarg(r, op, i)) + break; + if (c->type == CBits && immarg(r, op, i) + && -2048 <= c->bits.i && c->bits.i < 2048) + break; + r1 = newtmp("isel", k, fn); + if (KBASE(k) == 1) { + /* load floating points from memory + * slots, they can't be used as + * immediates + */ + assert(c->type == CBits); + n = gasstash(&c->bits, KWIDE(k) ? 8 : 4); + vgrow(&fn->con, ++fn->ncon); + c = &fn->con[fn->ncon-1]; + sprintf(buf, "fp%d", n); + *c = (Con){.type = CAddr, .local = 1}; + c->label = intern(buf); + emit(Oload, k, r1, CON(c-fn->con), R); + break; + } + emit(Ocopy, k, r1, r0, R); + break; + case RTmp: + if (isreg(r0)) + break; + s = fn->tmp[r0.val].slot; + if (s != -1) { + /* aggregate passed by value on + * stack, or fast local address, + * replace with slot if we can + */ + if (memarg(r, op, i)) { + r1 = SLOT(s); + break; + } + r1 = newtmp("isel", k, fn); + emit(Oaddr, k, r1, SLOT(s), R); + break; + } + if (k == Kw && fn->tmp[r0.val].cls == Kl) { + /* TODO: this sign extension isn't needed + * for 32-bit arithmetic instructions + */ + r1 = newtmp("isel", k, fn); + emit(Oextsw, Kl, r1, r0, R); + } else { + assert(k == fn->tmp[r0.val].cls); + } + break; + } + *r = r1; +} + +static void +negate(Ref *pr, Fn *fn) +{ + Ref r; + + r = newtmp("isel", Kw, fn); + emit(Oxor, Kw, *pr, r, getcon(1, fn)); + *pr = r; +} + +static void +selcmp(Ins i, int k, int op, Fn *fn) +{ + Ins *icmp; + Ref r, r0, r1; + int sign, swap, neg; + + switch (op) { + case Cieq: + r = newtmp("isel", k, fn); + emit(Oreqz, i.cls, i.to, r, R); + emit(Oxor, k, r, i.arg[0], i.arg[1]); + icmp = curi; + fixarg(&icmp->arg[0], k, icmp, fn); + fixarg(&icmp->arg[1], k, icmp, fn); + return; + case Cine: + r = newtmp("isel", k, fn); + emit(Ornez, i.cls, i.to, r, R); + emit(Oxor, k, r, i.arg[0], i.arg[1]); + icmp = curi; + fixarg(&icmp->arg[0], k, icmp, fn); + fixarg(&icmp->arg[1], k, icmp, fn); + return; + case Cisge: sign = 1, swap = 0, neg = 1; break; + case Cisgt: sign = 1, swap = 1, neg = 0; break; + case Cisle: sign = 1, swap = 1, neg = 1; break; + case Cislt: sign = 1, swap = 0, neg = 0; break; + case Ciuge: sign = 0, swap = 0, neg = 1; break; + case Ciugt: sign = 0, swap = 1, neg = 0; break; + case Ciule: sign = 0, swap = 1, neg = 1; break; + case Ciult: sign = 0, swap = 0, neg = 0; break; + case NCmpI+Cfeq: + case NCmpI+Cfge: + case NCmpI+Cfgt: + case NCmpI+Cfle: + case NCmpI+Cflt: + swap = 0, neg = 0; + break; + case NCmpI+Cfuo: + negate(&i.to, fn); + /* fallthrough */ + case NCmpI+Cfo: + r0 = newtmp("isel", i.cls, fn); + r1 = newtmp("isel", i.cls, fn); + emit(Oand, i.cls, i.to, r0, r1); + op = KWIDE(k) ? Oceqd : Oceqs; + emit(op, i.cls, r0, i.arg[0], i.arg[0]); + icmp = curi; + fixarg(&icmp->arg[0], k, icmp, fn); + fixarg(&icmp->arg[1], k, icmp, fn); + emit(op, i.cls, r1, i.arg[1], i.arg[1]); + icmp = curi; + fixarg(&icmp->arg[0], k, icmp, fn); + fixarg(&icmp->arg[1], k, icmp, fn); + return; + case NCmpI+Cfne: + swap = 0, neg = 1; + i.op = KWIDE(k) ? Oceqd : Oceqs; + break; + default: + assert(0 && "unknown comparison"); + } + if (op < NCmpI) + i.op = sign ? Ocsltl : Ocultl; + if (swap) { + r = i.arg[0]; + i.arg[0] = i.arg[1]; + i.arg[1] = r; + } + if (neg) + negate(&i.to, fn); + emiti(i); + icmp = curi; + fixarg(&icmp->arg[0], k, icmp, fn); + fixarg(&icmp->arg[1], k, icmp, fn); +} + +static void +sel(Ins i, Fn *fn) +{ + Ref r0, r1; + Ins *i0; + int ck, cc; + int64_t sz; + + switch (i.op) { + case Onop: + break; + case Oalloc4: + case Oalloc8: + case Oalloc16: + /* we need to make sure + * the stack remains aligned + * (rsp = 0) mod 16 + */ + fn->dynalloc = 1; + if (rtype(i.arg[0]) == RCon) { + sz = fn->con[i.arg[0].val].bits.i; + if (sz < 0) + err("invalid alloc size %"PRId64, sz); + sz = (sz + 15) & -16; + emit(Osalloc, Kl, i.to, getcon(sz, fn), R); + fixarg(&curi->arg[0], Kl, curi, fn); + } else { + /* r0 = (i.arg[0] + 15) & -16 */ + r0 = newtmp("isel", Kl, fn); + r1 = newtmp("isel", Kl, fn); + emit(Osalloc, Kl, i.to, r0, R); + emit(Oand, Kl, r0, r1, getcon(-16, fn)); + emit(Oadd, Kl, r1, i.arg[0], getcon(15, fn)); + if (fn->tmp[i.arg[0].val].slot != -1) + err("unlikely argument %%%s in %s", + fn->tmp[i.arg[0].val].name, optab[i.op].name); + } + break; + default: + if (iscmp(i.op, &ck, &cc)) { + selcmp(i, ck, cc, fn); + break; + } + emiti(i); + i0 = curi; /* fixarg() can change curi */ + fixarg(&i0->arg[0], argcls(&i, 0), i0, fn); + fixarg(&i0->arg[1], argcls(&i, 1), i0, fn); + } +} + +static void +seljmp(Blk *b, Fn *fn) +{ + /* TODO: replace cmp+jnz with beq/bne/blt[u]/bge[u] */ + if (b->jmp.type == Jjnz) + fixarg(&b->jmp.arg, Kw, 0, fn); +} + +void +rv64_isel(Fn *fn) +{ + Blk *b, **sb; + Ins *i; + Phi *p; + uint n; + int al; + int64_t sz; + + /* assign slots to fast allocs */ + b = fn->start; + /* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */ + for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2) + for (i=b->ins; i<&b->ins[b->nins]; i++) + if (i->op == al) { + if (rtype(i->arg[0]) != RCon) + break; + sz = fn->con[i->arg[0].val].bits.i; + if (sz < 0 || sz >= INT_MAX-15) + err("invalid alloc size %"PRId64, sz); + sz = (sz + n-1) & -n; + sz /= 4; + if (sz > INT_MAX - fn->slot) + die("alloc too large"); + fn->tmp[i->to.val].slot = fn->slot; + fn->slot += sz; + *i = (Ins){.op = Onop}; + } + + for (b=fn->start; b; b=b->link) { + curi = &insb[NIns]; + for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++) + for (p=(*sb)->phi; p; p=p->link) { + for (n=0; p->blk[n] != b; n++) + assert(n+1 < p->narg); + fixarg(&p->arg[n], p->cls, 0, fn); + } + seljmp(b, fn); + for (i=&b->ins[b->nins]; i!=b->ins;) + sel(*--i, fn); + b->nins = &insb[NIns] - curi; + idup(&b->ins, curi, b->nins); + } + + if (debug['I']) { + fprintf(stderr, "\n> After instruction selection:\n"); + printfn(fn, stderr); + } +} -- cgit 1.4.1