From 4e93eeaa3b63b6ae50954a29662cc3ea6be48b23 Mon Sep 17 00:00:00 2001 From: Michael Forney Date: Sat, 12 Feb 2022 02:27:50 -0800 Subject: add rv64 backend It is mostly complete, but still has a few ABI bugs when passing floats in structs, or when structs are passed partly in register, and partly on stack. --- rv64/abi.c | 584 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ rv64/all.h | 49 +++++ rv64/emit.c | 499 +++++++++++++++++++++++++++++++++++++++++++++++++++ rv64/isel.c | 278 +++++++++++++++++++++++++++++ rv64/targ.c | 53 ++++++ 5 files changed, 1463 insertions(+) create mode 100644 rv64/abi.c create mode 100644 rv64/all.h create mode 100644 rv64/emit.c create mode 100644 rv64/isel.c create mode 100644 rv64/targ.c (limited to 'rv64') diff --git a/rv64/abi.c b/rv64/abi.c new file mode 100644 index 0000000..1dd4fb0 --- /dev/null +++ b/rv64/abi.c @@ -0,0 +1,584 @@ +#include "all.h" + +typedef struct Class Class; +typedef struct Insl Insl; +typedef struct Params Params; + +enum { + Cptr = 1, /* replaced by a pointer */ + Cstk1 = 2, /* pass first XLEN on the stack */ + Cstk2 = 4, /* pass second XLEN on the stack */ + Cstk = Cstk1 | Cstk2, + Cfpint = 8, /* float passed like integer */ +}; + +struct Class { + char class; + uint size; + Typ *t; + uchar nreg; + uchar ngp; + uchar nfp; + int reg[2]; + int cls[2]; +}; + +struct Insl { + Ins i; + Insl *link; +}; + +struct Params { + int ngp; + int nfp; + int stk; /* stack offset for varargs */ +}; + +static int gpreg[] = { A0, A1, A2, A3, A4, A5, A6, A7}; +static int fpreg[] = {FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7}; + +/* layout of call's second argument (RCall) + * + * 29 8 4 2 0 + * |0.00|xxxx|xxxx|xx|xx| range + * | | | ` gp regs returned (0..2) + * | | ` fp regs returned (0..2) + * | ` gp regs passed (0..8) + * ` fp regs passed (0..8) + */ + +bits +rv64_retregs(Ref r, int p[2]) +{ + bits b; + int ngp, nfp; + + assert(rtype(r) == RCall); + ngp = r.val & 3; + nfp = (r.val >> 2) & 3; + if (p) { + p[0] = 
ngp; + p[1] = nfp; + } + b = 0; + while (ngp--) + b |= BIT(A0+ngp); + while (nfp--) + b |= BIT(FA0+nfp); + return b; +} + +bits +rv64_argregs(Ref r, int p[2]) +{ + bits b; + int ngp, nfp; + + assert(rtype(r) == RCall); + ngp = (r.val >> 4) & 15; + nfp = (r.val >> 8) & 15; + b = 0; + if (p) { + p[0] = ngp; + p[1] = nfp; + } + b = 0; + while (ngp--) + b |= BIT(A0+ngp); + while (nfp--) + b |= BIT(FA0+nfp); + return b; +} + +static void +typclass(Class *c, Typ *t, int *gp, int *fp) +{ + uint64_t sz; + uint n; + + sz = (t->size + 7) & ~7; + c->t = t; + c->class = 0; + c->ngp = 0; + c->nfp = 0; + + if (t->align > 4) + err("alignments larger than 16 are not supported"); + + if (t->dark || sz > 16 || sz == 0) { + /* large structs are replaced by a + * pointer to some caller-allocated + * memory */ + c->class |= Cptr; + c->size = 8; + return; + } + + c->size = sz; + + /* TODO: float */ + + for (n=0; nngp++) { + c->reg[n] = *gp++; + c->cls[n] = Kl; + } + + c->nreg = n; +} + +static void +sttmps(Ref tmp[], int cls[], uint nreg, Ref mem, Fn *fn) +{ + static int st[] = { + [Kw] = Ostorew, [Kl] = Ostorel, + [Ks] = Ostores, [Kd] = Ostored + }; + uint n; + uint64_t off; + Ref r; + + assert(nreg <= 4); + off = 0; + for (n=0; njmp.type; + + if (!isret(j) || j == Jret0) + return; + + r = b->jmp.arg; + b->jmp.type = Jret0; + + if (j == Jretc) { + typclass(&cr, &typ[fn->retty], gpreg, fpreg); + cty = (cr.nfp << 2) | cr.ngp; + if (cr.class & Cptr) { + assert(rtype(fn->retr) == RTmp); + blit(fn->retr, 0, r, cr.t->size, fn); + } else { + ldregs(cr.reg, cr.cls, cr.nreg, r, fn); + } + } else { + k = j - Jretw; + if (KBASE(k) == 0) { + emit(Ocopy, k, TMP(A0), r, R); + cty = 1; + } else { + emit(Ocopy, k, TMP(FA0), r, R); + cty = 1 << 2; + } + } + + b->jmp.arg = CALL(cty); +} + +static int +argsclass(Ins *i0, Ins *i1, Class *carg, Ref *env, int retptr) +{ + int ngp, nfp, *gp, *fp, vararg; + Class *c; + Ins *i; + + gp = gpreg; + fp = fpreg; + ngp = 8; + nfp = 8; + vararg = 0; + if (retptr) { + 
gp++; + ngp--; + } + for (i=i0, c=carg; iop) { + case Opar: + case Oarg: + c->cls[0] = i->cls; + c->size = 8; + /* variadic float args are passed in int regs */ + if (!vararg && KBASE(i->cls) == 1 && nfp > 0) { + nfp--; + c->reg[0] = *fp++; + } else if (ngp > 0) { + if (KBASE(i->cls) == 1) + c->class |= Cfpint; + ngp--; + c->reg[0] = *gp++; + } else { + c->class |= Cstk1; + } + break; + case Oargv: + /* subsequent arguments are variadic */ + vararg = 1; + break; + case Oparc: + case Oargc: + typclass(c, &typ[i->arg[0].val], gp, fp); + if (c->class & Cptr) { + c->ngp = 1; + c->reg[0] = *gp; + c->cls[0] = Kl; + } + if (c->ngp <= ngp && c->nfp <= nfp) { + ngp -= c->ngp; + nfp -= c->nfp; + gp += c->ngp; + fp += c->nfp; + break; + } + c->ngp += c->nfp; + c->nfp = 0; + if (c->ngp <= ngp) { + ngp -= c->ngp; + gp += c->ngp; + break; + } + c->class |= Cstk1; + if (c->ngp - 1 > ngp) + c->class |= Cstk2; + break; + case Opare: + *env = i->to; + break; + case Oarge: + *env = i->arg[0]; + break; + } + } + return (gp-gpreg) << 4 | (fp-fpreg) << 8; +} + +static void +stkblob(Ref r, Class *c, Fn *fn, Insl **ilp) +{ + Insl *il; + int al; + uint64_t sz; + + il = alloc(sizeof *il); + al = c->t->align - 2; /* NAlign == 3 */ + if (al < 0) + al = 0; + sz = c->class & Cptr ? 
c->t->size : c->size; + il->i = (Ins){Oalloc+al, Kl, r, {getcon(sz, fn)}}; + il->link = *ilp; + *ilp = il; +} + +static void +selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp) +{ + Ins *i; + Class *ca, *c, cr; + int k, cty, envc; + uint n; + uint64_t stk, off; + Ref r, r1, env, tmp[2]; + + env = R; + ca = alloc((i1-i0) * sizeof ca[0]); + cr.class = 0; + + if (!req(i1->arg[1], R)) + typclass(&cr, &typ[i1->arg[1].val], gpreg, fpreg); + + cty = argsclass(i0, i1, ca, &env, cr.class & Cptr); + stk = 0; + for (i=i0, c=ca; iop == Oargv) + continue; + if (c->class & Cptr) { + i->arg[0] = newtmp("abi", Kl, fn); + stkblob(i->arg[0], c, fn, ilp); + i->op = Oarg; + } + if (c->class & Cstk1) + stk += 8; + if (c->class & Cstk2) + stk += 8; + } + if (stk) + emit(Osalloc, Kl, R, getcon(-stk, fn), R); + + if (!req(i1->arg[1], R)) { + stkblob(i1->to, &cr, fn, ilp); + cty |= (cr.nfp << 2) | cr.ngp; + if (cr.class & Cptr) { + cty |= 1; + emit(Ocopy, Kw, R, TMP(A0), R); + } else { + sttmps(tmp, cr.cls, cr.nreg, i1->to, fn); + for (n=0; ncls) == 0) { + emit(Ocopy, i1->cls, i1->to, TMP(A0), R); + cty |= 1; + } else { + emit(Ocopy, i1->cls, i1->to, TMP(FA0), R); + cty |= 1 << 2; + } + + envc = !req(R, env); + if (envc) + die("todo (rv64 abi): env calls"); + emit(Ocall, 0, R, i1->arg[0], CALL(cty)); + + if (cr.class & Cptr) + /* struct return argument */ + emit(Ocopy, Kl, TMP(A0), i1->to, R); + + /* move arguments into registers */ + for (i=i0, c=ca; iop == Oargv || c->class & Cstk1) + continue; + if (i->op == Oargc) { + ldregs(c->reg, c->cls, c->nreg, i->arg[1], fn); + } else if (c->class & Cfpint) { + k = KWIDE(*c->cls) ? Kl : Kw; + r = newtmp("abi", k, fn); + emit(Ocopy, k, TMP(c->reg[0]), r, R); + c->reg[0] = r.val; + } else { + emit(Ocopy, *c->cls, TMP(*c->reg), i->arg[0], R); + } + } + + for (i=i0, c=ca; iclass & Cfpint) + emit(Ocast, KWIDE(*c->cls) ? 
Kl : Kw, TMP(*c->reg), i->arg[0], R); + if (c->class & Cptr) + blit(i->arg[0], 0, i->arg[1], c->t->size, fn); + } + + if (!stk) + return; + + r = newtmp("abi", Kl, fn); + for (i=i0, c=ca, off=0; iop == Oargv || (c->class & Cstk) == 0) + continue; + if (i->op != Oargc) { + r1 = newtmp("abi", Kl, fn); + /* w arguments are stored sign-extended + * to 64-bits + * + * s arguments can just be stored with + * Ostores into the first 32-bits in the + * stack position since the ABI says the + * upper bits are undefined + */ + emit(i->cls == Kw ? Ostorel : Ostorew+i->cls, 0, R, i->arg[0], r1); + if (i->cls == Kw) { + /* TODO: we only need this sign extension + * for subtyped l temporaries passed as w + * arguments (see rv64/isel.c:fixarg) + * + * however, we cannot just fix it in isel + * since by that point we have forgotten + * the original argument type + */ + curi->arg[0] = newtmp("abi", Kl, fn); + emit(Oextsw, Kl, curi->arg[0], i->arg[0], R); + } + emit(Oadd, Kl, r1, r, getcon(off, fn)); + } else + blit(r, off, i->arg[1], c->t->size, fn); + off += c->size; + } + emit(Osalloc, Kl, r, getcon(stk, fn), R); +} + +static Params +selpar(Fn *fn, Ins *i0, Ins *i1) +{ + Class *ca, *c, cr; + Insl *il; + Ins *i; + int n, s, cty; + Ref r, env, tmp[16], *t; + + env = R; + ca = alloc((i1-i0) * sizeof ca[0]); + cr.class = 0; + curi = &insb[NIns]; + + if (fn->retty >= 0) { + typclass(&cr, &typ[fn->retty], gpreg, fpreg); + if (cr.class & Cptr) { + fn->retr = newtmp("abi", Kl, fn); + emit(Ocopy, Kl, fn->retr, TMP(A0), R); + } + } + + cty = argsclass(i0, i1, ca, &env, cr.class & Cptr); + fn->reg = rv64_argregs(CALL(cty), 0); + + il = 0; + t = tmp; + for (i=i0, c=ca; iop != Oparc || (c->class & (Cptr|Cstk))) + continue; + sttmps(t, c->cls, c->nreg, i->to, fn); + stkblob(i->to, c, fn, &il); + t += c->nreg; + } + for (; il; il=il->link) + emiti(il->i); + + t = tmp; + for (i=i0, c=ca, s=2 + 8 * fn->vararg; iop == Oparc + && (c->class & Cptr) == 0) { + if (c->class & Cstk) { + 
fn->tmp[i->to.val].slot = -s; + s += c->size / 8; + } else { + for (n=0; nnreg; n++) { + r = TMP(c->reg[n]); + emit(Ocopy, c->cls[n], *t++, r, R); + } + } + } else if (c->class & Cstk1) { + emit(Oload, c->cls[0], i->to, SLOT(-s), R); + s++; + } else { + emit(Ocopy, c->cls[0], i->to, TMP(c->reg[0]), R); + } + } + + if (!req(R, env)) + die("todo (rv64 abi): env calls"); + + return (Params){ + .stk = s, + .ngp = (cty >> 4) & 15, + .nfp = (cty >> 8) & 15, + }; +} + +static void +selvaarg(Fn *fn, Ins *i) +{ + Ref loc, newloc; + + loc = newtmp("abi", Kl, fn); + newloc = newtmp("abi", Kl, fn); + emit(Ostorel, Kw, R, newloc, i->arg[0]); + emit(Oadd, Kl, newloc, loc, getcon(8, fn)); + emit(Oload, i->cls, i->to, loc, R); + emit(Oload, Kl, loc, i->arg[0], R); +} + +static void +selvastart(Fn *fn, Params p, Ref ap) +{ + Ref rsave; + int s; + + rsave = newtmp("abi", Kl, fn); + emit(Ostorel, Kw, R, rsave, ap); + s = p.stk > 2 + 8 * fn->vararg ? p.stk : 2 + p.ngp; + emit(Oaddr, Kl, rsave, SLOT(-s), R); +} + +void +rv64_abi(Fn *fn) +{ + Blk *b; + Ins *i, *i0, *ip; + Insl *il; + int n; + Params p; + + for (b=fn->start; b; b=b->link) + b->visit = 0; + + /* lower parameters */ + for (b=fn->start, i=b->ins; i<&b->ins[b->nins]; i++) + if (!ispar(i->op)) + break; + p = selpar(fn, b->ins, i); + n = b->nins - (i - b->ins) + (&insb[NIns] - curi); + i0 = alloc(n * sizeof(Ins)); + ip = icpy(ip = i0, curi, &insb[NIns] - curi); + ip = icpy(ip, i, &b->ins[b->nins] - i); + b->nins = n; + b->ins = i0; + + /* lower calls, returns, and vararg instructions */ + il = 0; + b = fn->start; + do { + if (!(b = b->link)) + b = fn->start; /* do it last */ + if (b->visit) + continue; + curi = &insb[NIns]; + selret(b, fn); + for (i=&b->ins[b->nins]; i!=b->ins;) + switch ((--i)->op) { + default: + emiti(*i); + break; + case Ocall: + for (i0=i; i0>b->ins; i0--) + if (!isarg((i0-1)->op)) + break; + selcall(fn, i0, i, &il); + i = i0; + break; + case Ovastart: + selvastart(fn, p, i->arg[0]); + break; + case 
Ovaarg: + selvaarg(fn, i); + break; + case Oarg: + case Oargc: + die("unreachable"); + } + if (b == fn->start) + for (; il; il=il->link) + emiti(il->i); + b->nins = &insb[NIns] - curi; + idup(&b->ins, curi, b->nins); + } while (b != fn->start); + + if (debug['A']) { + fprintf(stderr, "\n> After ABI lowering:\n"); + printfn(fn, stderr); + } +} diff --git a/rv64/all.h b/rv64/all.h new file mode 100644 index 0000000..eb2daa9 --- /dev/null +++ b/rv64/all.h @@ -0,0 +1,49 @@ +#include "../all.h" + +typedef struct Rv64Op Rv64Op; + +enum Rv64Reg { + /* caller-save */ + T0 = RXX + 1, T1, T2, T3, T4, T5, + A0, A1, A2, A3, A4, A5, A6, A7, + + /* callee-save */ + S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, + + /* globally live */ + FP, SP, GP, TP, RA, T6, + + /* FP caller-save */ + FT0, FT1, FT2, FT3, FT4, FT5, FT6, FT7, FT8, FT9, FT10, FT11, + FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7, + + /* FP callee-save */ + FS0, FS1, FS2, FS3, FS4, FS5, FS6, FS7, FS8, FS9, FS10, FS11, + + NFPR = FS11 - FT0 + 1, + NGPR = T6 - T0 + 1, + NGPS = A7 - T0 + 1, + NFPS = FA7 - FT0 + 1, + NCLR = (S11 - S1 + 1) + (FS11 - FS0 + 1), +}; +MAKESURE(reg_not_tmp, FS11 < (int)Tmp0); + +struct Rv64Op { + char imm; +}; + +/* targ.c */ +extern int rv64_rsave[]; +extern int rv64_rclob[]; +extern Rv64Op rv64_op[]; + +/* abi.c */ +bits rv64_retregs(Ref, int[2]); +bits rv64_argregs(Ref, int[2]); +void rv64_abi(Fn *); + +/* isel.c */ +void rv64_isel(Fn *); + +/* emit.c */ +void rv64_emitfn(Fn *, FILE *); diff --git a/rv64/emit.c b/rv64/emit.c new file mode 100644 index 0000000..b34b424 --- /dev/null +++ b/rv64/emit.c @@ -0,0 +1,499 @@ +#include "all.h" + +enum { + Ki = -1, /* matches Kw and Kl */ + Ka = -2, /* matches all classes */ +}; + +static struct { + short op; + short cls; + char *asm; +} omap[] = { + { Oadd, Ki, "add%k %=, %0, %1" }, + { Oadd, Ka, "fadd.%k %=, %0, %1" }, + { Osub, Ki, "sub%k %=, %0, %1" }, + { Osub, Ka, "fsub.%k %=, %0, %1" }, + { Oneg, Ki, "neg%k %=, %0" }, + { Oneg, Ka, "fneg.%k %=, %0" 
}, + { Odiv, Ki, "div%k %=, %0, %1" }, + { Odiv, Ka, "fdiv.%k %=, %0, %1" }, + { Orem, Ki, "rem%k %=, %0, %1" }, + { Orem, Kl, "rem %=, %0, %1" }, + { Oudiv, Ki, "divu%k %=, %0, %1" }, + { Ourem, Ki, "remu%k %=, %0, %1" }, + { Omul, Ki, "mul%k %=, %0, %1" }, + { Omul, Ka, "fmul.%k %=, %0, %1" }, + { Oand, Ki, "and %=, %0, %1" }, + { Oor, Ki, "or %=, %0, %1" }, + { Oxor, Ki, "xor %=, %0, %1" }, + { Osar, Ki, "sra%k %=, %0, %1" }, + { Oshr, Ki, "srl%k %=, %0, %1" }, + { Oshl, Ki, "sll%k %=, %0, %1" }, + { Ocsltl, Ki, "slt %=, %0, %1" }, + { Ocultl, Ki, "sltu %=, %0, %1" }, + { Oceqs, Ki, "feq.s %=, %0, %1" }, + { Ocges, Ki, "fge.s %=, %0, %1" }, + { Ocgts, Ki, "fgt.s %=, %0, %1" }, + { Ocles, Ki, "fle.s %=, %0, %1" }, + { Oclts, Ki, "flt.s %=, %0, %1" }, + { Oceqd, Ki, "feq.d %=, %0, %1" }, + { Ocged, Ki, "fge.d %=, %0, %1" }, + { Ocgtd, Ki, "fgt.d %=, %0, %1" }, + { Ocled, Ki, "fle.d %=, %0, %1" }, + { Ocltd, Ki, "flt.d %=, %0, %1" }, + { Ostoreb, Kw, "sb %0, %M1" }, + { Ostoreh, Kw, "sh %0, %M1" }, + { Ostorew, Kw, "sw %0, %M1" }, + { Ostorel, Ki, "sd %0, %M1" }, + { Ostores, Kw, "fsw %0, %M1" }, + { Ostored, Kw, "fsd %0, %M1" }, + { Oloadsb, Ki, "lb %=, %M0" }, + { Oloadub, Ki, "lbu %=, %M0" }, + { Oloadsh, Ki, "lh %=, %M0" }, + { Oloaduh, Ki, "lhu %=, %M0" }, + { Oloadsw, Ki, "lw %=, %M0" }, + /* riscv64 always sign-extends 32-bit + * values stored in 64-bit registers + */ + { Oloaduw, Kw, "lw %=, %M0" }, + { Oloaduw, Kl, "lwu %=, %M0" }, + { Oload, Kw, "lw %=, %M0" }, + { Oload, Kl, "ld %=, %M0" }, + { Oload, Ks, "flw %=, %M0" }, + { Oload, Kd, "fld %=, %M0" }, + { Oextsb, Ki, "sext.b %=, %0" }, + { Oextub, Ki, "zext.b %=, %0" }, + { Oextsh, Ki, "sext.h %=, %0" }, + { Oextuh, Ki, "zext.h %=, %0" }, + { Oextsw, Kl, "sext.w %=, %0" }, + { Oextuw, Kl, "zext.w %=, %0" }, + { Otruncd, Ks, "fcvt.s.d %=, %0" }, + { Oexts, Kd, "fcvt.d.s %=, %0" }, + { Ostosi, Kw, "fcvt.w.s %=, %0, rtz" }, + { Ostosi, Kl, "fcvt.l.s %=, %0, rtz" }, + { Ostoui, Kw, "fcvt.wu.s %=, %0, rtz" 
}, + { Ostoui, Kl, "fcvt.lu.s %=, %0, rtz" }, + { Odtosi, Kw, "fcvt.w.d %=, %0, rtz" }, + { Odtosi, Kl, "fcvt.l.d %=, %0, rtz" }, + { Odtoui, Kw, "fcvt.wu.d %=, %0, rtz" }, + { Odtoui, Kl, "fcvt.lu.d %=, %0, rtz" }, + { Oswtof, Ka, "fcvt.%k.w %=, %0" }, + { Ouwtof, Ka, "fcvt.%k.wu %=, %0" }, + { Osltof, Ka, "fcvt.%k.l %=, %0" }, + { Oultof, Ka, "fcvt.%k.lu %=, %0" }, + { Ocast, Kw, "fmv.x.w %=, %0" }, + { Ocast, Kl, "fmv.x.d %=, %0" }, + { Ocast, Ks, "fmv.w.x %=, %0" }, + { Ocast, Kd, "fmv.d.x %=, %0" }, + { Ocopy, Ki, "mv %=, %0" }, + { Ocopy, Ka, "fmv.%k %=, %0" }, + { Oswap, Ki, "mv %?, %0\n\tmv %0, %1\n\tmv %1, %?" }, + { Oreqz, Ki, "seqz %=, %0" }, + { Ornez, Ki, "snez %=, %0" }, + { Ocall, Kw, "jalr %0" }, + { NOp, 0, 0 } +}; + +static char *rname[] = { + [FP] = "fp", + [SP] = "sp", + [GP] = "gp", + [TP] = "tp", + [RA] = "ra", + [T6] = "t6", + [T0] = "t0", "t1", "t2", "t3", "t4", "t5", + [A0] = "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", + [S1] = "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11", + + [FT0] = "ft0", "ft1", "ft2", "ft3", "ft4", "ft5", "ft6", "ft7", "ft8", "ft9", "ft10", "ft11", + [FA0] = "fa0", "fa1", "fa2", "fa3", "fa4", "fa5", "fa6", "fa7", + [FS0] = "fs0", "fs1", "fs2", "fs3", "fs4", "fs5", "fs6", "fs7", "fs8", "fs9", "fs10", "fs11", +}; + +static int64_t +slot(int s, Fn *fn) +{ + s = ((int32_t)s << 3) >> 3; + assert(s <= fn->slot); + if (s < 0) + return 8 * -s; + else + return -4 * (fn->slot - s); +} + +static void +emitaddr(Con *c, FILE *f) +{ + char off[32], *p; + + if (c->bits.i) + sprintf(off, "+%"PRIi64, c->bits.i); + else + off[0] = 0; + p = c->local ? 
".L" : ""; + fprintf(f, "%s%s%s", p, str(c->label), off); +} + +static void +emitf(char *s, Ins *i, Fn *fn, FILE *f) +{ + static char clschr[] = {'w', 'l', 's', 'd'}; + Ref r; + int k, c; + Con *pc; + int64_t offset; + + fputc('\t', f); + for (;;) { + k = i->cls; + while ((c = *s++) != '%') + if (!c) { + fputc('\n', f); + return; + } else + fputc(c, f); + switch ((c = *s++)) { + default: + die("invalid escape"); + case '?': + if (KBASE(k) == 0) + fputs("t6", f); + else + abort(); + break; + case 'k': + if (i->cls != Kl) + fputc(clschr[i->cls], f); + break; + case '=': + case '0': + r = c == '=' ? i->to : i->arg[0]; + assert(isreg(r)); + fputs(rname[r.val], f); + break; + case '1': + r = i->arg[1]; + switch (rtype(r)) { + default: + die("invalid second argument"); + case RTmp: + assert(isreg(r)); + fputs(rname[r.val], f); + break; + case RCon: + pc = &fn->con[r.val]; + assert(pc->type == CBits); + assert(pc->bits.i >= -2048 && pc->bits.i <= 2047); + fprintf(f, "%d", (int)pc->bits.i); + break; + } + break; + case 'M': + c = *s++; + assert(c == '0' || c == '1'); + r = i->arg[c - '0']; + switch (rtype(r)) { + default: + die("invalid address argument"); + case RTmp: + fprintf(f, "0(%s)", rname[r.val]); + break; + case RCon: + pc = &fn->con[r.val]; + assert(pc->type == CAddr); + emitaddr(pc, f); + if (isstore(i->op) + || (isload(i->op) && KBASE(i->cls) == 1)) { + /* store (and float load) + * pseudo-instructions need a + * temporary register in which to + * load the address + */ + fprintf(f, ", t6"); + } + break; + case RSlot: + offset = slot(r.val, fn); + assert(offset >= -2048 && offset <= 2047); + fprintf(f, "%d(fp)", (int)offset); + break; + } + break; + } + } +} +/* load constant c into register r: addresses use la, bit constants use li after truncation to int32_t when class k is not wide */ +static void +loadcon(Con *c, int r, int k, FILE *f) +{ + char *rn; + int64_t n; + int w; + + w = KWIDE(k); + rn = rname[r]; + switch (c->type) { + case CAddr: + fprintf(f, "\tla %s, ", rn); + emitaddr(c, f); + fputc('\n', f); + break; + case CBits: + n = c->bits.i; + if (!w) + n = (int32_t)n; + fprintf(f, 
"\tli %s, %"PRId64"\n", rn, n); /* n is a signed int64_t, print it as such */ + break; + default: + die("invalid constant"); + } +} +/* if *pr is a slot whose fp-relative offset is outside [-2048, 2047], emit code computing its address into t6 and make *pr refer to t6 */ +static void +fixslot(Ref *pr, Fn *fn, FILE *f) +{ + Ref r; + int64_t s; + + r = *pr; + if (rtype(r) == RSlot) { + s = slot(r.val, fn); + if (s < -2048 || s > 2047) { + fprintf(f, "\tli t6, %"PRId64"\n", s); + fprintf(f, "\tadd t6, fp, t6\n"); + *pr = TMP(T6); + } + } +} +/* emit one instruction to f; copies, Onop, Oaddr, Ocall and Osalloc are special-cased, everything else goes through omap[] */ +static void +emitins(Ins *i, Fn *fn, FILE *f) +{ + int o; + char *rn; + int64_t s; + Con *con; + + switch (i->op) { + default: + if (isload(i->op)) + fixslot(&i->arg[0], fn, f); + else if (isstore(i->op)) + fixslot(&i->arg[1], fn, f); + Table: + /* most instructions are just pulled out of + * the table omap[], some special cases are + * detailed below */ + for (o=0;; o++) { + /* this linear search should really be a binary + * search */ + if (omap[o].op == NOp) + die("no match for %s(%c)", + optab[i->op].name, "wlsd"[i->cls]); + if (omap[o].op == i->op) + if (omap[o].cls == i->cls || omap[o].cls == Ka + || (omap[o].cls == Ki && KBASE(i->cls) == 0)) + break; + } + emitf(omap[o].asm, i, fn, f); + break; + case Ocopy: + if (req(i->to, i->arg[0])) + break; + if (rtype(i->to) == RSlot) { + switch (rtype(i->arg[0])) { + case RSlot: + case RCon: + die("unimplemented"); + break; + default: + assert(isreg(i->arg[0])); + i->arg[1] = i->to; + i->to = R; + switch (i->cls) { + case Kw: i->op = Ostorew; break; + case Kl: i->op = Ostorel; break; + case Ks: i->op = Ostores; break; + case Kd: i->op = Ostored; break; + } + fixslot(&i->arg[1], fn, f); + goto Table; + } + break; + } + assert(isreg(i->to)); + switch (rtype(i->arg[0])) { + case RCon: + loadcon(&fn->con[i->arg[0].val], i->to.val, i->cls, f); + break; + case RSlot: + i->op = Oload; + fixslot(&i->arg[0], fn, f); + goto Table; + default: + assert(isreg(i->arg[0])); + goto Table; + } + break; + case Onop: + break; + case Oaddr: + assert(rtype(i->arg[0]) == RSlot); + rn = rname[i->to.val]; + s = slot(i->arg[0].val, fn); + if (s >= -2048 && s <= 2047) { /* offset fits a signed 12-bit immediate (same range as fixslot) */ + fprintf(f, "\tadd %s, fp, 
%"PRId64"\n", rn, s); + } else { + fprintf(f, + "\tli %s, %"PRId64"\n" + "\tadd %s, fp, %s\n", + rn, s, rn, rn + ); + } + break; + case Ocall: + switch (rtype(i->arg[0])) { + case RCon: + con = &fn->con[i->arg[0].val]; + if (con->type != CAddr || con->bits.i) + goto invalid; + fprintf(f, "\tcall %s\n", str(con->label)); + break; + case RTmp: + emitf("jalr %0", i, fn, f); + break; + default: + invalid: + die("invalid call argument"); + } + break; + case Osalloc: + emitf("sub sp, sp, %0", i, fn, f); + if (!req(i->to, R)) + emitf("mv %=, sp", i, fn, f); + break; + } +} + +/* + + Stack-frame layout: + + +=============+ + | varargs | + | save area | + +-------------+ + | saved ra | + | saved fp | + +-------------+ <- fp + | ... | + | spill slots | + | ... | + +-------------+ + | ... | + | locals | + | ... | + +-------------+ + | padding | + +-------------+ + | callee-save | + | registers | + +=============+ + +*/ +/* emit fn as assembly on f: prologue (vararg save area, saved fp/ra, frame, used rv64_rclob registers), then each block's instructions and jump, with the epilogue emitted at every Jret0 */ +void +rv64_emitfn(Fn *fn, FILE *f) +{ + static int id0; + int lbl, neg, off, frame, *pr, r; + Blk *b, *s; + Ins *i; + + gasemitlnk(fn->name, &fn->lnk, ".text", f); + + if (fn->vararg) { + /* TODO: only need space for registers unused by named arguments */ + fprintf(f, "\tadd sp, sp, -64\n"); + for (r = A0; r <= A7; r++) + fprintf(f, "\tsd %s, %d(sp)\n", rname[r], 8 * (r - A0)); + } + fprintf(f, "\tsd fp, -16(sp)\n"); + fprintf(f, "\tsd ra, -8(sp)\n"); + fprintf(f, "\tadd fp, sp, -16\n"); + + frame = (16 + 4 * fn->slot + 15) & ~15; + for (pr = rv64_rclob; *pr>=0; pr++) { + if (fn->reg & BIT(*pr)) + frame += 8; + } + frame = (frame + 15) & ~15; + + if (frame <= 2048) + fprintf(f, "\tadd sp, sp, -%d\n", frame); + else + fprintf(f, + "\tli t6, %d\n" + "\tsub sp, sp, t6\n", + frame); + for (pr = rv64_rclob, off = 0; *pr >= 0; pr++) { + if (fn->reg & BIT(*pr)) { + fprintf(f, "\t%s %s, %d(sp)\n", *pr < FT0 ? 
"sd" : "fsd", rname[*pr], off); + off += 8; + } + } + + for (lbl = 0, b = fn->start; b; b=b->link) { + if (lbl || b->npred > 1) + fprintf(f, ".L%d:\n", id0+b->id); + for (i=b->ins; i!=&b->ins[b->nins]; i++) + emitins(i, fn, f); + lbl = 1; + switch (b->jmp.type) { + case Jret0: + if (fn->dynalloc) { /* dynamic allocations moved sp: recompute it from fp, which the prologue left at sp + frame - 16 */ + if (frame - 16 <= 2048) + fprintf(f, "\tadd sp, fp, -%d\n", frame - 16); + else + fprintf(f, + "\tli t6, %d\n" + "\tsub sp, fp, t6\n", + frame - 16); + } + for (pr = rv64_rclob, off = 0; *pr >= 0; pr++) { + if (fn->reg & BIT(*pr)) { + fprintf(f, "\t%s %s, %d(sp)\n", *pr < FT0 ? "ld" : "fld", rname[*pr], off); + off += 8; + } + } + fprintf(f, + "\tadd sp, fp, %d\n" + "\tld ra, 8(fp)\n" + "\tld fp, 0(fp)\n" + "\tret\n", + 16 + fn->vararg * 64 + ); + break; + case Jjmp: + Jmp: + if (b->s1 != b->link) + fprintf(f, "\tj .L%d\n", id0+b->s1->id); + else + lbl = 0; + break; + case Jjnz: + neg = 0; + if (b->link == b->s2) { + s = b->s1; + b->s1 = b->s2; + b->s2 = s; + neg = 1; + } + assert(isreg(b->jmp.arg)); + fprintf(f, "\tb%sz %s, .L%d\n", neg ? "ne" : "eq", rname[b->jmp.arg.val], id0+b->s2->id); + goto Jmp; + } + } + id0 += fn->nblk; +} diff --git a/rv64/isel.c b/rv64/isel.c new file mode 100644 index 0000000..bb6fb02 --- /dev/null +++ b/rv64/isel.c @@ -0,0 +1,278 @@ +#include "all.h" + +static int +memarg(Ref *r, int op, Ins *i) +{ + return ((isload(op) || op == Ocall) && r == &i->arg[0]) + || (isstore(op) && r == &i->arg[1]); +} + +static int +immarg(Ref *r, int op, Ins *i) +{ + return rv64_op[op].imm && r == &i->arg[1]; +} + +static void +fixarg(Ref *r, int k, Ins *i, Fn *fn) +{ + char buf[32]; + Ref r0, r1; + int s, n, op; + Con *c; + + r0 = r1 = *r; + op = i ? 
i->op : Ocopy; + switch (rtype(r0)) { + case RCon: + c = &fn->con[r0.val]; + if (c->type == CAddr && memarg(r, op, i)) + break; + if (c->type == CBits && immarg(r, op, i) + && -2048 <= c->bits.i && c->bits.i < 2048) + break; + r1 = newtmp("isel", k, fn); + if (KBASE(k) == 1) { + /* load floating points from memory + * slots, they can't be used as + * immediates + */ + assert(c->type == CBits); + n = gasstash(&c->bits, KWIDE(k) ? 8 : 4); + vgrow(&fn->con, ++fn->ncon); + c = &fn->con[fn->ncon-1]; + sprintf(buf, "fp%d", n); + *c = (Con){.type = CAddr, .local = 1}; + c->label = intern(buf); + emit(Oload, k, r1, CON(c-fn->con), R); + break; + } + emit(Ocopy, k, r1, r0, R); + break; + case RTmp: + if (isreg(r0)) + break; + s = fn->tmp[r0.val].slot; + if (s != -1) { + /* aggregate passed by value on + * stack, or fast local address, + * replace with slot if we can + */ + if (memarg(r, op, i)) { + r1 = SLOT(s); + break; + } + r1 = newtmp("isel", k, fn); + emit(Oaddr, k, r1, SLOT(s), R); + break; + } + if (k == Kw && fn->tmp[r0.val].cls == Kl) { + /* TODO: this sign extension isn't needed + * for 32-bit arithmetic instructions + */ + r1 = newtmp("isel", k, fn); + emit(Oextsw, Kl, r1, r0, R); + } else { + assert(k == fn->tmp[r0.val].cls); + } + break; + } + *r = r1; +} + +static void +negate(Ref *pr, Fn *fn) +{ + Ref r; + + r = newtmp("isel", Kw, fn); + emit(Oxor, Kw, *pr, r, getcon(1, fn)); + *pr = r; +} + +static void +selcmp(Ins i, int k, int op, Fn *fn) +{ + Ins *icmp; + Ref r, r0, r1; + int sign, swap, neg; + + switch (op) { + case Cieq: + r = newtmp("isel", k, fn); + emit(Oreqz, i.cls, i.to, r, R); + emit(Oxor, k, r, i.arg[0], i.arg[1]); + icmp = curi; + fixarg(&icmp->arg[0], k, icmp, fn); + fixarg(&icmp->arg[1], k, icmp, fn); + return; + case Cine: + r = newtmp("isel", k, fn); + emit(Ornez, i.cls, i.to, r, R); + emit(Oxor, k, r, i.arg[0], i.arg[1]); + icmp = curi; + fixarg(&icmp->arg[0], k, icmp, fn); + fixarg(&icmp->arg[1], k, icmp, fn); + return; + case Cisge: sign 
= 1, swap = 0, neg = 1; break; + case Cisgt: sign = 1, swap = 1, neg = 0; break; + case Cisle: sign = 1, swap = 1, neg = 1; break; + case Cislt: sign = 1, swap = 0, neg = 0; break; + case Ciuge: sign = 0, swap = 0, neg = 1; break; + case Ciugt: sign = 0, swap = 1, neg = 0; break; + case Ciule: sign = 0, swap = 1, neg = 1; break; + case Ciult: sign = 0, swap = 0, neg = 0; break; + case NCmpI+Cfeq: + case NCmpI+Cfge: + case NCmpI+Cfgt: + case NCmpI+Cfle: + case NCmpI+Cflt: + swap = 0, neg = 0; + break; + case NCmpI+Cfuo: + negate(&i.to, fn); + /* fallthrough */ + case NCmpI+Cfo: + r0 = newtmp("isel", i.cls, fn); + r1 = newtmp("isel", i.cls, fn); + emit(Oand, i.cls, i.to, r0, r1); + op = KWIDE(k) ? Oceqd : Oceqs; + emit(op, i.cls, r0, i.arg[0], i.arg[0]); + icmp = curi; + fixarg(&icmp->arg[0], k, icmp, fn); + fixarg(&icmp->arg[1], k, icmp, fn); + emit(op, i.cls, r1, i.arg[1], i.arg[1]); + icmp = curi; + fixarg(&icmp->arg[0], k, icmp, fn); + fixarg(&icmp->arg[1], k, icmp, fn); + return; + case NCmpI+Cfne: + swap = 0, neg = 1; + i.op = KWIDE(k) ? Oceqd : Oceqs; + break; + default: + assert(0 && "unknown comparison"); + } + if (op < NCmpI) + i.op = sign ? 
Ocsltl : Ocultl; + if (swap) { + r = i.arg[0]; + i.arg[0] = i.arg[1]; + i.arg[1] = r; + } + if (neg) + negate(&i.to, fn); + emiti(i); + icmp = curi; + fixarg(&icmp->arg[0], k, icmp, fn); + fixarg(&icmp->arg[1], k, icmp, fn); +} + +static void +sel(Ins i, Fn *fn) +{ + Ref r0, r1; + Ins *i0; + int ck, cc; + int64_t sz; + + switch (i.op) { + case Onop: + break; + case Oalloc4: + case Oalloc8: + case Oalloc16: + /* we need to make sure + * the stack remains aligned + * (rsp = 0) mod 16 + */ + fn->dynalloc = 1; + if (rtype(i.arg[0]) == RCon) { + sz = fn->con[i.arg[0].val].bits.i; + if (sz < 0) + err("invalid alloc size %"PRId64, sz); + sz = (sz + 15) & -16; + emit(Osalloc, Kl, i.to, getcon(sz, fn), R); + fixarg(&curi->arg[0], Kl, curi, fn); + } else { + /* r0 = (i.arg[0] + 15) & -16 */ + r0 = newtmp("isel", Kl, fn); + r1 = newtmp("isel", Kl, fn); + emit(Osalloc, Kl, i.to, r0, R); + emit(Oand, Kl, r0, r1, getcon(-16, fn)); + emit(Oadd, Kl, r1, i.arg[0], getcon(15, fn)); + if (fn->tmp[i.arg[0].val].slot != -1) + err("unlikely argument %%%s in %s", + fn->tmp[i.arg[0].val].name, optab[i.op].name); + } + break; + default: + if (iscmp(i.op, &ck, &cc)) { + selcmp(i, ck, cc, fn); + break; + } + emiti(i); + i0 = curi; /* fixarg() can change curi */ + fixarg(&i0->arg[0], argcls(&i, 0), i0, fn); + fixarg(&i0->arg[1], argcls(&i, 1), i0, fn); + } +} + +static void +seljmp(Blk *b, Fn *fn) +{ + /* TODO: replace cmp+jnz with beq/bne/blt[u]/bge[u] */ + if (b->jmp.type == Jjnz) + fixarg(&b->jmp.arg, Kw, 0, fn); +} + +void +rv64_isel(Fn *fn) +{ + Blk *b, **sb; + Ins *i; + Phi *p; + uint n; + int al; + int64_t sz; + + /* assign slots to fast allocs */ + b = fn->start; + /* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */ + for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2) + for (i=b->ins; i<&b->ins[b->nins]; i++) + if (i->op == al) { + if (rtype(i->arg[0]) != RCon) + break; + sz = fn->con[i->arg[0].val].bits.i; + if (sz < 0 || sz >= INT_MAX-15) + err("invalid alloc size 
%"PRId64, sz); + sz = (sz + n-1) & -n; + sz /= 4; + if (sz > INT_MAX - fn->slot) + die("alloc too large"); + fn->tmp[i->to.val].slot = fn->slot; + fn->slot += sz; + *i = (Ins){.op = Onop}; + } + + for (b=fn->start; b; b=b->link) { + curi = &insb[NIns]; + for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++) + for (p=(*sb)->phi; p; p=p->link) { + for (n=0; p->blk[n] != b; n++) + assert(n+1 < p->narg); + fixarg(&p->arg[n], p->cls, 0, fn); + } + seljmp(b, fn); + for (i=&b->ins[b->nins]; i!=b->ins;) + sel(*--i, fn); + b->nins = &insb[NIns] - curi; + idup(&b->ins, curi, b->nins); + } + + if (debug['I']) { + fprintf(stderr, "\n> After instruction selection:\n"); + printfn(fn, stderr); + } +} diff --git a/rv64/targ.c b/rv64/targ.c new file mode 100644 index 0000000..ead8fe2 --- /dev/null +++ b/rv64/targ.c @@ -0,0 +1,53 @@ +#include "all.h" + +Rv64Op rv64_op[NOp] = { +#define O(op, t, x) [O##op] = +#define V(imm) { imm }, +#include "../ops.h" +}; + +int rv64_rsave[] = { + T0, T1, T2, T3, T4, T5, + A0, A1, A2, A3, A4, A5, A6, A7, + FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7, + FT0, FT1, FT2, FT3, FT4, FT5, FT6, FT7, + FT8, FT9, FT10, FT11, + -1 +}; +int rv64_rclob[] = { + S1, S2, S3, S4, S5, S6, S7, + S8, S9, S10, S11, + FS0, FS1, FS2, FS3, FS4, FS5, FS6, FS7, + FS8, FS9, FS10, FS11, + -1 +}; + +/* T6 used as swap register (TODO: is there a better choice?) */ +#define RGLOB (BIT(FP) | BIT(SP) | BIT(GP) | BIT(TP) | BIT(RA) | BIT(T6)) + +static int +rv64_memargs(int op) +{ + (void)op; + return 0; +} + +Target T_rv64 = { + .gpr0 = T0, + .ngpr = NGPR, + .fpr0 = FT0, + .nfpr = NFPR, + .rglob = RGLOB, + .nrglob = 6, + .rsave = rv64_rsave, + .nrsave = {NGPS, NFPS}, + .retregs = rv64_retregs, + .argregs = rv64_argregs, + .memargs = rv64_memargs, + .abi = rv64_abi, + .isel = rv64_isel, + .emitfn = rv64_emitfn, +}; + +MAKESURE(rsave_size_ok, sizeof rv64_rsave == (NGPS+NFPS+1) * sizeof(int)); +MAKESURE(rclob_size_ok, sizeof rv64_rclob == (NCLR+1) * sizeof(int)); -- cgit 1.4.1