diff options
author | Quentin Carbonneaux <quentin@c9x.me> | 2017-04-08 21:31:59 -0400 |
---|---|---|
committer | Quentin Carbonneaux <quentin@c9x.me> | 2017-04-08 22:09:55 -0400 |
commit | 61090c758d36f3919a222efe01c9794fdf7987ef (patch) | |
tree | b7329a9a5ef93cff5d4a96daf0cfed0e4c8247dd /arm64 | |
parent | 96836855a55cd28f1449b4a58d1e5301669350c0 (diff) | |
download | roux-61090c758d36f3919a222efe01c9794fdf7987ef.tar.gz |
new arm64 backend, yeepee
Diffstat (limited to 'arm64')
-rw-r--r-- | arm64/abi.c | 703 | ||||
-rw-r--r-- | arm64/all.h | 37 | ||||
-rw-r--r-- | arm64/emit.c | 455 | ||||
-rw-r--r-- | arm64/isel.c | 266 | ||||
-rw-r--r-- | arm64/targ.c | 51 |
5 files changed, 1512 insertions, 0 deletions
diff --git a/arm64/abi.c b/arm64/abi.c new file mode 100644 index 0000000..b340af2 --- /dev/null +++ b/arm64/abi.c @@ -0,0 +1,703 @@ +#include "all.h" + +typedef struct Class_ Class; +typedef struct Insl Insl; +typedef struct Params Params; + +enum { + Cstk = 1, /* pass on the stack */ + Cptr = 2, /* replaced by a pointer */ +}; + +struct Class_ { + char class; + char ishfa; + struct { + char base; + uchar size; + } hfa; + uint size; + Typ *t; + uchar nreg; + uchar ngp; + uchar nfp; + int reg[4]; + int cls[4]; +}; + +struct Insl { + Ins i; + Insl *link; +}; + +struct Params { + uint ngp; + uint nfp; + uint nstk; +}; + +static int gpreg[12] = {R0, R1, R2, R3, R4, R5, R6, R7}; +static int fpreg[12] = {V0, V1, V2, V3, V4, V5, V6, V7}; + +/* layout of call's second argument (RCall) + * + * 29 13 9 5 2 0 + * |0.00|x|xxxx|xxxx|xxx|xx| range + * | | | | ` gp regs returned (0..2) + * | | | ` fp regs returned (0..4) + * | | ` gp regs passed (0..8) + * | ` fp regs passed (0..8) + * ` is x8 used (0..1) + */ + +static int +isfloatv(Typ *t, char *cls) +{ + Field *f; + uint n; + + for (n=0; n<t->nunion; n++) + for (f=t->fields[n]; f->type != FEnd; f++) + switch (f->type) { + case Fs: + if (*cls == Kd) + return 0; + *cls = Ks; + break; + case Fd: + if (*cls == Ks) + return 0; + *cls = Kd; + break; + case FTyp: + if (isfloatv(&typ[f->len], cls)) + break; + default: + return 0; + } + return 1; +} + +static void +typclass(Class *c, Typ *t, int *gp, int *fp) +{ + uint64_t sz; + uint n; + + sz = (t->size + 7) & -8; + c->t = t; + c->class = 0; + c->ngp = 0; + c->nfp = 0; + + if (t->align > 4) + err("alignments larger than 16 are not supported"); + + if (t->dark || sz > 16 || sz == 0) { + /* large structs are replaced by a + * pointer to some caller-allocated + * memory */ + c->class |= Cptr; + c->size = 8; + return; + } + + c->size = sz; + c->hfa.base = Kx; + c->ishfa = isfloatv(t, &c->hfa.base); + c->hfa.size = t->size/(KWIDE(c->hfa.base) ? 
8 : 4); + + if (c->ishfa) + for (n=0; n<c->hfa.size; n++, c->nfp++) { + c->reg[n] = *fp++; + c->cls[n] = c->hfa.base; + } + else + for (n=0; n<sz/8; n++, c->ngp++) { + c->reg[n] = *gp++; + c->cls[n] = Kl; + } + + c->nreg = n; +} + +static void +sttmps(Ref tmp[], int cls[], uint nreg, Ref mem, Fn *fn) +{ + static int st[] = { + [Kw] = Ostorew, [Kl] = Ostorel, + [Ks] = Ostores, [Kd] = Ostored + }; + uint n; + uint64_t off; + Ref r; + + assert(nreg <= 4); + off = 0; + for (n=0; n<nreg; n++) { + tmp[n] = newtmp("abi", cls[n], fn); + r = newtmp("abi", Kl, fn); + emit(st[cls[n]], 0, R, tmp[n], r); + emit(Oadd, Kl, r, mem, getcon(off, fn)); + off += KWIDE(cls[n]) ? 8 : 4; + } +} + +static void +ldregs(int reg[], int cls[], int n, Ref mem, Fn *fn) +{ + int i; + uint64_t off; + Ref r; + + off = 0; + for (i=0; i<n; i++) { + r = newtmp("abi", Kl, fn); + emit(Oload, cls[i], TMP(reg[i]), r, R); + emit(Oadd, Kl, r, mem, getcon(off, fn)); + off += KWIDE(cls[i]) ? 8 : 4; + } +} + +static void +selret(Blk *b, Fn *fn) +{ + int j, k, cty; + Ref r; + Class cr; + + j = b->jmp.type; + + if (!isret(j) || j == Jret0) + return; + + r = b->jmp.arg; + b->jmp.type = Jret0; + + if (j == Jretc) { + typclass(&cr, &typ[fn->retty], gpreg, fpreg); + cty = (cr.nfp << 2) | cr.ngp; + if (cr.class & Cptr) { + assert(rtype(fn->retr) == RTmp); + blit(fn->retr, 0, r, cr.t->size, fn); + } else + ldregs(cr.reg, cr.cls, cr.nreg, r, fn); + } else { + k = j - Jretw; + if (KBASE(k) == 0) { + emit(Ocopy, k, TMP(R0), r, R); + cty = 1; + } else { + emit(Ocopy, k, TMP(V0), r, R); + cty = 1 << 2; + } + } + + b->jmp.arg = CALL(cty); +} + +static int +argsclass(Ins *i0, Ins *i1, Class *carg, Ref *env) +{ + int ngp, nfp, *gp, *fp; + Class *c; + Ins *i; + + gp = gpreg; + fp = fpreg; + ngp = 8; + nfp = 8; + for (i=i0, c=carg; i<i1; i++, c++) + switch (i->op) { + case Opar: + case Oarg: + c->cls[0] = i->cls; + c->size = 8; + if (KBASE(i->cls) == 0 && ngp > 0) { + ngp--; + c->reg[0] = *gp++; + break; + } + if 
(KBASE(i->cls) == 1 && nfp > 0) { + nfp--; + c->reg[0] = *fp++; + break; + } + c->class |= Cstk; + break; + case Oparc: + case Oargc: + typclass(c, &typ[i->arg[0].val], gp, fp); + if (c->class & Cptr) { + if (ngp > 0) { + ngp--; + c->reg[0] = *gp++; + c->cls[0] = Kl; + break; + } + } else if (c->ngp <= ngp) { + if (c->nfp <= nfp) { + ngp -= c->ngp; + nfp -= c->nfp; + gp += c->ngp; + fp += c->nfp; + break; + } else + nfp = 0; + } else + ngp = 0; + c->class |= Cstk; + break; + case Opare: + *env = i->to; + break; + case Oarge: + *env = i->arg[0]; + break; + } + + return ((gp-gpreg) << 5) | ((fp-fpreg) << 9); +} + +bits +arm64_retregs(Ref r, int p[2]) +{ + bits b; + int ngp, nfp; + + assert(rtype(r) == RCall); + ngp = r.val & 3; + nfp = (r.val >> 2) & 7; + if (p) { + p[0] = ngp; + p[1] = nfp; + } + b = 0; + while (ngp--) + b |= BIT(R0+ngp); + while (nfp--) + b |= BIT(V0+nfp); + return b; +} + +bits +arm64_argregs(Ref r, int p[2]) +{ + bits b; + int ngp, nfp, x8; + + assert(rtype(r) == RCall); + ngp = (r.val >> 5) & 15; + nfp = (r.val >> 9) & 15; + x8 = (r.val >> 13) & 1; + if (p) { + p[0] = ngp + x8; + p[1] = nfp; + } + b = 0; + while (ngp--) + b |= BIT(R0+ngp); + while (nfp--) + b |= BIT(V0+nfp); + return b | ((bits)x8 << R8); +} + +static void +stkblob(Ref r, Class *c, Fn *fn, Insl **ilp) +{ + Insl *il; + int al; + + il = alloc(sizeof *il); + al = c->t->align - 2; /* NAlign == 3 */ + if (al < 0) + al = 0; + il->i = (Ins){ + Oalloc + al, r, + {getcon(c->t->size, fn)}, Kl + }; + il->link = *ilp; + *ilp = il; +} + +static void +selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp) +{ + Ins *i; + Class *ca, *c, cr; + int cty, envc; + uint n; + uint64_t stk, off; + Ref r, rstk, env, tmp[4]; + + env = R; + ca = alloc((i1-i0) * sizeof ca[0]); + cty = argsclass(i0, i1, ca, &env); + + stk = 0; + for (i=i0, c=ca; i<i1; i++, c++) { + if (c->class & Cptr) { + i->arg[0] = newtmp("abi", Kl, fn); + stkblob(i->arg[0], c, fn, ilp); + i->op = Oarg; + } + if (c->class & Cstk) + stk += 
c->size; + } + stk += stk & 15; + rstk = getcon(stk, fn); + if (stk) + emit(Oadd, Kl, TMP(SP), TMP(SP), rstk); + + if (!req(i1->arg[1], R)) { + typclass(&cr, &typ[i1->arg[1].val], gpreg, fpreg); + stkblob(i1->to, &cr, fn, ilp); + cty |= (cr.nfp << 2) | cr.ngp; + if (cr.class & Cptr) { + cty |= 1 << 13; + emit(Ocopy, Kw, R, TMP(R0), R); + } else { + sttmps(tmp, cr.cls, cr.nreg, i1->to, fn); + for (n=0; n<cr.nreg; n++) { + r = TMP(cr.reg[n]); + emit(Ocopy, cr.cls[n], tmp[n], r, R); + } + } + } else { + if (KBASE(i1->cls) == 0) { + emit(Ocopy, i1->cls, i1->to, TMP(R0), R); + cty |= 1; + } else { + emit(Ocopy, i1->cls, i1->to, TMP(V0), R); + cty |= 1 << 2; + } + } + + envc = !req(R, env); + if (envc) + die("todo (arm abi): env calls"); + emit(Ocall, 0, R, i1->arg[0], CALL(cty)); + + if (cty & (1 << 13)) + /* struct return argument */ + emit(Ocopy, Kl, TMP(R8), i1->to, R); + + for (i=i0, c=ca; i<i1; i++, c++) { + if ((c->class & Cstk) != 0) + continue; + if (i->op != Oargc) + emit(Ocopy, *c->cls, TMP(*c->reg), i->arg[0], R); + else + ldregs(c->reg, c->cls, c->nreg, i->arg[1], fn); + } + + off = 0; + for (i=i0, c=ca; i<i1; i++, c++) { + if ((c->class & Cstk) == 0) + continue; + if (i->op != Oargc) { + r = newtmp("abi", Kl, fn); + emit(Ostorel, 0, R, i->arg[0], r); + emit(Oadd, Kl, r, TMP(SP), getcon(off, fn)); + } else + blit(TMP(SP), off, i->arg[1], c->size, fn); + off += c->size; + } + if (stk) + emit(Osub, Kl, TMP(SP), TMP(SP), rstk); + + for (i=i0, c=ca; i<i1; i++, c++) + if (c->class & Cptr) + blit(i->arg[0], 0, i->arg[1], c->t->size, fn); +} + +static Params +selpar(Fn *fn, Ins *i0, Ins *i1) +{ + Class *ca, *c, cr; + Insl *il; + Ins *i; + int n, s, cty; + Ref r, env, tmp[16], *t; + + env = R; + ca = alloc((i1-i0) * sizeof ca[0]); + curi = &insb[NIns]; + + cty = argsclass(i0, i1, ca, &env); + + il = 0; + t = tmp; + for (i=i0, c=ca; i<i1; i++, c++) { + if (i->op != Oparc || (c->class & (Cptr|Cstk))) + continue; + sttmps(t, c->cls, c->nreg, i->to, fn); + 
stkblob(i->to, c, fn, &il); + t += c->nreg; + } + for (; il; il=il->link) + emiti(il->i); + + if (fn->retty >= 0) { + typclass(&cr, &typ[fn->retty], gpreg, fpreg); + if (cr.class & Cptr) { + fn->retr = newtmp("abi", Kl, fn); + emit(Ocopy, Kl, fn->retr, TMP(R8), R); + } + } + + t = tmp; + for (i=i0, c=ca, s=2; i<i1; i++, c++) { + if (i->op == Oparc + && (c->class & Cptr) == 0) { + if (c->class & Cstk) { + fn->tmp[i->to.val].slot = -s; + s += c->size / 8; + } else + for (n=0; n<c->nreg; n++) { + r = TMP(c->reg[n]); + emit(Ocopy, c->cls[n], *t++, r, R); + } + } else if (c->class & Cstk) { + r = newtmp("abi", Kl, fn); + emit(Oload, *c->cls, i->to, r, R); + emit(Oaddr, Kl, r, SLOT(-s), R); + s++; + } else { + r = TMP(*c->reg); + emit(Ocopy, *c->cls, i->to, r, R); + } + } + + if (!req(R, env)) + die("todo (arm abi): env calls"); + + return (Params){ + .nstk = s - 2, + .ngp = (cty >> 5) & 15, + .nfp = (cty >> 9) & 15 + }; +} + +static Blk * +split(Fn *fn, Blk *b) +{ + Blk *bn; + + ++fn->nblk; + bn = blknew(); + bn->nins = &insb[NIns] - curi; + idup(&bn->ins, curi, bn->nins); + curi = &insb[NIns]; + bn->visit = ++b->visit; + snprintf(bn->name, NString, "%s.%d", b->name, b->visit); + bn->loop = b->loop; + bn->link = b->link; + b->link = bn; + return bn; +} + +static void +chpred(Blk *b, Blk *bp, Blk *bp1) +{ + Phi *p; + uint a; + + for (p=b->phi; p; p=p->link) { + for (a=0; p->blk[a]!=bp; a++) + assert(a+1<p->narg); + p->blk[a] = bp1; + } +} + +static void +selvaarg(Fn *fn, Blk *b, Ins *i) +{ + Ref loc, lreg, lstk, nr, r0, r1, c8, c16, c24, c28, ap; + Blk *b0, *bstk, *breg; + int isgp; + + c8 = getcon(8, fn); + c16 = getcon(16, fn); + c24 = getcon(24, fn); + c28 = getcon(28, fn); + ap = i->arg[0]; + isgp = KBASE(i->cls) == 0; + + /* @b [...] 
+ r0 =l add ap, (24 or 28) + nr =l loadsw r0 + r1 =w csltw nr, 0 + jnz r1, @breg, @bstk + @breg + r0 =l add ap, (8 or 16) + r1 =l loadl r0 + lreg =l add r1, nr + r0 =w add nr, (8 or 16) + r1 =l add ap, (24 or 28) + storew r0, r1 + @bstk + lstk =l loadl ap + r0 =l add lstk, 8 + storel r0, ap + @b0 + %loc =l phi @breg %lreg, @bstk %lstk + i->to =(i->cls) load %loc + */ + + loc = newtmp("abi", Kl, fn); + emit(Oload, i->cls, i->to, loc, R); + b0 = split(fn, b); + b0->jmp = b->jmp; + b0->s1 = b->s1; + b0->s2 = b->s2; + if (b->s1) + chpred(b->s1, b, b0); + if (b->s2 && b->s2 != b->s1) + chpred(b->s2, b, b0); + + lreg = newtmp("abi", Kl, fn); + nr = newtmp("abi", Kl, fn); + r0 = newtmp("abi", Kw, fn); + r1 = newtmp("abi", Kl, fn); + emit(Ostorew, Kw, R, r0, r1); + emit(Oadd, Kl, r1, ap, isgp ? c24 : c28); + emit(Oadd, Kw, r0, nr, isgp ? c8 : c16); + r0 = newtmp("abi", Kl, fn); + r1 = newtmp("abi", Kl, fn); + emit(Oadd, Kl, lreg, r1, nr); + emit(Oload, Kl, r1, r0, R); + emit(Oadd, Kl, r0, ap, isgp ? c8 : c16); + breg = split(fn, b); + breg->jmp.type = Jjmp; + breg->s1 = b0; + + lstk = newtmp("abi", Kl, fn); + r0 = newtmp("abi", Kl, fn); + emit(Ostorel, Kw, R, r0, ap); + emit(Oadd, Kl, r0, lstk, c8); + emit(Oload, Kl, lstk, ap, R); + bstk = split(fn, b); + bstk->jmp.type = Jjmp; + bstk->s1 = b0; + + b0->phi = alloc(sizeof *b0->phi); + *b0->phi = (Phi){ + .cls = Kl, .to = loc, + .narg = 2, + .blk = {bstk, breg}, + .arg = {lstk, lreg}, + }; + r0 = newtmp("abi", Kl, fn); + r1 = newtmp("abi", Kw, fn); + b->jmp.type = Jjnz; + b->jmp.arg = r1; + b->s1 = breg; + b->s2 = bstk; + emit(Ocmpw+Cislt, Kw, r1, nr, CON_Z); + emit(Oloadsw, Kl, nr, r0, R); + emit(Oadd, Kl, r0, ap, isgp ? 
c24 : c28); +} + +static void +selvastart(Fn *fn, Params p, Ref ap) +{ + Ref r0, r1, rsave; + + rsave = newtmp("abi", Kl, fn); + + r0 = newtmp("abi", Kl, fn); + emit(Ostorel, Kw, R, r0, ap); + emit(Oadd, Kl, r0, rsave, getcon(p.nstk*8 + 192, fn)); + + r0 = newtmp("abi", Kl, fn); + r1 = newtmp("abi", Kl, fn); + emit(Ostorel, Kw, R, r1, r0); + emit(Oadd, Kl, r1, rsave, getcon(64, fn)); + emit(Oadd, Kl, r0, ap, getcon(8, fn)); + + r0 = newtmp("abi", Kl, fn); + r1 = newtmp("abi", Kl, fn); + emit(Ostorel, Kw, R, r1, r0); + emit(Oadd, Kl, r1, rsave, getcon(192, fn)); + emit(Oaddr, Kl, rsave, SLOT(-1), R); + emit(Oadd, Kl, r0, ap, getcon(16, fn)); + + r0 = newtmp("abi", Kl, fn); + emit(Ostorew, Kw, R, getcon((p.ngp-8)*8, fn), r0); + emit(Oadd, Kl, r0, ap, getcon(24, fn)); + + r0 = newtmp("abi", Kl, fn); + emit(Ostorew, Kw, R, getcon((p.nfp-8)*16, fn), r0); + emit(Oadd, Kl, r0, ap, getcon(28, fn)); +} + +void +arm64_abi(Fn *fn) +{ + Blk *b; + Ins *i, *i0, *ip; + Insl *il; + int n; + Params p; + + for (b=fn->start; b; b=b->link) + b->visit = 0; + + /* lower parameters */ + for (b=fn->start, i=b->ins; i-b->ins<b->nins; i++) + if (!ispar(i->op)) + break; + p = selpar(fn, b->ins, i); + n = b->nins - (i - b->ins) + (&insb[NIns] - curi); + i0 = alloc(n * sizeof(Ins)); + ip = icpy(ip = i0, curi, &insb[NIns] - curi); + ip = icpy(ip, i, &b->ins[b->nins] - i); + b->nins = n; + b->ins = i0; + + /* lower calls, returns, and vararg instructions */ + il = 0; + b = fn->start; + do { + if (!(b = b->link)) + b = fn->start; /* do it last */ + if (b->visit) + continue; + curi = &insb[NIns]; + selret(b, fn); + for (i=&b->ins[b->nins]; i!=b->ins;) + switch ((--i)->op) { + default: + emiti(*i); + break; + case Ocall: + case Ovacall: + for (i0=i; i0>b->ins; i0--) + if (!isarg((i0-1)->op)) + break; + selcall(fn, i0, i, &il); + i = i0; + break; + case Ovastart: + selvastart(fn, p, i->arg[0]); + break; + case Ovaarg: + selvaarg(fn, b, i); + break; + case Oarg: + case Oargc: + die("unreachable"); + 
} + if (b == fn->start) + for (; il; il=il->link) + emiti(il->i); + b->nins = &insb[NIns] - curi; + idup(&b->ins, curi, b->nins); + } while (b != fn->start); + + if (debug['A']) { + fprintf(stderr, "\n> After ABI lowering:\n"); + printfn(fn, stderr); + } +} diff --git a/arm64/all.h b/arm64/all.h new file mode 100644 index 0000000..0b3073a --- /dev/null +++ b/arm64/all.h @@ -0,0 +1,37 @@ +#include "../all.h" + +enum Arm64Reg { + R0 = RXX + 1, + R1, R2, R3, R4, R5, R6, R7, + R8, R9, R10, R11, R12, R13, R14, R15, + IP0, IP1, R18, R19, R20, R21, R22, R23, + R24, R25, R26, R27, R28, FP, LR, SP, + + V0, V1, V2, V3, V4, V5, V6, V7, + V8, V9, V10, V11, V12, V13, V14, V15, + V16, V17, V18, V19, V20, V21, V22, V23, + V24, V25, V26, V27, V28, V29, V30, /* V31, */ + + NFPR = V30 - V0 + 1, + NGPR = SP - R0 + 1, + NGPS = R18 - R0 + 1, + NFPS = (V7 - V0 + 1) + (V30 - V16 + 1), + NCLR = (R28 - R19 + 1) + (V15 - V8 + 1), +}; +MAKESURE(reg_not_tmp, V30 < (int)Tmp0); + +/* targ.c */ +extern int arm64_rsave[]; +extern int arm64_rclob[]; + +/* abi.c */ +bits arm64_retregs(Ref, int[2]); +bits arm64_argregs(Ref, int[2]); +void arm64_abi(Fn *); + +/* isel.c */ +int arm64_logimm(uint64_t, int); +void arm64_isel(Fn *); + +/* emit.c */ +void arm64_emitfn(Fn *, FILE *); diff --git a/arm64/emit.c b/arm64/emit.c new file mode 100644 index 0000000..1b71179 --- /dev/null +++ b/arm64/emit.c @@ -0,0 +1,455 @@ +#include "all.h" + +typedef struct E E; + +struct E { + FILE *f; + Fn *fn; + uint64_t frame; + uint padding; +}; + +#define CMP(X) \ + X(Cieq, "eq") \ + X(Cine, "ne") \ + X(Cisge, "ge") \ + X(Cisgt, "gt") \ + X(Cisle, "le") \ + X(Cislt, "lt") \ + X(Ciuge, "cs") \ + X(Ciugt, "hi") \ + X(Ciule, "ls") \ + X(Ciult, "cc") \ + X(NCmpI+Cfeq, "eq") \ + X(NCmpI+Cfge, "ge") \ + X(NCmpI+Cfgt, "gt") \ + X(NCmpI+Cfle, "ls") \ + X(NCmpI+Cflt, "mi") \ + X(NCmpI+Cfne, "ne") \ + X(NCmpI+Cfo, "vc") \ + X(NCmpI+Cfuo, "vs") + +enum { + Ki = -1, /* matches Kw and Kl */ + Ka = -2, /* matches all classes */ +}; + 
+static struct { + short op; + short cls; + char *asm; +} omap[] = { + { Oadd, Ki, "add %=, %0, %1" }, + { Oadd, Ka, "fadd %=, %0, %1" }, + { Osub, Ki, "sub %=, %0, %1" }, + { Osub, Ka, "fsub %=, %0, %1" }, + { Oand, Ki, "and %=, %0, %1" }, + { Oor, Ki, "orr %=, %0, %1" }, + { Oxor, Ki, "eor %=, %0, %1" }, + { Osar, Ki, "asr %=, %0, %1" }, + { Oshr, Ki, "lsr %=, %0, %1" }, + { Oshl, Ki, "lsl %=, %0, %1" }, + { Omul, Ki, "mul %=, %0, %1" }, + { Omul, Ka, "fmul %=, %0, %1" }, + { Odiv, Ki, "sdiv %=, %0, %1" }, + { Odiv, Ka, "fdiv %=, %0, %1" }, + { Oudiv, Ki, "udiv %=, %0, %1" }, + { Orem, Ki, "sdiv %?, %0, %1\n\tmsub\t%=, %?, %1, %0" }, + { Ourem, Ki, "udiv %?, %0, %1\n\tmsub\t%=, %?, %1, %0" }, + { Ocopy, Ki, "mov %=, %0" }, + { Ocopy, Ka, "fmov %=, %0" }, + { Oswap, Ki, "mov %?, %0\n\tmov\t%0, %1\n\tmov\t%1, %?" }, + { Oswap, Ka, "fmov %?, %0\n\tfmov\t%0, %1\n\tfmov\t%1, %?" }, + { Ostoreb, Kw, "strb %W0, %M1" }, + { Ostoreh, Kw, "strh %W0, %M1" }, + { Ostorew, Kw, "str %W0, %M1" }, + { Ostorel, Kw, "str %L0, %M1" }, + { Ostores, Kw, "str %S0, %M1" }, + { Ostored, Kw, "str %D0, %M1" }, + { Oloadsb, Ki, "ldrsb %=, %M0" }, + { Oloadub, Ki, "ldrb %=, %M0" }, + { Oloadsh, Ki, "ldrsh %=, %M0" }, + { Oloaduh, Ki, "ldrh %=, %M0" }, + { Oloadsw, Kw, "ldr %=, %M0" }, + { Oloadsw, Kl, "ldrsw %=, %M0" }, + { Oloaduw, Ki, "ldr %W=, %M0" }, + { Oload, Ka, "ldr %=, %M0" }, + { Oextsb, Ki, "sxtb %=, %W0" }, + { Oextub, Ki, "uxtb %W=, %W0" }, + { Oextsh, Ki, "sxth %=, %W0" }, + { Oextuh, Ki, "uxth %W=, %W0" }, + { Oextsw, Ki, "sxtw %L=, %W0" }, + { Oextuw, Ki, "mov %W=, %W0" }, + { Oexts, Kd, "fcvt %=, %S0" }, + { Ocast, Kw, "fmov %=, %S0" }, + { Ocast, Kl, "fmov %=, %D0" }, + { Ocast, Ks, "fmov %=, %W0" }, + { Ocast, Kd, "fmov %=, %L0" }, + { Ostosi, Ka, "fcvtzs %=, %S0" }, + { Odtosi, Ka, "fcvtzs %=, %D0" }, + { Oswtof, Ka, "scvtf %=, %W0" }, + { Osltof, Ka, "scvtf %=, %L0" }, + { Ocall, Kw, "blr %L0" }, + + { Oacmp, Ki, "cmp %0, %1" }, + { Oacmn, Ki, "cmn %0, %1" }, + { 
Oafcmp, Ka, "fcmpe %0, %1" }, + +#define X(c, str) \ + { Oflag+c, Ki, "cset %=, " str }, + CMP(X) +#undef X + { NOp, 0, 0 } +}; + +static char * +rname(int r, int k) +{ + static char buf[4]; + + if (r == SP) { + assert(k == Kl); + sprintf(buf, "sp"); + } + else if (R0 <= r && r <= LR) + switch (k) { + default: die("invalid class"); + case Kw: sprintf(buf, "w%d", r-R0); break; + case Kx: + case Kl: sprintf(buf, "x%d", r-R0); break; + } + else if (V0 <= r && r <= V30) + switch (k) { + default: die("invalid class"); + case Ks: sprintf(buf, "s%d", r-V0); break; + case Kx: + case Kd: sprintf(buf, "d%d", r-V0); break; + } + else + die("invalid register"); + return buf; +} + +static uint64_t +slot(int s, E *e) +{ + s = ((int32_t)s << 3) >> 3; + if (s == -1) + return 16 + e->frame; + if (s < 0) { + if (e->fn->vararg) + return 16 + e->frame + 192 - (s+2)*8; + else + return 16 + e->frame - (s+2)*8; + } else + return 16 + e->padding + 4 * s; +} + +static void +emitf(char *s, Ins *i, E *e) +{ + Ref r; + int k, c; + Con *pc; + unsigned n, sp; + + fputc('\t', e->f); + + sp = 0; + for (;;) { + k = i->cls; + while ((c = *s++) != '%') + if (c == ' ' && !sp) { + fputc('\t', e->f); + sp = 1; + } else if ( !c) { + fputc('\n', e->f); + return; + } else + fputc(c, e->f); + Switch: + switch ((c = *s++)) { + default: + die("invalid escape"); + case 'W': + k = Kw; + goto Switch; + case 'L': + k = Kl; + goto Switch; + case 'S': + k = Ks; + goto Switch; + case 'D': + k = Kd; + goto Switch; + case '?': + if (KBASE(k) == 0) + fputs(rname(R18, k), e->f); + else + fputs(k==Ks ? "s31" : "d31", e->f); + break; + case '=': + case '0': + r = c == '=' ? 
i->to : i->arg[0]; + assert(isreg(r)); + fputs(rname(r.val, k), e->f); + break; + case '1': + r = i->arg[1]; + switch (rtype(r)) { + default: + die("invalid second argument"); + case RTmp: + assert(isreg(r)); + fputs(rname(r.val, k), e->f); + break; + case RCon: + pc = &e->fn->con[r.val]; + n = pc->bits.i; + assert(pc->type == CBits); + if (n & 0xfff000) + fprintf(e->f, "#%u, lsl #12", n>>12); + else + fprintf(e->f, "#%u", n); + break; + } + break; + case 'M': + c = *s++; + assert(c == '0' || c == '1'); + r = i->arg[c - '0']; + assert(isreg(r) && "TODO emit non reg addresses"); + fprintf(e->f, "[%s]", rname(r.val, Kl)); + break; + } + } +} + +static void +loadcon(Con *c, int r, int k, FILE *f) +{ + char *rn, *p, off[32]; + int64_t n; + int w, sh; + + w = KWIDE(k); + rn = rname(r, k); + n = c->bits.i; + if (c->type == CAddr) { + rn = rname(r, Kl); + if (n) + sprintf(off, "+%"PRIi64, n); + else + off[0] = 0; + p = c->local ? ".L" : ""; + fprintf(f, "\tadrp\t%s, %s%s%s\n", + rn, p, c->label, off); + fprintf(f, "\tadd\t%s, %s, #:lo12:%s%s%s\n", + rn, rn, p, c->label, off); + return; + } + assert(c->type == CBits); + if (!w) + n = (int32_t)n; + if ((n | 0xffff) == -1 || arm64_logimm(n, k)) { + fprintf(f, "\tmov\t%s, #%"PRIi64"\n", rn, n); + } else { + fprintf(f, "\tmov\t%s, #%d\n", + rn, (int)(n & 0xffff)); + for (sh=16; n>>=16; sh+=16) { + if ((!w && sh == 32) || sh == 64) + break; + fprintf(f, "\tmovk\t%s, #0x%x, lsl #%d\n", + rn, (unsigned)(n & 0xffff), sh); + } + } +} + +static void +emitins(Ins *i, E *e) +{ + int o; + + switch (i->op) { + default: + Table: + /* most instructions are just pulled out of + * the table omap[], some special cases are + * detailed below */ + for (o=0;; o++) { + /* this linear search should really be a binary + * search */ + if (omap[o].op == NOp) + die("no match for %s(%c)", + optab[i->op].name, "wlsd"[i->cls]); + if (omap[o].op == i->op) + if (omap[o].cls == i->cls || omap[o].cls == Ka + || (omap[o].cls == Ki && KBASE(i->cls) == 0)) + 
break; + } + emitf(omap[o].asm, i, e); + break; + case Onop: + break; + case Ocopy: + if (req(i->to, i->arg[0])) + break; + if (rtype(i->arg[0]) != RCon) + goto Table; + loadcon(&e->fn->con[i->arg[0].val], i->to.val, i->cls, e->f); + break; + case Oaddr: + assert(rtype(i->arg[0]) == RSlot); + fprintf(e->f, "\tadd\t%s, x29, #%"PRIu64"\n", + rname(i->to.val, Kl), slot(i->arg[0].val, e) + ); + break; + } +} + +static void +framelayout(E *e) +{ + int *r; + uint o; + uint64_t f; + + for (o=0, r=arm64_rclob; *r>=0; r++) + o += 1 & (e->fn->reg >> *r); + f = e->fn->slot; + f = (f + 3) & -4; + o += o & 1; + e->padding = 4*(f-e->fn->slot); + e->frame = 4*f + 8*o; +} + +/* + + Stack-frame layout: + + +=============+ + | varargs | + | save area | + +-------------+ + | callee-save | ^ + | registers | | + +-------------+ | + | ... | | + | spill slots | | + | ... | | e->frame + +-------------+ | + | ... | | + | locals | | + | ... | | + +-------------+ | + | e->padding | v + +-------------+ + | saved x29 | + | saved x30 | + +=============+ <- x29 + +*/ + +/* Print fn as assembly text on out: the .text/.globl directives and the function label, the prologue, then for every block its optional .L label, its instructions and its terminating jump; every Jret0 block gets a copy of the epilogue. The static counter id0 is added to each block id to form the .L label number and is advanced by fn->nblk on exit, so labels of successive functions do not collide. */ +void +arm64_emitfn(Fn *fn, FILE *out) +{ + static char *ctoa[] = { + #define X(c, s) [c] = s, + CMP(X) + #undef X + }; + static int id0; + int n, c, lbl, *r; + uint64_t o; + Blk *b, *s; + Ins *i; + E *e; + + e = &(E){.f = out, .fn = fn}; + framelayout(e); + + fprintf(e->f, ".text\n"); + if (e->fn->export) + fprintf(e->f, ".globl %s\n", e->fn->name); + fprintf(e->f, "%s:\n", e->fn->name); + + if (e->fn->vararg) { + /* vararg save area: q0-q7 (128 bytes) above x0-x7 (64 bytes), 192 bytes in all with x0 at the lowest address; the x registers are pushed in pairs so that sp stays 16-byte aligned between the stores (a lone 8-byte pre-indexed push leaves sp misaligned for the next sp-based access) */ + for (n=7; n>=0; n--) + fprintf(e->f, "\tstr\tq%d, [sp, -16]!\n", n); + for (n=7; n>=0; n-=2) + fprintf(e->f, "\tstp\tx%d, x%d, [sp, -16]!\n", n-1, n); + } + + if (e->frame + 16 > 512) + fprintf(e->f, + "\tsub\tsp, sp, #%"PRIu64"\n" + "\tstp\tx29, x30, [sp, -16]!\n", + e->frame + ); + else + fprintf(e->f, + "\tstp\tx29, x30, [sp, -%"PRIu64"]!\n", + e->frame + 16 + ); + fputs("\tadd\tx29, sp, 0\n", e->f); + for (o=e->frame+16, r=arm64_rclob; *r>=0; r++) + if (e->fn->reg & BIT(*r)) + fprintf(e->f, + "\tstr\t%s, [sp, %"PRIu64"]\n", + 
rname(*r, Kx), o -= 8 + ); + + for (lbl=0, b=e->fn->start; b; b=b->link) { + if (lbl || b->npred > 1) + fprintf(e->f, ".L%d:\n", id0+b->id); + for (i=b->ins; i!=&b->ins[b->nins]; i++) + emitins(i, e); + lbl = 1; + switch (b->jmp.type) { + case Jret0: + for (o=e->frame+16, r=arm64_rclob; *r>=0; r++) + if (e->fn->reg & BIT(*r)) + fprintf(e->f, + "\tldr\t%s, [sp, %"PRIu64"]\n", + rname(*r, Kx), o -= 8 + ); + o = e->frame + 16; + if (e->fn->vararg) + o += 192; + if (o > 504) + fprintf(e->f, + "\tldp\tx29, x30, [sp], 16\n" + "\tadd\tsp, sp, #%"PRIu64"\n", + o - 16 + ); + else + fprintf(e->f, + "\tldp\tx29, x30, [sp], %"PRIu64"\n", + o + ); + fprintf(e->f, "\tret\n"); + break; + case Jjmp: + Jmp: + if (b->s1 != b->link) + fprintf(e->f, "\tb\t.L%d\n", id0+b->s1->id); + else + lbl = 0; + break; + default: + c = b->jmp.type - Jjf; + /* c is used as an index into ctoa[] and passed to cmpneg(); presumably the valid codes are 0..NCmp-1, so NCmp itself must be rejected too */ + if (c < 0 || c >= NCmp) + die("unhandled jump %d", b->jmp.type); + if (b->link == b->s2) { + s = b->s1; + b->s1 = b->s2; + b->s2 = s; + } else + c = cmpneg(c); + fprintf(e->f, "\tb%s\t.L%d\n", ctoa[c], id0+b->s2->id); + goto Jmp; + } + } + id0 += e->fn->nblk; +} diff --git a/arm64/isel.c b/arm64/isel.c new file mode 100644 index 0000000..2d4e995 --- /dev/null +++ b/arm64/isel.c @@ -0,0 +1,266 @@ +#include "all.h" + +enum Imm { + Iother, + Iplo12, + Iphi12, + Iplo24, + Inlo12, + Inhi12, + Inlo24 +}; + +static enum Imm +imm(Con *c, int k, int64_t *pn) +{ + int64_t n; + int i; + + if (c->type != CBits) + return Iother; + n = c->bits.i; + if (k == Kw) + n = (int32_t)n; + i = Iplo12; + if (n < 0) { + i = Inlo12; + n = -n; + } + *pn = n; + if ((n & 0x000fff) == n) + return i; + if ((n & 0xfff000) == n) + return i + 1; + if ((n & 0xffffff) == n) + return i + 2; + return Iother; +} + +int +arm64_logimm(uint64_t x, int k) +{ + uint64_t n; + + if (k == Kw) + x = (x & 0xffffffff) | x << 32; + if (x & 1) + x = ~x; + if (x == 0) + return 0; + if (x == 0xaaaaaaaaaaaaaaaa) + return 1; + n = x & 0xf; + if (0x1111111111111111 * n == x) + goto Check; + n = x & 
0xff; + if (0x0101010101010101 * n == x) + goto Check; + n = x & 0xffff; + if (0x0001000100010001 * n == x) + goto Check; + n = x & 0xffffffff; + if (0x0000000100000001 * n == x) + goto Check; + n = x; +Check: + return (n & (n + (n & -n))) == 0; +} + +static void +fixarg(Ref *pr, int k, int phi, Fn *fn) +{ + Ref r0, r1, r2; + int s, n; + Con *c; + + r0 = *pr; + switch (rtype(r0)) { + case RCon: + if (KBASE(k) == 0 && phi) + return; + r1 = newtmp("isel", k, fn); + if (KBASE(k) == 0) { + emit(Ocopy, k, r1, r0, R); + } else { + c = &fn->con[r0.val]; + n = gasstashfp(c->bits.i, KWIDE(k)); + vgrow(&fn->con, ++fn->ncon); + c = &fn->con[fn->ncon-1]; + *c = (Con){.type = CAddr, .local = 1}; + sprintf(c->label, "fp%d", n); + r2 = newtmp("isel", Kl, fn); + emit(Oload, k, r1, r2, R); + emit(Ocopy, Kl, r2, CON(c-fn->con), R); + } + *pr = r1; + break; + case RTmp: + s = fn->tmp[r0.val].slot; + if (s == -1) + break; + r1 = newtmp("isel", Kl, fn); + emit(Oaddr, Kl, r1, SLOT(s), R); + *pr = r1; + break; + } +} + +static int +selcmp(Ref arg[2], int k, Fn *fn) +{ + Ref r, *iarg; + Con *c; + int swap, cmp, fix; + int64_t n; + + if (KBASE(k) == 1) { + emit(Oafcmp, k, R, arg[0], arg[1]); + iarg = curi->arg; + fixarg(&iarg[0], k, 0, fn); + fixarg(&iarg[1], k, 0, fn); + return 0; + } + swap = rtype(arg[0]) == RCon; + if (swap) { + r = arg[1]; + arg[1] = arg[0]; + arg[0] = r; + } + fix = 1; + cmp = Oacmp; + r = arg[1]; + if (rtype(r) == RCon) { + c = &fn->con[r.val]; + switch (imm(c, k, &n)) { + default: + break; + case Iplo12: + case Iphi12: + fix = 0; + break; + case Inlo12: + case Inhi12: + cmp = Oacmn; + r = getcon(n, fn); + fix = 0; + break; + } + } + emit(cmp, k, R, arg[0], r); + iarg = curi->arg; + fixarg(&iarg[0], k, 0, fn); + if (fix) + fixarg(&iarg[1], k, 0, fn); + return swap; +} + +static void +sel(Ins i, Fn *fn) +{ + Ref *iarg; + Ins *i0; + int ck, cc; + + if (iscmp(i.op, &ck, &cc)) { + emit(Oflag, i.cls, i.to, R, R); + i0 = curi; + if (selcmp(i.arg, ck, fn)) + i0->op += 
cmpop(cc); + else + i0->op += cc; + } else if (i.op != Onop) { + emiti(i); + iarg = curi->arg; /* fixarg() can change curi */ + fixarg(&iarg[0], argcls(&i, 0), 0, fn); + fixarg(&iarg[1], argcls(&i, 1), 0, fn); + } +} + +static void +seljmp(Blk *b, Fn *fn) +{ + Ref r; + Ins *i, *ir; + int ck, cc, use; + + switch (b->jmp.type) { + default: + assert(0 && "TODO 2"); + break; + case Jret0: + case Jjmp: + return; + case Jjnz: + break; + } + r = b->jmp.arg; + use = -1; + b->jmp.arg = R; + ir = 0; + i = &b->ins[b->nins]; + while (i > b->ins) + if (req((--i)->to, r)) { + use = fn->tmp[r.val].nuse; + ir = i; + break; + } + if (ir && use == 1 + && iscmp(ir->op, &ck, &cc)) { + if (selcmp(ir->arg, ck, fn)) + cc = cmpop(cc); + b->jmp.type = Jjf + cc; + *ir = (Ins){.op = Onop}; + } + else { + selcmp((Ref[]){r, CON_Z}, Kw, fn); + b->jmp.type = Jjfine; + } +} + +void +arm64_isel(Fn *fn) +{ + Blk *b, **sb; + Ins *i; + Phi *p; + uint n, al; + int64_t sz; + + /* assign slots to fast allocs */ + b = fn->start; + /* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */ + for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2) + for (i=b->ins; i-b->ins < b->nins; i++) + if (i->op == al) { + if (rtype(i->arg[0]) != RCon) + break; + sz = fn->con[i->arg[0].val].bits.i; + if (sz < 0 || sz >= INT_MAX-15) + err("invalid alloc size %"PRId64, sz); + sz = (sz + n-1) & -n; + sz /= 4; + fn->tmp[i->to.val].slot = fn->slot; + fn->slot += sz; + *i = (Ins){.op = Onop}; + } + + for (b=fn->start; b; b=b->link) { + curi = &insb[NIns]; + for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++) + for (p=(*sb)->phi; p; p=p->link) { + for (n=0; p->blk[n] != b; n++) + assert(n+1 < p->narg); + fixarg(&p->arg[n], p->cls, 1, fn); + } + seljmp(b, fn); + for (i=&b->ins[b->nins]; i!=b->ins;) + sel(*--i, fn); + b->nins = &insb[NIns] - curi; + idup(&b->ins, curi, b->nins); + } + + if (debug['I']) { + fprintf(stderr, "\n> After instruction selection:\n"); + printfn(fn, stderr); + } +} diff --git a/arm64/targ.c b/arm64/targ.c 
new file mode 100644 index 0000000..ead6932 --- /dev/null +++ b/arm64/targ.c @@ -0,0 +1,51 @@ +#include "all.h" + /* Register list terminated by -1 and installed as T_arm64.rsave; the arrays_size_ok check at the end of this file requires it to hold exactly NGPS+NFPS+1 ints. */+int arm64_rsave[] = { + R0, R1, R2, R3, R4, R5, R6, R7, + R8, R9, R10, R11, R12, R13, R14, R15, + IP0, IP1, R18, + V0, V1, V2, V3, V4, V5, V6, V7, + V16, V17, V18, V19, V20, V21, V22, V23, + V24, V25, V26, V27, V28, V29, V30, + -1 +}; /* Register list terminated by -1; the arrays_size_ok check at the end of this file requires it to hold exactly NCLR+1 ints. */+int arm64_rclob[] = { + R19, R20, R21, R22, R23, R24, R25, R26, + R27, R28, + V8, V9, V10, V11, V12, V13, V14, V15, + -1 +}; + /* Register mask built from FP, SP and R18 (three registers, matching .nrglob = 3 below); installed as T_arm64.rglob. */+#define RGLOB (BIT(FP) | BIT(SP) | BIT(R18)) + /* memargs hook of T_arm64: ignores op and always returns 0. */+static int +arm64_memargs(int op) +{ + (void)op; + return 0; +} + /* Target record for arm64: first register and register count of each bank, the RGLOB mask, the arm64_rsave table with its per-bank counts, and the retregs/argregs/memargs/abi/isel/emitfn hooks. */+Target T_arm64 = { + .gpr0 = R0, + .ngpr = NGPR, + .fpr0 = V0, + .nfpr = NFPR, + .rglob = RGLOB, + .nrglob = 3, + .rsave = arm64_rsave, + .nrsave = {NGPS, NFPS}, + .retregs = arm64_retregs, + .argregs = arm64_argregs, + .memargs = arm64_memargs, + .abi = arm64_abi, + .isel = arm64_isel, + .emitfn = arm64_emitfn, +}; + /* MAKESURE and BIT are defined elsewhere -- presumably a compile-time assertion and a single-bit mask (TODO confirm). Under that reading the first check requires that RGLOB contains no register numbered R8 or lower, and the second ties the lengths of the two tables above to NGPS, NFPS and NCLR. */+MAKESURE(globals_are_not_arguments, + (RGLOB & (BIT(R8+1) - 1)) == 0 +); +MAKESURE(arrays_size_ok, + sizeof arm64_rsave == (NGPS+NFPS+1) * sizeof(int) && + sizeof arm64_rclob == (NCLR+1) * sizeof(int) +); |