#include "all.h" typedef struct E E; struct E { FILE *f; Fn *fn; uint64_t frame; uint padding; }; #define CMP(X) \ X(Cieq, "eq") \ X(Cine, "ne") \ X(Cisge, "ge") \ X(Cisgt, "gt") \ X(Cisle, "le") \ X(Cislt, "lt") \ X(Ciuge, "cs") \ X(Ciugt, "hi") \ X(Ciule, "ls") \ X(Ciult, "cc") \ X(NCmpI+Cfeq, "eq") \ X(NCmpI+Cfge, "ge") \ X(NCmpI+Cfgt, "gt") \ X(NCmpI+Cfle, "ls") \ X(NCmpI+Cflt, "mi") \ X(NCmpI+Cfne, "ne") \ X(NCmpI+Cfo, "vc") \ X(NCmpI+Cfuo, "vs") enum { Ki = -1, /* matches Kw and Kl */ Ka = -2, /* matches all classes */ }; static struct { short op; short cls; char *asm; } omap[] = { { Oadd, Ki, "add %=, %0, %1" }, { Oadd, Ka, "fadd %=, %0, %1" }, { Osub, Ki, "sub %=, %0, %1" }, { Osub, Ka, "fsub %=, %0, %1" }, { Oneg, Ki, "neg %=, %0" }, { Oneg, Ka, "fneg %=, %0" }, { Oand, Ki, "and %=, %0, %1" }, { Oor, Ki, "orr %=, %0, %1" }, { Oxor, Ki, "eor %=, %0, %1" }, { Osar, Ki, "asr %=, %0, %1" }, { Oshr, Ki, "lsr %=, %0, %1" }, { Oshl, Ki, "lsl %=, %0, %1" }, { Omul, Ki, "mul %=, %0, %1" }, { Omul, Ka, "fmul %=, %0, %1" }, { Odiv, Ki, "sdiv %=, %0, %1" }, { Odiv, Ka, "fdiv %=, %0, %1" }, { Oudiv, Ki, "udiv %=, %0, %1" }, { Orem, Ki, "sdiv %?, %0, %1\n\tmsub\t%=, %?, %1, %0" }, { Ourem, Ki, "udiv %?, %0, %1\n\tmsub\t%=, %?, %1, %0" }, { Ocopy, Ki, "mov %=, %0" }, { Ocopy, Ka, "fmov %=, %0" }, { Oswap, Ki, "mov %?, %0\n\tmov\t%0, %1\n\tmov\t%1, %?" }, { Oswap, Ka, "fmov %?, %0\n\tfmov\t%0, %1\n\tfmov\t%1, %?" }, { Ostoreb, Kw, "strb %W0, %M1" }, { Ostoreh, Kw, "strh %W0, %M1" }, { Ostorew, Kw, "str %W0, %M1" }, { Ostorel, Kw, "str %L0, %M1" }, { Ostores, Kw, "str %S0, %M1" }, { Ostored, Kw, "str %D0, %M1" }, { Oloadsb, Ki, "ldrsb %=, %M0" }, { Oloadub, Ki, "ldrb %W=, %M0" }, { Oloadsh, Ki, "ldrsh %=, %M0" }, { Oloaduh, Ki, "ldrh %W=, %M0" }, { Oloadsw, Kw, "ldr %=, %M0" }, { Oloadsw, Kl, "ldrsw %=, %M0" }, { Oloaduw, Ki, "ldr %W=, %M0" }, { Oload, Ka, "ldr %=, %M0" }, { Oextsb, Ki, "sxtb %=, %W0" }, { Oextub, Ki, "uxtb %W=, %W0" }, { Oextsh, Ki, "sxth %=, %W0" }, { Oextuh, Ki, "uxth %W=, %W0" }, { Oextsw, Ki, "sxtw %L=, %W0" }, { Oextuw, Ki, "mov %W=, %W0" }, { Oexts, Kd, "fcvt %=, %S0" }, { Otruncd, Ks, "fcvt %=, %D0" }, { Ocast, Kw, "fmov %=, %S0" }, { Ocast, Kl, "fmov %=, %D0" }, { Ocast, Ks, "fmov %=, %W0" }, { Ocast, Kd, "fmov %=, %L0" }, { Ostosi, Ka, "fcvtzs %=, %S0" }, { Ostoui, Ka, "fcvtzu %=, %S0" }, { Odtosi, Ka, "fcvtzs %=, %D0" }, { Odtoui, Ka, "fcvtzu %=, %D0" }, { Oswtof, Ka, "scvtf %=, %W0" }, { Ouwtof, Ka, "ucvtf %=, %W0" }, { Osltof, Ka, "scvtf %=, %L0" }, { Oultof, Ka, "ucvtf %=, %L0" }, { Ocall, Kw, "blr %L0" }, { Oacmp, Ki, "cmp %0, %1" }, { Oacmn, Ki, "cmn %0, %1" }, { Oafcmp, Ka, "fcmpe %0, %1" }, #define X(c, str) \ { Oflag+c, Ki, "cset %=, " str }, CMP(X) #undef X { NOp, 0, 0 } }; static char * rname(int r, int k) { static char buf[4]; if (r == SP) { assert(k == Kl); sprintf(buf, "sp"); } else if (R0 <= r && r <= LR) switch (k) { default: die("invalid class"); case Kw: sprintf(buf, "w%d", r-R0); break; case Kx: case Kl: sprintf(buf, "x%d", r-R0); break; } else if (V0 <= r && r <= V30) switch (k) { default: die("invalid class"); case Ks: sprintf(buf, "s%d", r-V0); break; case Kx: case Kd: sprintf(buf, "d%d", r-V0); break; } else die("invalid register"); return buf; } static uint64_t slot(Ref r, E *e) { int s; s = rsval(r); if (s == -1) return 16 + e->frame; if (s < 0) { if (e->fn->vararg && !T.apple) return 16 + e->frame + 192 - (s+2); else return 16 + e->frame - (s+2); } else return 16 + e->padding + 4 * s; } static void emitf(char *s, Ins *i, E *e) { Ref r; int k, c; Con *pc; uint n, sp; fputc('\t', e->f); sp = 0; for (;;) { k = i->cls; while ((c = *s++) != '%') if (c == ' ' && !sp) { fputc('\t', e->f); sp = 1; } else if ( !c) { fputc('\n', e->f); return; } else fputc(c, e->f); Switch: switch ((c = *s++)) { default: die("invalid escape"); case 'W': k = Kw; goto Switch; case 'L': k = Kl; goto Switch; case 'S': k = Ks; goto Switch; case 'D': k = Kd; goto Switch; case '?': if (KBASE(k) == 0) fputs(rname(R18, k), e->f); else fputs(k==Ks ? "s31" : "d31", e->f); break; case '=': case '0': r = c == '=' ? i->to : i->arg[0]; assert(isreg(r)); fputs(rname(r.val, k), e->f); break; case '1': r = i->arg[1]; switch (rtype(r)) { default: die("invalid second argument"); case RTmp: assert(isreg(r)); fputs(rname(r.val, k), e->f); break; case RCon: pc = &e->fn->con[r.val]; n = pc->bits.i; assert(pc->type == CBits); if (n & 0xfff000) fprintf(e->f, "#%u, lsl #12", n>>12); else fprintf(e->f, "#%u", n); break; } break; case 'M': c = *s++; assert(c == '0' || c == '1' || c == '='); r = c == '=' ? i->to : i->arg[c - '0']; switch (rtype(r)) { default: die("todo (arm emit): unhandled ref"); case RTmp: assert(isreg(r)); fprintf(e->f, "[%s]", rname(r.val, Kl)); break; case RSlot: fprintf(e->f, "[x29, %"PRIu64"]", slot(r, e)); break; } break; } } } static void loadaddr(Con *c, char *rn, E *e) { char *p, *l, *s; switch (c->sym.type) { default: die("unreachable"); case SGlo: if (T.apple) s = "\tadrp\tR, S@pageO\n" "\tadd\tR, R, S@pageoffO\n"; else s = "\tadrp\tR, SO\n" "\tadd\tR, R, #:lo12:SO\n"; break; case SThr: if (T.apple) s = "\tadrp\tR, S@tlvppage\n" "\tldr\tR, [R, S@tlvppageoff]\n"; else s = "\tmrs\tR, tpidr_el0\n" "\tadd\tR, R, #:tprel_hi12:SO, lsl #12\n" "\tadd\tR, R, #:tprel_lo12_nc:SO\n"; break; } l = str(c->sym.id); p = l[0] == '"' ? "" : T.assym; for (; *s; s++) switch (*s) { default: fputc(*s, e->f); break; case 'R': fputs(rn, e->f); break; case 'S': fputs(p, e->f); fputs(l, e->f); break; case 'O': if (c->bits.i) /* todo, handle large offsets */ fprintf(e->f, "+%"PRIi64, c->bits.i); break; } } static void loadcon(Con *c, int r, int k, E *e) { char *rn; int64_t n; int w, sh; w = KWIDE(k); rn = rname(r, k); n = c->bits.i; if (c->type == CAddr) { loadaddr(c, rn, e); return; } assert(c->type == CBits); if (!w) n = (int32_t)n; if ((n | 0xffff) == -1 || arm64_logimm(n, k)) { fprintf(e->f, "\tmov\t%s, #%"PRIi64"\n", rn, n); } else { fprintf(e->f, "\tmov\t%s, #%d\n", rn, (int)(n & 0xffff)); for (sh=16; n>>=16; sh+=16) { if ((!w && sh == 32) || sh == 64) break; fprintf(e->f, "\tmovk\t%s, #0x%x, lsl #%d\n", rn, (uint)(n & 0xffff), sh); } } } static void emitins(Ins *, E *); static void fixarg(Ref *pr, int sz, E *e) { Ins *i; Ref r; uint64_t s; r = *pr; if (rtype(r) == RSlot) { s = slot(r, e); if (s > sz * 4095u) { i = &(Ins){Oaddr, Kl, TMP(IP0), {r}}; emitins(i, e); *pr = TMP(IP0); } } } static void emitins(Ins *i, E *e) { char *l, *p, *rn; uint64_t s; int o; Ref r; Con *c; switch (i->op) { default: if (isload(i->op)) fixarg(&i->arg[0], loadsz(i), e); if (isstore(i->op)) fixarg(&i->arg[1], storesz(i), e); Table: /* most instructions are just pulled out of * the table omap[], some special cases are * detailed below */ for (o=0;; o++) { /* this linear search should really be a binary * search */ if (omap[o].op == NOp) die("no match for %s(%c)", optab[i->op].name, "wlsd"[i->cls]); if (omap[o].op == i->op) if (omap[o].cls == i->cls || omap[o].cls == Ka || (omap[o].cls == Ki && KBASE(i->cls) == 0)) break; } emitf(omap[o].asm, i, e); break; case Onop: break; case Ocopy: if (req(i->to, i->arg[0])) break; if (rtype(i->to) == RSlot) { r = i->to; if (!isreg(i->arg[0])) { i->to = TMP(R18); emitins(i, e); i->arg[0] = i->to; } i->op = Ostorew + i->cls; i->cls = Kw; i->arg[1] = r; emitins(i, e); break; } assert(isreg(i->to)); switch (rtype(i->arg[0])) { case RCon: c = &e->fn->con[i->arg[0].val]; loadcon(c, i->to.val, i->cls, e); break; case RSlot: i->op = Oload; emitins(i, e); break; default: assert(i->to.val != R18); goto Table; } break; case Oaddr: assert(rtype(i->arg[0]) == RSlot); rn = rname(i->to.val, Kl); s = slot(i->arg[0], e); if (s <= 4095) fprintf(e->f, "\tadd\t%s, x29, #%"PRIu64"\n", rn, s); else if (s <= 65535) fprintf(e->f, "\tmov\t%s, #%"PRIu64"\n" "\tadd\t%s, x29, %s\n", rn, s, rn, rn ); else fprintf(e->f, "\tmov\t%s, #%"PRIu64"\n" "\tmovk\t%s, #%"PRIu64", lsl #16\n" "\tadd\t%s, x29, %s\n", rn, s & 0xFFFF, rn, s >> 16, rn, rn ); break; case Ocall: if (rtype(i->arg[0]) != RCon) goto Table; c = &e->fn->con[i->arg[0].val]; if (c->type != CAddr || c->sym.type != SGlo || c->bits.i) die("invalid call argument"); l = str(c->sym.id); p = l[0] == '"' ? "" : T.assym; fprintf(e->f, "\tbl\t%s%s\n", p, l); break; case Osalloc: emitf("sub sp, sp, %0", i, e); if (!req(i->to, R)) emitf("mov %=, sp", i, e); break; case Odbgloc: emitdbgloc(i->arg[0].val, e->f); break; } } static void framelayout(E *e) { int *r; uint o; uint64_t f; for (o=0, r=arm64_rclob; *r>=0; r++) o += 1 & (e->fn->reg >> *r); f = e->fn->slot; f = (f + 3) & -4; o += o & 1; e->padding = 4*(f-e->fn->slot); e->frame = 4*f + 8*o; } /* Stack-frame layout: +=============+ | varargs | | save area | +-------------+ | callee-save | ^ | registers | | +-------------+ | | ... | | | spill slots | | | ... | | e->frame +-------------+ | | ... | | | locals | | | ... | | +-------------+ | | e->padding | v +-------------+ | saved x29 | | saved x30 | +=============+ <- x29 */ void arm64_emitfn(Fn *fn, FILE *out) { static char *ctoa[] = { #define X(c, s) [c] = s, CMP(X) #undef X }; static int id0; int s, n, c, lbl, *r; uint64_t o; Blk *b, *t; Ins *i; E *e; e = &(E){.f = out, .fn = fn}; if (T.apple) e->fn->lnk.align = 4; emitfnlnk(e->fn->name, &e->fn->lnk, e->f); framelayout(e); if (e->fn->vararg && !T.apple) { for (n=7; n>=0; n--) fprintf(e->f, "\tstr\tq%d, [sp, -16]!\n", n); for (n=7; n>=0; n-=2) fprintf(e->f, "\tstp\tx%d, x%d, [sp, -16]!\n", n-1, n); } if (e->frame + 16 <= 512) fprintf(e->f, "\tstp\tx29, x30, [sp, -%"PRIu64"]!\n", e->frame + 16 ); else if (e->frame <= 4095) fprintf(e->f, "\tsub\tsp, sp, #%"PRIu64"\n" "\tstp\tx29, x30, [sp, -16]!\n", e->frame ); else if (e->frame <= 65535) fprintf(e->f, "\tmov\tx16, #%"PRIu64"\n" "\tsub\tsp, sp, x16\n" "\tstp\tx29, x30, [sp, -16]!\n", e->frame ); else fprintf(e->f, "\tmov\tx16, #%"PRIu64"\n" "\tmovk\tx16, #%"PRIu64", lsl #16\n" "\tsub\tsp, sp, x16\n" "\tstp\tx29, x30, [sp, -16]!\n", e->frame & 0xFFFF, e->frame >> 16 ); fputs("\tmov\tx29, sp\n", e->f); s = (e->frame - e->padding) / 4; for (r=arm64_rclob; *r>=0; r++) if (e->fn->reg & BIT(*r)) { s -= 2; i = &(Ins){.arg = {TMP(*r), SLOT(s)}}; i->op = *r >= V0 ? Ostored : Ostorel; emitins(i, e); } for (lbl=0, b=e->fn->start; b; b=b->link) { if (lbl || b->npred > 1) fprintf(e->f, "%s%d:\n", T.asloc, id0+b->id); for (i=b->ins; i!=&b->ins[b->nins]; i++) emitins(i, e); lbl = 1; switch (b->jmp.type) { case Jhlt: fprintf(e->f, "\tbrk\t#1000\n"); break; case Jret0: s = (e->frame - e->padding) / 4; for (r=arm64_rclob; *r>=0; r++) if (e->fn->reg & BIT(*r)) { s -= 2; i = &(Ins){Oload, 0, TMP(*r), {SLOT(s)}}; i->cls = *r >= V0 ? Kd : Kl; emitins(i, e); } if (e->fn->dynalloc) fputs("\tmov sp, x29\n", e->f); o = e->frame + 16; if (e->fn->vararg && !T.apple) o += 192; if (o <= 504) fprintf(e->f, "\tldp\tx29, x30, [sp], %"PRIu64"\n", o ); else if (o - 16 <= 4095) fprintf(e->f, "\tldp\tx29, x30, [sp], 16\n" "\tadd\tsp, sp, #%"PRIu64"\n", o - 16 ); else if (o - 16 <= 65535) fprintf(e->f, "\tldp\tx29, x30, [sp], 16\n" "\tmov\tx16, #%"PRIu64"\n" "\tadd\tsp, sp, x16\n", o - 16 ); else fprintf(e->f, "\tldp\tx29, x30, [sp], 16\n" "\tmov\tx16, #%"PRIu64"\n" "\tmovk\tx16, #%"PRIu64", lsl #16\n" "\tadd\tsp, sp, x16\n", (o - 16) & 0xFFFF, (o - 16) >> 16 ); fprintf(e->f, "\tret\n"); break; case Jjmp: Jmp: if (b->s1 != b->link) fprintf(e->f, "\tb\t%s%d\n", T.asloc, id0+b->s1->id ); else lbl = 0; break; default: c = b->jmp.type - Jjf; if (c < 0 || c > NCmp) die("unhandled jump %d", b->jmp.type); if (b->link == b->s2) { t = b->s1; b->s1 = b->s2; b->s2 = t; } else c = cmpneg(c); fprintf(e->f, "\tb%s\t%s%d\n", ctoa[c], T.asloc, id0+b->s2->id ); goto Jmp; } } id0 += e->fn->nblk; if (!T.apple) elf_emitfnfin(fn->name, out); }