diff options
Diffstat (limited to 'amd64/emit.c')
-rw-r--r-- | amd64/emit.c | 561 |
1 files changed, 561 insertions, 0 deletions
diff --git a/amd64/emit.c b/amd64/emit.c new file mode 100644 index 0000000..eccbd02 --- /dev/null +++ b/amd64/emit.c @@ -0,0 +1,561 @@ +#include "all.h" + + +#define CMP(X) \ + X(Ciule, "be") \ + X(Ciult, "b") \ + X(Cisle, "le") \ + X(Cislt, "l") \ + X(Cisgt, "g") \ + X(Cisge, "ge") \ + X(Ciugt, "a") \ + X(Ciuge, "ae") \ + X(Cieq, "z") \ + X(Cine, "nz") \ + X(NCmpI+Cfle, "be") \ + X(NCmpI+Cflt, "b") \ + X(NCmpI+Cfgt, "a") \ + X(NCmpI+Cfge, "ae") \ + X(NCmpI+Cfeq, "z") \ + X(NCmpI+Cfne, "nz") \ + X(NCmpI+Cfo, "np") \ + X(NCmpI+Cfuo, "p") + +enum { + SLong = 0, + SWord = 1, + SShort = 2, + SByte = 3, + + Ki = -1, /* matches Kw and Kl */ + Ka = -2, /* matches all classes */ +}; + +/* Instruction format strings: + * + * if the format string starts with -, the instruction + * is assumed to be 3-address and is put in 2-address + * mode using an extra mov if necessary + * + * if the format string starts with +, the same as the + * above applies, but commutativity is also assumed + * + * %k is used to set the class of the instruction, + * it'll expand to "l", "q", "ss", "sd", depending + * on the instruction class + * %0 designates the first argument + * %1 designates the second argument + * %= designates the result + * + * if %k is not used, a prefix to 0, 1, or = must be + * added, it can be: + * M - memory reference + * L - long (64 bits) + * W - word (32 bits) + * H - short (16 bits) + * B - byte (8 bits) + * S - single precision float + * D - double precision float + */ +static struct { + short op; + short cls; + char *asm; +} omap[] = { + { Oadd, Ka, "+add%k %1, %=" }, + { Osub, Ka, "-sub%k %1, %=" }, + { Oand, Ki, "+and%k %1, %=" }, + { Oor, Ki, "+or%k %1, %=" }, + { Oxor, Ki, "+xor%k %1, %=" }, + { Osar, Ki, "-sar%k %B1, %=" }, + { Oshr, Ki, "-shr%k %B1, %=" }, + { Oshl, Ki, "-shl%k %B1, %=" }, + { Omul, Ki, "+imul%k %1, %=" }, + { Omul, Ks, "+mulss %1, %=" }, + { Omul, Kd, "+mulsd %1, %=" }, + { Odiv, Ka, "-div%k %1, %=" }, + { Ostorel, Ka, "movq %L0, %M1" }, + { Ostorew, Ka, "movl %W0, %M1" }, + { Ostoreh, Ka, "movw %H0, %M1" }, + { Ostoreb, Ka, "movb %B0, %M1" }, + { Ostores, Ka, "movss %S0, %M1" }, + { Ostored, Ka, "movsd %D0, %M1" }, + { Oload, Ka, "mov%k %M0, %=" }, + { Oloadsw, Kl, "movslq %M0, %L=" }, + { Oloadsw, Kw, "movl %M0, %W=" }, + { Oloaduw, Ki, "movl %M0, %W=" }, + { Oloadsh, Ki, "movsw%k %M0, %=" }, + { Oloaduh, Ki, "movzw%k %M0, %=" }, + { Oloadsb, Ki, "movsb%k %M0, %=" }, + { Oloadub, Ki, "movzb%k %M0, %=" }, + { Oextsw, Kl, "movslq %W0, %L=" }, + { Oextuw, Kl, "movl %W0, %W=" }, + { Oextsh, Ki, "movsw%k %H0, %=" }, + { Oextuh, Ki, "movzw%k %H0, %=" }, + { Oextsb, Ki, "movsb%k %B0, %=" }, + { Oextub, Ki, "movzb%k %B0, %=" }, + + { Oexts, Kd, "cvtss2sd %0, %=" }, + { Otruncd, Ks, "cvttsd2ss %0, %=" }, + { Ostosi, Ki, "cvttss2si%k %0, %=" }, + { Odtosi, Ki, "cvttsd2si%k %0, %=" }, + { Oswtof, Ka, "cvtsi2%k %W0, %=" }, + { Osltof, Ka, "cvtsi2%k %L0, %=" }, + { Ocast, Ki, "movq %D0, %L=" }, + { Ocast, Ka, "movq %L0, %D=" }, + + { Oaddr, Ki, "lea%k %M0, %=" }, + { Oswap, Ki, "xchg%k %0, %1" }, + { Osign, Kl, "cqto" }, + { Osign, Kw, "cltd" }, + { Oxdiv, Ki, "div%k %0" }, + { Oxidiv, Ki, "idiv%k %0" }, + { Oxcmp, Ks, "comiss %S0, %S1" }, + { Oxcmp, Kd, "comisd %D0, %D1" }, + { Oxcmp, Ki, "cmp%k %0, %1" }, + { Oxtest, Ki, "test%k %0, %1" }, +#define X(c, s) \ + { Oflag+c, Ki, "set" s " %B=\n\tmovzb%k %B=, %=" }, + CMP(X) +#undef X + { NOp, 0, 0 } +}; + +static char *rname[][4] = { + [RAX] = {"rax", "eax", "ax", "al"}, + [RBX] = {"rbx", "ebx", "bx", "bl"}, + [RCX] = {"rcx", "ecx", "cx", "cl"}, + [RDX] = {"rdx", "edx", "dx", "dl"}, + [RSI] = {"rsi", "esi", "si", "sil"}, + [RDI] = {"rdi", "edi", "di", "dil"}, + [RBP] = {"rbp", "ebp", "bp", "bpl"}, + [RSP] = {"rsp", "esp", "sp", "spl"}, + [R8 ] = {"r8" , "r8d", "r8w", "r8b"}, + [R9 ] = {"r9" , "r9d", "r9w", "r9b"}, + [R10] = {"r10", "r10d", "r10w", "r10b"}, + [R11] = {"r11", "r11d", "r11w", "r11b"}, + [R12] = {"r12", "r12d", "r12w", "r12b"}, + [R13] = {"r13", "r13d", "r13w", "r13b"}, + [R14] = {"r14", "r14d", "r14w", "r14b"}, + [R15] = {"r15", "r15d", "r15w", "r15b"}, +}; + + +static int +slot(int s, Fn *fn) +{ + struct { int i:29; } x; + + /* sign extend s using a bitfield */ + x.i = s; + assert(x.i <= fn->slot); + /* specific to NAlign == 3 */ + if (x.i < 0) + return -4 * x.i; + else if (fn->vararg) + return -176 + -4 * (fn->slot - x.i); + else + return -4 * (fn->slot - x.i); +} + +static void +emitcon(Con *con, FILE *f) +{ + switch (con->type) { + case CAddr: + if (con->local) + fprintf(f, "%s%s", gasloc, con->label); + else + fprintf(f, "%s%s", gassym, con->label); + if (con->bits.i) + fprintf(f, "%+"PRId64, con->bits.i); + break; + case CBits: + fprintf(f, "%"PRId64, con->bits.i); + break; + default: + die("unreachable"); + } +} + +static char * +regtoa(int reg, int sz) +{ + static char buf[6]; + + if (reg >= XMM0) { + sprintf(buf, "xmm%d", reg-XMM0); + return buf; + } else + return rname[reg][sz]; +} + +static Ref +getarg(char c, Ins *i) +{ + switch (c) { + case '0': + return i->arg[0]; + case '1': + return i->arg[1]; + case '=': + return i->to; + default: + die("invalid arg letter %c", c); + } +} + +static void emitins(Ins, Fn *, FILE *); + +static void +emitcopy(Ref r1, Ref r2, int k, Fn *fn, FILE *f) +{ + Ins icp; + + icp.op = Ocopy; + icp.arg[0] = r2; + icp.to = r1; + icp.cls = k; + emitins(icp, fn, f); +} + +static void +emitf(char *s, Ins *i, Fn *fn, FILE *f) +{ + static char clstoa[][3] = {"l", "q", "ss", "sd"}; + char c; + int sz; + Ref ref; + Mem *m; + Con off; + + switch (*s) { + case '+': + if (req(i->arg[1], i->to)) { + ref = i->arg[0]; + i->arg[0] = i->arg[1]; + i->arg[1] = ref; + } + /* fall through */ + case '-': + assert((!req(i->arg[1], i->to) || req(i->arg[0], i->to)) && + "cannot convert to 2-address"); + emitcopy(i->to, i->arg[0], i->cls, fn, f); + s++; + break; + } + + fputc('\t', f); +Next: + while ((c = *s++) != '%') + if (!c) { + fputc('\n', f); + return; + } else + fputc(c, f); + switch ((c = *s++)) { + case '%': + fputc('%', f); + break; + case 'k': + fputs(clstoa[i->cls], f); + break; + case '0': + case '1': + case '=': + sz = KWIDE(i->cls) ? SLong : SWord; + s--; + goto Ref; + case 'D': + case 'S': + sz = SLong; /* does not matter for floats */ + Ref: + c = *s++; + ref = getarg(c, i); + switch (rtype(ref)) { + case RTmp: + assert(isreg(ref)); + fprintf(f, "%%%s", regtoa(ref.val, sz)); + break; + case RSlot: + fprintf(f, "%d(%%rbp)", slot(ref.val, fn)); + break; + case RMem: + Mem: + m = &fn->mem[ref.val]; + if (rtype(m->base) == RSlot) { + off.type = CBits; + off.bits.i = slot(m->base.val, fn); + addcon(&m->offset, &off); + m->base = TMP(RBP); + } + if (m->offset.type != CUndef) + emitcon(&m->offset, f); + fputc('(', f); + if (req(m->base, R)) + fprintf(f, "%%rip"); + else + fprintf(f, "%%%s", regtoa(m->base.val, SLong)); + if (!req(m->index, R)) + fprintf(f, ", %%%s, %d", + regtoa(m->index.val, SLong), + m->scale + ); + fputc(')', f); + break; + case RCon: + fputc('$', f); + emitcon(&fn->con[ref.val], f); + break; + default: + die("unreachable"); + } + break; + case 'L': + sz = SLong; + goto Ref; + case 'W': + sz = SWord; + goto Ref; + case 'H': + sz = SShort; + goto Ref; + case 'B': + sz = SByte; + goto Ref; + case 'M': + c = *s++; + ref = getarg(c, i); + switch (rtype(ref)) { + case RMem: + goto Mem; + case RSlot: + fprintf(f, "%d(%%rbp)", slot(ref.val, fn)); + break; + case RCon: + emitcon(&fn->con[ref.val], f); + fprintf(f, "(%%rip)"); + break; + case RTmp: + assert(isreg(ref)); + fprintf(f, "(%%%s)", regtoa(ref.val, SLong)); + break; + default: + die("unreachable"); + } + break; + default: + die("invalid format specifier %%%c", c); + } + goto Next; +} + +static void +emitins(Ins i, Fn *fn, FILE *f) +{ + Ref r; + int64_t val; + int o; + + switch (i.op) { + default: + Table: + /* most instructions are just pulled out of + * the table omap[], some special cases are + * detailed below */ + for (o=0;; o++) { + /* this linear search should really be a binary + * search */ + if (omap[o].op == NOp) + die("no match for %s(%d)", + optab[i.op].name, "wlsd"[i.cls]); + if (omap[o].op == i.op) + if (omap[o].cls == i.cls + || (omap[o].cls == Ki && KBASE(i.cls) == 0) + || (omap[o].cls == Ka)) + break; + } + emitf(omap[o].asm, &i, fn, f); + break; + case Onop: + /* just do nothing for nops, they are inserted + * by some passes */ + break; + case Omul: + /* here, we try to use the 3-addresss form + * of multiplication when possible */ + if (rtype(i.arg[1]) == RCon) { + r = i.arg[0]; + i.arg[0] = i.arg[1]; + i.arg[1] = r; + } + if (KBASE(i.cls) == 0 /* only available for ints */ + && rtype(i.arg[0]) == RCon + && rtype(i.arg[1]) == RTmp) { + emitf("imul%k %0, %1, %=", &i, fn, f); + break; + } + goto Table; + case Osub: + /* we have to use the negation trick to handle + * some 3-address substractions */ + if (req(i.to, i.arg[1])) { + emitf("neg%k %=", &i, fn, f); + emitf("add%k %0, %=", &i, fn, f); + break; + } + goto Table; + case Ocopy: + /* make sure we don't emit useless copies, + * also, we can use a trick to load 64-bits + * registers, it's detailed in my note below + * http://c9x.me/art/notes.html?09/19/2015 */ + if (req(i.to, R) || req(i.arg[0], R)) + break; + if (isreg(i.to) + && rtype(i.arg[0]) == RCon + && i.cls == Kl + && fn->con[i.arg[0].val].type == CBits + && (val = fn->con[i.arg[0].val].bits.i) >= 0 + && val <= UINT32_MAX) { + emitf("movl %W0, %W=", &i, fn, f); + } else if (isreg(i.to) + && rtype(i.arg[0]) == RCon + && fn->con[i.arg[0].val].type == CAddr) { + emitf("lea%k %M0, %=", &i, fn, f); + } else if (!req(i.arg[0], i.to)) + emitf("mov%k %0, %=", &i, fn, f); + break; + case Ocall: + /* calls simply have a weird syntax in AT&T + * assembly... */ + switch (rtype(i.arg[0])) { + case RCon: + fprintf(f, "\tcallq "); + emitcon(&fn->con[i.arg[0].val], f); + fprintf(f, "\n"); + break; + case RTmp: + emitf("callq *%L0", &i, fn, f); + break; + default: + die("invalid call argument"); + } + break; + case Osalloc: + /* there is no good reason why this is here + * maybe we should split Osalloc in 2 different + * instructions depending on the result + */ + emitf("subq %L0, %%rsp", &i, fn, f); + if (!req(i.to, R)) + emitcopy(i.to, TMP(RSP), Kl, fn, f); + break; + case Oswap: + if (KBASE(i.cls) == 0) + goto Table; + /* for floats, there is no swap instruction + * so we use xmm15 as a temporary + */ + emitcopy(TMP(XMM0+15), i.arg[0], i.cls, fn, f); + emitcopy(i.arg[0], i.arg[1], i.cls, fn, f); + emitcopy(i.arg[1], TMP(XMM0+15), i.cls, fn, f); + break; + } +} + +static int +framesz(Fn *fn) +{ + int i, o, f; + + /* specific to NAlign == 3 */ + for (i=0, o=0; i<NCLR; i++) + o ^= 1 & (fn->reg >> amd64_sysv_rclob[i]); + f = fn->slot; + f = (f + 3) & -4; + return 4*f + 8*o + 176*fn->vararg; +} + +void +amd64_emitfn(Fn *fn, FILE *f) +{ + static char *ctoa[] = { + #define X(c, s) [c] = s, + CMP(X) + #undef X + }; + static int id0; + Blk *b, *s; + Ins *i, itmp; + int *r, c, fs, o, n, lbl; + + fprintf(f, ".text\n"); + if (fn->export) + fprintf(f, ".globl %s%s\n", gassym, fn->name); + fprintf(f, + "%s%s:\n" + "\tpushq %%rbp\n" + "\tmovq %%rsp, %%rbp\n", + gassym, fn->name + ); + fs = framesz(fn); + if (fs) + fprintf(f, "\tsub $%d, %%rsp\n", fs); + if (fn->vararg) { + o = -176; + for (r=amd64_sysv_rsave; r<&amd64_sysv_rsave[6]; r++, o+=8) + fprintf(f, "\tmovq %%%s, %d(%%rbp)\n", rname[*r][0], o); + for (n=0; n<8; ++n, o+=16) + fprintf(f, "\tmovaps %%xmm%d, %d(%%rbp)\n", n, o); + } + for (r=amd64_sysv_rclob; r<&amd64_sysv_rclob[NCLR]; r++) + if (fn->reg & BIT(*r)) { + itmp.arg[0] = TMP(*r); + emitf("pushq %L0", &itmp, fn, f); + } + + for (lbl=0, b=fn->start; b; b=b->link) { + if (lbl || b->npred > 1) + fprintf(f, "%sbb%d:\n", gasloc, id0+b->id); + for (i=b->ins; i!=&b->ins[b->nins]; i++) + emitins(*i, fn, f); + lbl = 1; + switch (b->jmp.type) { + case Jret0: + for (r=&amd64_sysv_rclob[NCLR]; r>amd64_sysv_rclob;) + if (fn->reg & BIT(*--r)) { + itmp.arg[0] = TMP(*r); + emitf("popq %L0", &itmp, fn, f); + } + fprintf(f, + "\tleave\n" + "\tret\n" + ); + break; + case Jjmp: + Jmp: + if (b->s1 != b->link) + fprintf(f, "\tjmp %sbb%d\n", + gasloc, id0+b->s1->id); + else + lbl = 0; + break; + default: + c = b->jmp.type - Jjf; + if (0 <= c && c <= NCmp) { + if (b->link == b->s2) { + s = b->s1; + b->s1 = b->s2; + b->s2 = s; + } else + c = cmpneg(c); + fprintf(f, "\tj%s %sbb%d\n", ctoa[c], + gasloc, id0+b->s2->id); + goto Jmp; + } + die("unhandled jump %d", b->jmp.type); + } + } + id0 += fn->nblk; +} |