From 49a4593c335126ba279f47328824abfef379725e Mon Sep 17 00:00:00 2001 From: Quentin Carbonneaux Date: Sat, 8 Apr 2017 21:06:33 -0400 Subject: prepare for multi-target This big diff does multiple changes to allow the addition of new targets to qbe. The changes are listed below in decreasing order of impact. 1. Add a new Target structure. To add support for a given target, one has to implement all the members of the Target structure. All the source files were changed to use this interface where needed. 2. Single out amd64-specific code. In this commit, the amd64 target T_amd64_sysv is the only target available, it is implemented in the amd64/ directory. All the non-static items in this directory are prefixed with either amd64_ or amd64_sysv (for items that are specific to the System V ABI). 3. Centralize Ops information. There is now a file 'ops.h' that must be used to store all the available operations together with their metadata. The various targets will only select what they need; but it is beneficial that there is only *one* place to change to add a new instruction. One good side effect of this change is that any operation 'xyz' in the IL now has a corresponding 'Oxyz' in the code. 4. Misc fixes. One notable change is that instruction selection now generates generic comparison operations and the lowering to the target's comparisons is done in the emitter. GAS directives for data are the same for many targets, so data emission was extracted in a file 'gas.c'. 5. Modularize the Makefile. The Makefile now has a list of C files that are target-independent (SRC), and one list of C files per target. Each target can also use its own 'all.h' header (for example to define registers). 
--- Makefile | 28 ++- all.h | 334 +++++++++------------------ amd64/all.h | 70 ++++++ amd64/emit.c | 561 ++++++++++++++++++++++++++++++++++++++++++++++ amd64/isel.c | 603 +++++++++++++++++++++++++++++++++++++++++++++++++ amd64/sysv.c | 701 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ amd64/targ.c | 30 +++ cfg.c | 4 +- emit.c | 696 --------------------------------------------------------- fold.c | 62 +++--- gas.c | 122 ++++++++++ isel.c | 649 ----------------------------------------------------- live.c | 30 ++- main.c | 68 ++++-- mem.c | 4 +- ops.h | 167 ++++++++++++++ parse.c | 144 +++--------- rega.c | 38 ++-- spill.c | 32 +-- sysv.c | 718 ----------------------------------------------------------- util.c | 90 ++++++++ 21 files changed, 2641 insertions(+), 2510 deletions(-) create mode 100644 amd64/all.h create mode 100644 amd64/emit.c create mode 100644 amd64/isel.c create mode 100644 amd64/sysv.c create mode 100644 amd64/targ.c delete mode 100644 emit.c create mode 100644 gas.c delete mode 100644 isel.c create mode 100644 ops.h delete mode 100644 sysv.c diff --git a/Makefile b/Makefile index f8e3da0..2433e25 100644 --- a/Makefile +++ b/Makefile @@ -1,11 +1,15 @@ BIN = qbe -ABI = sysv V = @ OBJDIR = obj -SRC = main.c util.c parse.c cfg.c mem.c ssa.c alias.c load.c copy.c fold.c live.c $(ABI).c isel.c spill.c rega.c emit.c -OBJ = $(SRC:%.c=$(OBJDIR)/%.o) +SRC = main.c util.c parse.c cfg.c mem.c ssa.c alias.c load.c copy.c \ + fold.c live.c spill.c rega.c gas.c +AMD64SRC = amd64/targ.c amd64/sysv.c amd64/isel.c amd64/emit.c +SRCALL = $(SRC) $(AMD64SRC) + +AMD64OBJ = $(AMD64SRC:%.c=$(OBJDIR)/%.o) +OBJ = $(SRC:%.c=$(OBJDIR)/%.o) $(AMD64OBJ) CFLAGS += -Wall -Wextra -std=c99 -g -pedantic @@ -19,15 +23,23 @@ $(OBJDIR)/%.o: %.c $(OBJDIR)/timestamp $(OBJDIR)/timestamp: @mkdir -p $(OBJDIR) + @mkdir -p $(OBJDIR)/amd64 @touch $@ -$(OBJ): all.h +$(OBJ): all.h ops.h +$(AMD64OBJ): amd64/all.h obj/main.o: config.h config.h: - @case `uname` in \ - *Darwin*) echo 
"#define Defaultasm Gasmacho" ;; \ - *) echo "#define Defaultasm Gaself" ;; \ + @case `uname` in \ + *Darwin*) \ + echo "#define Defasm Gasmacho"; \ + echo "#define Deftgt T_amd64_sysv"; \ + ;; \ + *) \ + echo "#define Defasm Gaself"; \ + echo "#define Deftgt T_amd64_sysv"; \ + ;; \ esac > $@ install: $(OBJDIR)/$(BIN) @@ -47,7 +59,7 @@ check: $(OBJDIR)/$(BIN) tools/unit.sh all 80: - @for F in $(SRC); \ + @for F in $(SRCALL); \ do \ awk "{ \ gsub(/\\t/, \" \"); \ diff --git a/all.h b/all.h index 124a8d2..c0e08fe 100644 --- a/all.h +++ b/all.h @@ -8,13 +8,14 @@ #define MAKESURE(what, x) typedef char make_sure_##what[(x)?1:-1] #define die(...) die_(__FILE__, __VA_ARGS__) +typedef unsigned char uchar; typedef unsigned int uint; typedef unsigned long ulong; typedef unsigned long long bits; typedef struct BSet BSet; typedef struct Ref Ref; -typedef struct OpDesc OpDesc; +typedef struct Op Op; typedef struct Ins Ins; typedef struct Phi Phi; typedef struct Blk Blk; @@ -27,6 +28,7 @@ typedef struct Fn Fn; typedef struct Typ Typ; typedef struct Seg Seg; typedef struct Dat Dat; +typedef struct Target Target; enum { NString = 32, @@ -38,61 +40,29 @@ enum { NBit = CHAR_BIT * sizeof(bits), }; -#define BIT(n) ((bits)1 << (n)) - -enum Reg { - RXX, - - RAX, /* caller-save */ - RCX, - RDX, - RSI, - RDI, - R8, - R9, - R10, - R11, - - RBX, /* callee-save */ - R12, - R13, - R14, - R15, - - RBP, /* globally live */ - RSP, -#define RGLOB (BIT(RBP)|BIT(RSP)) - - XMM0, /* sse */ - XMM1, - XMM2, - XMM3, - XMM4, - XMM5, - XMM6, - XMM7, - XMM8, - XMM9, - XMM10, - XMM11, - XMM12, - XMM13, - XMM14, - XMM15, - - Tmp0, /* first non-reg temporary */ - - NRGlob = 2, - NIReg = R15 - RAX + 1 + NRGlob, - NFReg = XMM14 - XMM0 + 1, /* XMM15 is reserved */ - NISave = R11 - RAX + 1, - NFSave = NFReg, - NRSave = NISave + NFSave, - NRClob = R15 - RBX + 1, +struct Target { + int gpr0; /* first general purpose reg */ + int ngpr; + int fpr0; /* first floating point reg */ + int nfpr; + bits rglob; /* globally 
live regs (e.g., sp, fp) */ + int nrglob; + int *rsave; /* caller-save */ + int nrsave[2]; + bits (*retregs)(Ref, int[2]); + bits (*argregs)(Ref, int[2]); + int (*memargs)(int); + void (*abi)(Fn *); + void (*isel)(Fn *); + void (*emitfn)(Fn *, FILE *); }; -MAKESURE(NBit_is_enough, NBit >= (int)Tmp0); +#define BIT(n) ((bits)1 << (n)) +enum { + RXX = 0, + Tmp0 = NBit, /* first non-reg temporary */ +}; struct BSet { uint nt; @@ -139,51 +109,81 @@ static inline int isreg(Ref r) return rtype(r) == RTmp && r.val < Tmp0; } -enum ICmp { -#define ICMPS(X) \ - X(ule) \ - X(ult) \ - X(sle) \ - X(slt) \ - X(sgt) \ - X(sge) \ - X(ugt) \ - X(uge) \ - X(eq) \ - X(ne) /* make sure icmpop() below works! */ - -#define X(c) IC##c, - ICMPS(X) -#undef X - NICmp, +enum CmpI { + Cieq, + Cine, + Cisge, + Cisgt, + Cisle, + Cislt, + Ciuge, + Ciugt, + Ciule, + Ciult, + NCmpI, +}; - ICxnp = NICmp, /* x64 specific */ - ICxp, - NXICmp +enum CmpF { + Cfeq, + Cfge, + Cfgt, + Cfle, + Cflt, + Cfne, + Cfo, + Cfuo, + NCmpF, + NCmp = NCmpI + NCmpF, }; -static inline int icmpop(int c) -{ - return c >= ICeq ? 
c : ICuge - c; -} +enum O { + Oxxx, +#define O(op, x, y) O##op, + #include "ops.h" + NOp, +}; -enum FCmp { -#define FCMPS(X) \ - X(le) \ - X(lt) \ - X(gt) \ - X(ge) \ - X(ne) \ - X(eq) \ - X(o) \ - X(uo) - -#define X(c) FC##c, - FCMPS(X) +enum J { + Jxxx, +#define JMPS(X) \ + X(ret0) X(retw) X(retl) X(rets) \ + X(retd) X(retc) X(jmp) X(jnz) \ + X(jfieq) X(jfine) X(jfisge) X(jfisgt) \ + X(jfisle) X(jfislt) X(jfiuge) X(jfiugt) \ + X(jfiule) X(jfiult) X(jffeq) X(jffge) \ + X(jffgt) X(jffle) X(jfflt) X(jffne) \ + X(jffo) X(jffuo) +#define X(j) J##j, + JMPS(X) #undef X - NFCmp + NJmp +}; + +enum { + Ocmpw = Oceqw, + Ocmpw1 = Ocultw, + Ocmpl = Oceql, + Ocmpl1 = Ocultl, + Ocmps = Oceqs, + Ocmps1 = Ocuos, + Ocmpd = Oceqd, + Ocmpd1 = Ocuod, + Oalloc = Oalloc4, + Oalloc1 = Oalloc16, + Oflag = Oflagieq, + Oflag1 = Oflagfuo, + NPubOp = Onop, + Jjf = Jjfieq, + Jjf1 = Jjffuo, }; +#define isstore(o) (Ostoreb <= o && o <= Ostored) +#define isload(o) (Oloadsb <= o && o <= Oload) +#define isext(o) (Oextsb <= o && o <= Oextuw) +#define ispar(o) (Opar <= o && o <= Opare) +#define isarg(o) (Oarg <= o && o <= Oarge) +#define isret(j) (Jret0 <= j && j <= Jretc) + enum Class { Kx = -1, /* "top" class (see usecheck() and clsmerge()) */ Kw, @@ -195,124 +195,10 @@ enum Class { #define KWIDE(k) ((k)&1) #define KBASE(k) ((k)>>1) -enum Op { - Oxxx, - - /* public instructions */ - Oadd, - Osub, - Odiv, - Orem, - Oudiv, - Ourem, - Omul, - Oand, - Oor, - Oxor, - Osar, - Oshr, - Oshl, - Ocmpw, - Ocmpw1 = Ocmpw + NICmp-1, - Ocmpl, - Ocmpl1 = Ocmpl + NICmp-1, - Ocmps, - Ocmps1 = Ocmps + NFCmp-1, - Ocmpd, - Ocmpd1 = Ocmpd + NFCmp-1, - - Ostoreb, - Ostoreh, - Ostorew, - Ostorel, - Ostores, - Ostored, -#define isstore(o) (Ostoreb <= o && o <= Ostored) - Oloadsb, /* must match Oext and Tmp.width */ - Oloadub, - Oloadsh, - Oloaduh, - Oloadsw, - Oloaduw, - Oload, -#define isload(o) (Oloadsb <= o && o <= Oload) - Oextsb, - Oextub, - Oextsh, - Oextuh, - Oextsw, - Oextuw, -#define isext(o) (Oextsb <= o && o 
<= Oextuw) - - Oexts, - Otruncd, - Ostosi, - Odtosi, - Oswtof, - Osltof, - Ocast, - - Oalloc, - Oalloc1 = Oalloc + NAlign-1, - - Ovastart, - Ovaarg, - - Ocopy, - NPubOp, - - /* function instructions */ - Opar = NPubOp, - Oparc, - Opare, -#define ispar(o) (Opar <= o && o <= Opare) - Oarg, - Oargc, - Oarge, -#define isarg(o) (Oarg <= o && o <= Oarge) - Ocall, - Ovacall, - - /* reserved instructions */ - Onop, - Oaddr, - Oswap, - Osign, - Osalloc, - Oxidiv, - Oxdiv, - Oxcmp, - Oxset, - Oxsetnp = Oxset + ICxnp, - Oxsetp = Oxset + ICxp, - Oxtest, - NOp -}; - -enum Jmp { - Jxxx, - Jret0, - Jretw, - Jretl, - Jrets, - Jretd, - Jretc, -#define isret(j) (Jret0 <= j && j <= Jretc) - Jjmp, - Jjnz, - Jxjc, - Jxjnp = Jxjc + ICxnp, - Jxjp = Jxjc + ICxp, - NJmp -}; - -struct OpDesc { +struct Op { char *name; - int nmem; short argcls[2][4]; - uint sflag:1; /* sets the zero flag */ - uint lflag:1; /* leaves flags */ - uint cfold:1; /* can fold */ + int canfold; }; struct Ins { @@ -437,7 +323,7 @@ struct Con { typedef struct Addr Addr; -struct Addr { /* x64 addressing */ +struct Addr { /* amd64 addressing */ Con offset; Ref base; Ref index; @@ -508,8 +394,8 @@ struct Dat { char export; }; - /* main.c */ +extern Target T; extern char debug['Z'+1]; /* util.c */ @@ -524,6 +410,8 @@ void die_(char *, char *, ...) 
__attribute__((noreturn)); void *emalloc(size_t); void *alloc(size_t); void freeall(void); +int argcls(Ins *, int); +int iscmp(int, int *, int *); void emit(int, int, Ref, Ref, Ref); void emiti(Ins); void idup(Ins **, Ins *, ulong); @@ -531,12 +419,15 @@ Ins *icpy(Ins *, Ins *, ulong); void *vnew(ulong, size_t, Pool); void vfree(void *); void vgrow(void *, ulong); +int cmpop(int); +int cmpneg(int); int clsmerge(short *, short); int phicls(int, Tmp *); Ref newtmp(char *, int, Fn *); void chuse(Ref, int, Fn *); Ref getcon(int64_t, Fn *); void addcon(Con *, Con *); +void blit(Ref, uint, Ref, uint, Fn *); void dumpts(BSet *, Tmp *, FILE *); void bsinit(BSet *, uint); @@ -559,7 +450,7 @@ bshas(BSet *bs, uint elt) } /* parse.c */ -extern OpDesc opdesc[NOp]; +extern Op optab[NOp]; void parse(FILE *, char *, void (Dat *), void (Fn *)); void printfn(Fn *, FILE *); void printref(Ref, Fn *, FILE *); @@ -611,16 +502,6 @@ void fold(Fn *); void liveon(BSet *, Blk *, Blk *); void filllive(Fn *); -/* abi: sysv.c */ -extern int rsave[/* NRSave */]; -extern int rclob[/* NRClob */]; -bits retregs(Ref, int[2]); -bits argregs(Ref, int[2]); -void abi(Fn *); - -/* isel.c */ -void isel(Fn *); - /* spill.c */ void fillcost(Fn *); void spill(Fn *); @@ -628,10 +509,9 @@ void spill(Fn *); /* rega.c */ void rega(Fn *); -/* emit.c */ -extern char *locprefix; -extern char *symprefix; -void emitfn(Fn *, FILE *); -void emitdat(Dat *, FILE *); -int stashfp(int64_t, int); -void emitfin(FILE *); +/* gas.c */ +extern char *gasloc; +extern char *gassym; +void gasemitdat(Dat *, FILE *); +int gasstashfp(int64_t, int); +void gasemitfin(FILE *); diff --git a/amd64/all.h b/amd64/all.h new file mode 100644 index 0000000..3a2db0e --- /dev/null +++ b/amd64/all.h @@ -0,0 +1,70 @@ +#include "../all.h" + +typedef struct Amd64Op Amd64Op; + +enum Amd64Reg { + RAX = RXX+1, /* caller-save */ + RCX, + RDX, + RSI, + RDI, + R8, + R9, + R10, + R11, + + RBX, /* callee-save */ + R12, + R13, + R14, + R15, + + RBP, /* 
globally live */ + RSP, + + XMM0, /* sse */ + XMM1, + XMM2, + XMM3, + XMM4, + XMM5, + XMM6, + XMM7, + XMM8, + XMM9, + XMM10, + XMM11, + XMM12, + XMM13, + XMM14, + XMM15, + + NFPR = XMM14 - XMM0 + 1, /* reserve XMM15 */ + NGPR = RSP - RAX + 1, + NGPS = R11 - RAX + 1, + NFPS = NFPR, + NCLR = R15 - RBX + 1, +}; +MAKESURE(reg_not_tmp, XMM15 < (int)Tmp0); + +struct Amd64Op { + char nmem; + char zflag; + char lflag; +}; + +/* targ.c */ +extern Amd64Op amd64_op[]; + +/* sysv.c (abi) */ +extern int amd64_sysv_rsave[]; +extern int amd64_sysv_rclob[]; +bits amd64_sysv_retregs(Ref, int[2]); +bits amd64_sysv_argregs(Ref, int[2]); +void amd64_sysv_abi(Fn *); + +/* isel.c */ +void amd64_isel(Fn *); + +/* emit.c */ +void amd64_emitfn(Fn *, FILE *); diff --git a/amd64/emit.c b/amd64/emit.c new file mode 100644 index 0000000..eccbd02 --- /dev/null +++ b/amd64/emit.c @@ -0,0 +1,561 @@ +#include "all.h" + + +#define CMP(X) \ + X(Ciule, "be") \ + X(Ciult, "b") \ + X(Cisle, "le") \ + X(Cislt, "l") \ + X(Cisgt, "g") \ + X(Cisge, "ge") \ + X(Ciugt, "a") \ + X(Ciuge, "ae") \ + X(Cieq, "z") \ + X(Cine, "nz") \ + X(NCmpI+Cfle, "be") \ + X(NCmpI+Cflt, "b") \ + X(NCmpI+Cfgt, "a") \ + X(NCmpI+Cfge, "ae") \ + X(NCmpI+Cfeq, "z") \ + X(NCmpI+Cfne, "nz") \ + X(NCmpI+Cfo, "np") \ + X(NCmpI+Cfuo, "p") + +enum { + SLong = 0, + SWord = 1, + SShort = 2, + SByte = 3, + + Ki = -1, /* matches Kw and Kl */ + Ka = -2, /* matches all classes */ +}; + +/* Instruction format strings: + * + * if the format string starts with -, the instruction + * is assumed to be 3-address and is put in 2-address + * mode using an extra mov if necessary + * + * if the format string starts with +, the same as the + * above applies, but commutativity is also assumed + * + * %k is used to set the class of the instruction, + * it'll expand to "l", "q", "ss", "sd", depending + * on the instruction class + * %0 designates the first argument + * %1 designates the second argument + * %= designates the result + * + * if %k is not used, 
a prefix to 0, 1, or = must be + * added, it can be: + * M - memory reference + * L - long (64 bits) + * W - word (32 bits) + * H - short (16 bits) + * B - byte (8 bits) + * S - single precision float + * D - double precision float + */ +static struct { + short op; + short cls; + char *asm; +} omap[] = { + { Oadd, Ka, "+add%k %1, %=" }, + { Osub, Ka, "-sub%k %1, %=" }, + { Oand, Ki, "+and%k %1, %=" }, + { Oor, Ki, "+or%k %1, %=" }, + { Oxor, Ki, "+xor%k %1, %=" }, + { Osar, Ki, "-sar%k %B1, %=" }, + { Oshr, Ki, "-shr%k %B1, %=" }, + { Oshl, Ki, "-shl%k %B1, %=" }, + { Omul, Ki, "+imul%k %1, %=" }, + { Omul, Ks, "+mulss %1, %=" }, + { Omul, Kd, "+mulsd %1, %=" }, + { Odiv, Ka, "-div%k %1, %=" }, + { Ostorel, Ka, "movq %L0, %M1" }, + { Ostorew, Ka, "movl %W0, %M1" }, + { Ostoreh, Ka, "movw %H0, %M1" }, + { Ostoreb, Ka, "movb %B0, %M1" }, + { Ostores, Ka, "movss %S0, %M1" }, + { Ostored, Ka, "movsd %D0, %M1" }, + { Oload, Ka, "mov%k %M0, %=" }, + { Oloadsw, Kl, "movslq %M0, %L=" }, + { Oloadsw, Kw, "movl %M0, %W=" }, + { Oloaduw, Ki, "movl %M0, %W=" }, + { Oloadsh, Ki, "movsw%k %M0, %=" }, + { Oloaduh, Ki, "movzw%k %M0, %=" }, + { Oloadsb, Ki, "movsb%k %M0, %=" }, + { Oloadub, Ki, "movzb%k %M0, %=" }, + { Oextsw, Kl, "movslq %W0, %L=" }, + { Oextuw, Kl, "movl %W0, %W=" }, + { Oextsh, Ki, "movsw%k %H0, %=" }, + { Oextuh, Ki, "movzw%k %H0, %=" }, + { Oextsb, Ki, "movsb%k %B0, %=" }, + { Oextub, Ki, "movzb%k %B0, %=" }, + + { Oexts, Kd, "cvtss2sd %0, %=" }, + { Otruncd, Ks, "cvttsd2ss %0, %=" }, + { Ostosi, Ki, "cvttss2si%k %0, %=" }, + { Odtosi, Ki, "cvttsd2si%k %0, %=" }, + { Oswtof, Ka, "cvtsi2%k %W0, %=" }, + { Osltof, Ka, "cvtsi2%k %L0, %=" }, + { Ocast, Ki, "movq %D0, %L=" }, + { Ocast, Ka, "movq %L0, %D=" }, + + { Oaddr, Ki, "lea%k %M0, %=" }, + { Oswap, Ki, "xchg%k %0, %1" }, + { Osign, Kl, "cqto" }, + { Osign, Kw, "cltd" }, + { Oxdiv, Ki, "div%k %0" }, + { Oxidiv, Ki, "idiv%k %0" }, + { Oxcmp, Ks, "comiss %S0, %S1" }, + { Oxcmp, Kd, "comisd %D0, %D1" }, + { 
Oxcmp, Ki, "cmp%k %0, %1" }, + { Oxtest, Ki, "test%k %0, %1" }, +#define X(c, s) \ + { Oflag+c, Ki, "set" s " %B=\n\tmovzb%k %B=, %=" }, + CMP(X) +#undef X + { NOp, 0, 0 } +}; + +static char *rname[][4] = { + [RAX] = {"rax", "eax", "ax", "al"}, + [RBX] = {"rbx", "ebx", "bx", "bl"}, + [RCX] = {"rcx", "ecx", "cx", "cl"}, + [RDX] = {"rdx", "edx", "dx", "dl"}, + [RSI] = {"rsi", "esi", "si", "sil"}, + [RDI] = {"rdi", "edi", "di", "dil"}, + [RBP] = {"rbp", "ebp", "bp", "bpl"}, + [RSP] = {"rsp", "esp", "sp", "spl"}, + [R8 ] = {"r8" , "r8d", "r8w", "r8b"}, + [R9 ] = {"r9" , "r9d", "r9w", "r9b"}, + [R10] = {"r10", "r10d", "r10w", "r10b"}, + [R11] = {"r11", "r11d", "r11w", "r11b"}, + [R12] = {"r12", "r12d", "r12w", "r12b"}, + [R13] = {"r13", "r13d", "r13w", "r13b"}, + [R14] = {"r14", "r14d", "r14w", "r14b"}, + [R15] = {"r15", "r15d", "r15w", "r15b"}, +}; + + +static int +slot(int s, Fn *fn) +{ + struct { int i:29; } x; + + /* sign extend s using a bitfield */ + x.i = s; + assert(x.i <= fn->slot); + /* specific to NAlign == 3 */ + if (x.i < 0) + return -4 * x.i; + else if (fn->vararg) + return -176 + -4 * (fn->slot - x.i); + else + return -4 * (fn->slot - x.i); +} + +static void +emitcon(Con *con, FILE *f) +{ + switch (con->type) { + case CAddr: + if (con->local) + fprintf(f, "%s%s", gasloc, con->label); + else + fprintf(f, "%s%s", gassym, con->label); + if (con->bits.i) + fprintf(f, "%+"PRId64, con->bits.i); + break; + case CBits: + fprintf(f, "%"PRId64, con->bits.i); + break; + default: + die("unreachable"); + } +} + +static char * +regtoa(int reg, int sz) +{ + static char buf[6]; + + if (reg >= XMM0) { + sprintf(buf, "xmm%d", reg-XMM0); + return buf; + } else + return rname[reg][sz]; +} + +static Ref +getarg(char c, Ins *i) +{ + switch (c) { + case '0': + return i->arg[0]; + case '1': + return i->arg[1]; + case '=': + return i->to; + default: + die("invalid arg letter %c", c); + } +} + +static void emitins(Ins, Fn *, FILE *); + +static void +emitcopy(Ref r1, Ref r2, int k, 
Fn *fn, FILE *f) +{ + Ins icp; + + icp.op = Ocopy; + icp.arg[0] = r2; + icp.to = r1; + icp.cls = k; + emitins(icp, fn, f); +} + +static void +emitf(char *s, Ins *i, Fn *fn, FILE *f) +{ + static char clstoa[][3] = {"l", "q", "ss", "sd"}; + char c; + int sz; + Ref ref; + Mem *m; + Con off; + + switch (*s) { + case '+': + if (req(i->arg[1], i->to)) { + ref = i->arg[0]; + i->arg[0] = i->arg[1]; + i->arg[1] = ref; + } + /* fall through */ + case '-': + assert((!req(i->arg[1], i->to) || req(i->arg[0], i->to)) && + "cannot convert to 2-address"); + emitcopy(i->to, i->arg[0], i->cls, fn, f); + s++; + break; + } + + fputc('\t', f); +Next: + while ((c = *s++) != '%') + if (!c) { + fputc('\n', f); + return; + } else + fputc(c, f); + switch ((c = *s++)) { + case '%': + fputc('%', f); + break; + case 'k': + fputs(clstoa[i->cls], f); + break; + case '0': + case '1': + case '=': + sz = KWIDE(i->cls) ? SLong : SWord; + s--; + goto Ref; + case 'D': + case 'S': + sz = SLong; /* does not matter for floats */ + Ref: + c = *s++; + ref = getarg(c, i); + switch (rtype(ref)) { + case RTmp: + assert(isreg(ref)); + fprintf(f, "%%%s", regtoa(ref.val, sz)); + break; + case RSlot: + fprintf(f, "%d(%%rbp)", slot(ref.val, fn)); + break; + case RMem: + Mem: + m = &fn->mem[ref.val]; + if (rtype(m->base) == RSlot) { + off.type = CBits; + off.bits.i = slot(m->base.val, fn); + addcon(&m->offset, &off); + m->base = TMP(RBP); + } + if (m->offset.type != CUndef) + emitcon(&m->offset, f); + fputc('(', f); + if (req(m->base, R)) + fprintf(f, "%%rip"); + else + fprintf(f, "%%%s", regtoa(m->base.val, SLong)); + if (!req(m->index, R)) + fprintf(f, ", %%%s, %d", + regtoa(m->index.val, SLong), + m->scale + ); + fputc(')', f); + break; + case RCon: + fputc('$', f); + emitcon(&fn->con[ref.val], f); + break; + default: + die("unreachable"); + } + break; + case 'L': + sz = SLong; + goto Ref; + case 'W': + sz = SWord; + goto Ref; + case 'H': + sz = SShort; + goto Ref; + case 'B': + sz = SByte; + goto Ref; + case 
'M': + c = *s++; + ref = getarg(c, i); + switch (rtype(ref)) { + case RMem: + goto Mem; + case RSlot: + fprintf(f, "%d(%%rbp)", slot(ref.val, fn)); + break; + case RCon: + emitcon(&fn->con[ref.val], f); + fprintf(f, "(%%rip)"); + break; + case RTmp: + assert(isreg(ref)); + fprintf(f, "(%%%s)", regtoa(ref.val, SLong)); + break; + default: + die("unreachable"); + } + break; + default: + die("invalid format specifier %%%c", c); + } + goto Next; +} + +static void +emitins(Ins i, Fn *fn, FILE *f) +{ + Ref r; + int64_t val; + int o; + + switch (i.op) { + default: + Table: + /* most instructions are just pulled out of + * the table omap[], some special cases are + * detailed below */ + for (o=0;; o++) { + /* this linear search should really be a binary + * search */ + if (omap[o].op == NOp) + die("no match for %s(%d)", + optab[i.op].name, "wlsd"[i.cls]); + if (omap[o].op == i.op) + if (omap[o].cls == i.cls + || (omap[o].cls == Ki && KBASE(i.cls) == 0) + || (omap[o].cls == Ka)) + break; + } + emitf(omap[o].asm, &i, fn, f); + break; + case Onop: + /* just do nothing for nops, they are inserted + * by some passes */ + break; + case Omul: + /* here, we try to use the 3-address form + * of multiplication when possible */ + if (rtype(i.arg[1]) == RCon) { + r = i.arg[0]; + i.arg[0] = i.arg[1]; + i.arg[1] = r; + } + if (KBASE(i.cls) == 0 /* only available for ints */ + && rtype(i.arg[0]) == RCon + && rtype(i.arg[1]) == RTmp) { + emitf("imul%k %0, %1, %=", &i, fn, f); + break; + } + goto Table; + case Osub: + /* we have to use the negation trick to handle + * some 3-address subtractions */ + if (req(i.to, i.arg[1])) { + emitf("neg%k %=", &i, fn, f); + emitf("add%k %0, %=", &i, fn, f); + break; + } + goto Table; + case Ocopy: + /* make sure we don't emit useless copies, + * also, we can use a trick to load 64-bits + * registers, it's detailed in my note below + * http://c9x.me/art/notes.html?09/19/2015 */ + if (req(i.to, R) || req(i.arg[0], R)) + break; + if (isreg(i.to) + && 
rtype(i.arg[0]) == RCon + && i.cls == Kl + && fn->con[i.arg[0].val].type == CBits + && (val = fn->con[i.arg[0].val].bits.i) >= 0 + && val <= UINT32_MAX) { + emitf("movl %W0, %W=", &i, fn, f); + } else if (isreg(i.to) + && rtype(i.arg[0]) == RCon + && fn->con[i.arg[0].val].type == CAddr) { + emitf("lea%k %M0, %=", &i, fn, f); + } else if (!req(i.arg[0], i.to)) + emitf("mov%k %0, %=", &i, fn, f); + break; + case Ocall: + /* calls simply have a weird syntax in AT&T + * assembly... */ + switch (rtype(i.arg[0])) { + case RCon: + fprintf(f, "\tcallq "); + emitcon(&fn->con[i.arg[0].val], f); + fprintf(f, "\n"); + break; + case RTmp: + emitf("callq *%L0", &i, fn, f); + break; + default: + die("invalid call argument"); + } + break; + case Osalloc: + /* there is no good reason why this is here + * maybe we should split Osalloc in 2 different + * instructions depending on the result + */ + emitf("subq %L0, %%rsp", &i, fn, f); + if (!req(i.to, R)) + emitcopy(i.to, TMP(RSP), Kl, fn, f); + break; + case Oswap: + if (KBASE(i.cls) == 0) + goto Table; + /* for floats, there is no swap instruction + * so we use xmm15 as a temporary + */ + emitcopy(TMP(XMM0+15), i.arg[0], i.cls, fn, f); + emitcopy(i.arg[0], i.arg[1], i.cls, fn, f); + emitcopy(i.arg[1], TMP(XMM0+15), i.cls, fn, f); + break; + } +} + +static int +framesz(Fn *fn) +{ + int i, o, f; + + /* specific to NAlign == 3 */ + for (i=0, o=0; i<NCLR; i++) + o ^= 1 & (fn->reg >> amd64_sysv_rclob[i]); + f = fn->slot; + f = (f + 3) & -4; + return 4*f + 8*o + 176*fn->vararg; +} + +void +amd64_emitfn(Fn *fn, FILE *f) +{ + static char *ctoa[] = { + #define X(c, s) [c] = s, + CMP(X) + #undef X + }; + static int id0; + Blk *b, *s; + Ins *i, itmp; + int *r, c, fs, o, n, lbl; + + fprintf(f, ".text\n"); + if (fn->export) + fprintf(f, ".globl %s%s\n", gassym, fn->name); + fprintf(f, + "%s%s:\n" + "\tpushq %%rbp\n" + "\tmovq %%rsp, %%rbp\n", + gassym, fn->name + ); + fs = framesz(fn); + if (fs) + fprintf(f, "\tsub $%d, %%rsp\n", fs); + if (fn->vararg) { + o = -176; + for 
(r=amd64_sysv_rsave; r<&amd64_sysv_rsave[6]; r++, o+=8) + fprintf(f, "\tmovq %%%s, %d(%%rbp)\n", rname[*r][0], o); + for (n=0; n<8; ++n, o+=16) + fprintf(f, "\tmovaps %%xmm%d, %d(%%rbp)\n", n, o); + } + for (r=amd64_sysv_rclob; r<&amd64_sysv_rclob[NCLR]; r++) + if (fn->reg & BIT(*r)) { + itmp.arg[0] = TMP(*r); + emitf("pushq %L0", &itmp, fn, f); + } + + for (lbl=0, b=fn->start; b; b=b->link) { + if (lbl || b->npred > 1) + fprintf(f, "%sbb%d:\n", gasloc, id0+b->id); + for (i=b->ins; i!=&b->ins[b->nins]; i++) + emitins(*i, fn, f); + lbl = 1; + switch (b->jmp.type) { + case Jret0: + for (r=&amd64_sysv_rclob[NCLR]; r>amd64_sysv_rclob;) + if (fn->reg & BIT(*--r)) { + itmp.arg[0] = TMP(*r); + emitf("popq %L0", &itmp, fn, f); + } + fprintf(f, + "\tleave\n" + "\tret\n" + ); + break; + case Jjmp: + Jmp: + if (b->s1 != b->link) + fprintf(f, "\tjmp %sbb%d\n", + gasloc, id0+b->s1->id); + else + lbl = 0; + break; + default: + c = b->jmp.type - Jjf; + if (0 <= c && c <= NCmp) { + if (b->link == b->s2) { + s = b->s1; + b->s1 = b->s2; + b->s2 = s; + } else + c = cmpneg(c); + fprintf(f, "\tj%s %sbb%d\n", ctoa[c], + gasloc, id0+b->s2->id); + goto Jmp; + } + die("unhandled jump %d", b->jmp.type); + } + } + id0 += fn->nblk; +} diff --git a/amd64/isel.c b/amd64/isel.c new file mode 100644 index 0000000..1623b9b --- /dev/null +++ b/amd64/isel.c @@ -0,0 +1,603 @@ +#include "all.h" +#include <limits.h> + +/* For x86_64, do the following: + * + * - check that constants are used only in + * places allowed + * - ensure immediates always fit in 32b + * - expose machine register constraints + * on instructions like division. + * - implement fast locals (the streak of + * constant allocX in the first basic block) + * - recognize complex addressing modes + * + * Invariant: the use counts that are used + * in sel() must be sound. This + * is not so trivial, maybe the + * dce should be moved out... 
+ */ + +typedef struct ANum ANum; + +struct ANum { + char n, l, r; + Ins *i; +}; + +static void amatch(Addr *, Ref, ANum *, Fn *, int); + +static int +noimm(Ref r, Fn *fn) +{ + int64_t val; + + if (rtype(r) != RCon) + return 0; + switch (fn->con[r.val].type) { + case CAddr: + /* we only support the 'small' + * code model of the ABI, this + * means that we can always + * address data with 32bits + */ + return 0; + case CBits: + val = fn->con[r.val].bits.i; + return (val < INT32_MIN || val > INT32_MAX); + default: + die("invalid constant"); + } +} + +static int +rslot(Ref r, Fn *fn) +{ + if (rtype(r) != RTmp) + return -1; + return fn->tmp[r.val].slot; +} + +static void +fixarg(Ref *r, int k, int cpy, Fn *fn) +{ + Addr a, *m; + Ref r0, r1; + int s, n; + + r1 = r0 = *r; + s = rslot(r0, fn); + if (KBASE(k) == 1 && rtype(r0) == RCon) { + /* load floating points from memory + * slots, they can't be used as + * immediates + */ + r1 = MEM(fn->nmem); + vgrow(&fn->mem, ++fn->nmem); + memset(&a, 0, sizeof a); + a.offset.type = CAddr; + a.offset.local = 1; + n = gasstashfp(fn->con[r0.val].bits.i, KWIDE(k)); + sprintf(a.offset.label, "fp%d", n); + fn->mem[fn->nmem-1] = a; + } + else if (!cpy && k == Kl && noimm(r0, fn)) { + /* load constants that do not fit in + * a 32bit signed integer into a + * long temporary + */ + r1 = newtmp("isel", Kl, fn); + emit(Ocopy, Kl, r1, r0, R); + } + else if (s != -1) { + /* load fast locals' addresses into + * temporaries right before the + * instruction + */ + r1 = newtmp("isel", Kl, fn); + emit(Oaddr, Kl, r1, SLOT(s), R); + } + else if (rtype(r0) == RMem) { + /* apple asm fix */ + m = &fn->mem[r0.val]; + if (req(m->base, R)) { + n = fn->ncon; + vgrow(&fn->con, ++fn->ncon); + fn->con[n] = m->offset; + m->offset.type = CUndef; + r0 = newtmp("isel", Kl, fn); + emit(Oaddr, Kl, r0, CON(n), R); + m->base = r0; + } + } + *r = r1; +} + +static void +seladdr(Ref *r, ANum *an, Fn *fn) +{ + Addr a; + Ref r0; + + r0 = *r; + if (rtype(r0) == RTmp) { + 
amatch(&a, r0, an, fn, 1); + if (req(a.base, r0)) + return; + if (a.offset.type == CAddr) + if (!req(a.base, R)) { + /* apple asm fix */ + if (!req(a.index, R)) + return; + else { + a.index = a.base; + a.scale = 1; + a.base = R; + } + } + chuse(r0, -1, fn); + vgrow(&fn->mem, ++fn->nmem); + fn->mem[fn->nmem-1] = a; + chuse(a.base, +1, fn); + chuse(a.index, +1, fn); + *r = MEM(fn->nmem-1); + } +} + +static int +selcmp(Ref arg[2], int k, Fn *fn) +{ + int swap; + Ref r, *iarg; + + swap = rtype(arg[0]) == RCon; + if (swap) { + r = arg[1]; + arg[1] = arg[0]; + arg[0] = r; + } + emit(Oxcmp, k, R, arg[1], arg[0]); + iarg = curi->arg; + if (rtype(arg[0]) == RCon) { + assert(k == Kl); + iarg[1] = newtmp("isel", k, fn); + emit(Ocopy, k, iarg[1], arg[0], R); + } + fixarg(&iarg[0], k, 0, fn); + fixarg(&iarg[1], k, 0, fn); + return swap; +} + +static void +sel(Ins i, ANum *an, Fn *fn) +{ + Ref r0, r1, *iarg; + int x, k, kc; + int64_t sz; + Ins *i0, *i1; + + if (rtype(i.to) == RTmp) + if (!isreg(i.to) && !isreg(i.arg[0]) && !isreg(i.arg[1])) + if (fn->tmp[i.to.val].nuse == 0) { + chuse(i.arg[0], -1, fn); + chuse(i.arg[1], -1, fn); + return; + } + i0 = curi; + k = i.cls; + switch (i.op) { + case Odiv: + case Orem: + case Oudiv: + case Ourem: + if (i.op == Odiv || i.op == Oudiv) + r0 = TMP(RAX), r1 = TMP(RDX); + else + r0 = TMP(RDX), r1 = TMP(RAX); + emit(Ocopy, k, i.to, r0, R); + emit(Ocopy, k, R, r1, R); + if (rtype(i.arg[1]) == RCon) { + /* immediates not allowed for + * divisions in x86 + */ + r0 = newtmp("isel", k, fn); + } else + r0 = i.arg[1]; + if (fn->tmp[r0.val].slot != -1) + err("unlikely argument %%%s in %s", + fn->tmp[r0.val].name, optab[i.op].name); + if (i.op == Odiv || i.op == Orem) { + emit(Oxidiv, k, R, r0, R); + emit(Osign, k, TMP(RDX), TMP(RAX), R); + } else { + emit(Oxdiv, k, R, r0, R); + emit(Ocopy, k, TMP(RDX), CON_Z, R); + } + emit(Ocopy, k, TMP(RAX), i.arg[0], R); + fixarg(&curi->arg[0], k, 0, fn); + if (rtype(i.arg[1]) == RCon) + emit(Ocopy, k, r0, 
i.arg[1], R); + break; + case Osar: + case Oshr: + case Oshl: + if (rtype(i.arg[1]) == RCon) + goto Emit; + r0 = i.arg[1]; + i.arg[1] = TMP(RCX); + emit(Ocopy, Kw, R, TMP(RCX), R); + emiti(i); + emit(Ocopy, Kw, TMP(RCX), r0, R); + break; + case Onop: + break; + case Ostored: + case Ostores: + case Ostorel: + case Ostorew: + case Ostoreh: + case Ostoreb: + if (rtype(i.arg[0]) == RCon) { + if (i.op == Ostored) + i.op = Ostorel; + if (i.op == Ostores) + i.op = Ostorew; + } + seladdr(&i.arg[1], an, fn); + goto Emit; + case_Oload: + seladdr(&i.arg[0], an, fn); + goto Emit; + case Ocall: + case Osalloc: + case Ocopy: + case Oadd: + case Osub: + case Omul: + case Oand: + case Oor: + case Oxor: + case Oxtest: + case Ostosi: + case Odtosi: + case Oswtof: + case Osltof: + case Oexts: + case Otruncd: + case Ocast: + case_OExt: +Emit: + emiti(i); + iarg = curi->arg; /* fixarg() can change curi */ + fixarg(&iarg[0], argcls(&i, 0), 0, fn); + fixarg(&iarg[1], argcls(&i, 1), 0, fn); + break; + case Oalloc: + case Oalloc+1: + case Oalloc+2: /* == Oalloc1 */ + /* we need to make sure + * the stack remains aligned + * (rsp = 0) mod 16 + */ + if (rtype(i.arg[0]) == RCon) { + sz = fn->con[i.arg[0].val].bits.i; + if (sz < 0 || sz >= INT_MAX-15) + err("invalid alloc size %"PRId64, sz); + sz = (sz + 15) & -16; + emit(Osalloc, Kl, i.to, getcon(sz, fn), R); + } else { + /* r0 = (i.arg[0] + 15) & -16 */ + r0 = newtmp("isel", Kl, fn); + r1 = newtmp("isel", Kl, fn); + emit(Osalloc, Kl, i.to, r0, R); + emit(Oand, Kl, r0, r1, getcon(-16, fn)); + emit(Oadd, Kl, r1, i.arg[0], getcon(15, fn)); + if (fn->tmp[i.arg[0].val].slot != -1) + err("unlikely argument %%%s in %s", + fn->tmp[i.arg[0].val].name, optab[i.op].name); + } + break; + default: + if (isext(i.op)) + goto case_OExt; + if (isload(i.op)) + goto case_Oload; + if (iscmp(i.op, &kc, &x)) { + emit(Oflag+x, k, i.to, R, R); + i1 = curi; + if (selcmp(i.arg, kc, fn)) + i1->op = Oflag + cmpop(x); + break; + } + die("unknown instruction %s", 
optab[i.op].name); + } + + while (i0 > curi && --i0) { + assert(rslot(i0->arg[0], fn) == -1); + assert(rslot(i0->arg[1], fn) == -1); + } +} + +static Ins * +flagi(Ins *i0, Ins *i) +{ + while (i>i0) { + i--; + if (amd64_op[i->op].zflag) + return i; + if (amd64_op[i->op].lflag) + continue; + return 0; + } + return 0; +} + +static void +seljmp(Blk *b, Fn *fn) +{ + Ref r; + int c, k; + Ins *fi; + Tmp *t; + + if (b->jmp.type == Jret0 || b->jmp.type == Jjmp) + return; + assert(b->jmp.type == Jjnz); + r = b->jmp.arg; + t = &fn->tmp[r.val]; + b->jmp.arg = R; + assert(!req(r, R) && rtype(r) != RCon); + if (b->s1 == b->s2) { + chuse(r, -1, fn); + b->jmp.type = Jjmp; + b->s2 = 0; + return; + } + fi = flagi(b->ins, &b->ins[b->nins]); + if (!fi || !req(fi->to, r)) { + selcmp((Ref[2]){r, CON_Z}, Kw, fn); /* todo, long jnz */ + b->jmp.type = Jjf + Cine; + } + else if (iscmp(fi->op, &k, &c)) { + if (t->nuse == 1) { + if (selcmp(fi->arg, k, fn)) + c = cmpop(c); + *fi = (Ins){.op = Onop}; + } + b->jmp.type = Jjf + c; + } + else if (fi->op == Oand && t->nuse == 1 + && (rtype(fi->arg[0]) == RTmp || + rtype(fi->arg[1]) == RTmp)) { + fi->op = Oxtest; + fi->to = R; + b->jmp.type = Jjf + Cine; + if (rtype(fi->arg[1]) == RCon) { + r = fi->arg[1]; + fi->arg[1] = fi->arg[0]; + fi->arg[0] = r; + } + } + else { + /* since flags are not tracked in liveness, + * the result of the flag-setting instruction + * has to be marked as live + */ + if (t->nuse == 1) + emit(Ocopy, Kw, R, r, R); + b->jmp.type = Jjf + Cine; + } +} + +static int +aref(Ref r, ANum *ai) +{ + switch (rtype(r)) { + case RCon: + return 2; + case RTmp: + return ai[r.val].n; + default: + die("constant or temporary expected"); + } +} + +static int +ascale(Ref r, Con *con) +{ + int64_t n; + + if (rtype(r) != RCon) + return 0; + if (con[r.val].type != CBits) + return 0; + n = con[r.val].bits.i; + return n == 1 || n == 2 || n == 4 || n == 8; +} + +static void +anumber(ANum *ai, Blk *b, Con *con) +{ + /* This should be made obsolete by 
a proper + * reassoc pass. + * + * Rules: + * + * RTmp(_) -> 0 tmp + * ( RTmp(_) -> 1 slot ) + * RCon(_) -> 2 con + * 0 * 2 -> 3 s * i (when constant is 1,2,4,8) + */ + static char add[10][10] = { + [2] [2] = 2, /* folding */ + [2] [5] = 5, [5] [2] = 5, + [2] [6] = 6, [6] [2] = 6, + [2] [7] = 7, [7] [2] = 7, + [0] [0] = 4, /* 4: b + s * i */ + [0] [3] = 4, [3] [0] = 4, + [2] [3] = 5, [3] [2] = 5, /* 5: o + s * i */ + [0] [2] = 6, [2] [0] = 6, /* 6: o + b */ + [2] [4] = 7, [4] [2] = 7, /* 7: o + b + s * i */ + [0] [5] = 7, [5] [0] = 7, + [6] [3] = 7, [3] [6] = 7, + + }; + int a, a1, a2, n1, n2, t1, t2; + Ins *i; + + for (i=b->ins; i-b->ins < b->nins; i++) { + if (rtype(i->to) == RTmp) + ai[i->to.val].i = i; + if (i->op != Oadd && i->op != Omul) + continue; + a1 = aref(i->arg[0], ai); + a2 = aref(i->arg[1], ai); + t1 = a1 != 1 && a1 != 2; + t2 = a2 != 1 && a2 != 2; + if (i->op == Oadd) { + a = add[n1 = a1][n2 = a2]; + if (t1 && a < add[0][a2]) + a = add[n1 = 0][n2 = a2]; + if (t2 && a < add[a1][0]) + a = add[n1 = a1][n2 = 0]; + if (t1 && t2 && a < add[0][0]) + a = add[n1 = 0][n2 = 0]; + } else { + n1 = n2 = a = 0; + if (ascale(i->arg[0], con) && t2) + a = 3, n1 = 2, n2 = 0; + if (t1 && ascale(i->arg[1], con)) + a = 3, n1 = 0, n2 = 2; + } + ai[i->to.val].n = a; + ai[i->to.val].l = n1; + ai[i->to.val].r = n2; + } +} + +static void +amatch(Addr *a, Ref r, ANum *ai, Fn *fn, int top) +{ + Ins *i; + int nl, nr, t, s; + Ref al, ar; + + if (top) + memset(a, 0, sizeof *a); + if (rtype(r) == RCon) { + addcon(&a->offset, &fn->con[r.val]); + return; + } + assert(rtype(r) == RTmp); + i = ai[r.val].i; + nl = ai[r.val].l; + nr = ai[r.val].r; + if (i) { + if (nl > nr) { + al = i->arg[1]; + ar = i->arg[0]; + t = nl, nl = nr, nr = t; + } else { + al = i->arg[0]; + ar = i->arg[1]; + } + } + switch (ai[r.val].n) { + case 3: /* s * i */ + if (!top) { + a->index = al; + a->scale = fn->con[ar.val].bits.i; + } else + a->base = r; + break; + case 4: /* b + s * i */ + switch (nr) { + case 0: 
+ if (fn->tmp[ar.val].slot != -1) { + al = i->arg[1]; + ar = i->arg[0]; + } + a->index = ar; + a->scale = 1; + break; + case 3: + amatch(a, ar, ai, fn, 0); + break; + } + r = al; + case 0: + s = fn->tmp[r.val].slot; + if (s != -1) + r = SLOT(s); + a->base = r; + break; + case 2: /* constants */ + case 5: /* o + s * i */ + case 6: /* o + b */ + case 7: /* o + b + s * i */ + amatch(a, ar, ai, fn, 0); + amatch(a, al, ai, fn, 0); + break; + default: + die("unreachable"); + } +} + +/* instruction selection + * requires use counts (as given by parsing) + */ +void +amd64_isel(Fn *fn) +{ + Blk *b, **sb; + Ins *i; + Phi *p; + uint a; + int n, al; + int64_t sz; + ANum *ainfo; + + /* assign slots to fast allocs */ + b = fn->start; + /* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */ + for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2) + for (i=b->ins; i-b->ins < b->nins; i++) + if (i->op == al) { + if (rtype(i->arg[0]) != RCon) + break; + sz = fn->con[i->arg[0].val].bits.i; + if (sz < 0 || sz >= INT_MAX-15) + err("invalid alloc size %"PRId64, sz); + sz = (sz + n-1) & -n; + sz /= 4; + fn->tmp[i->to.val].slot = fn->slot; + fn->slot += sz; + *i = (Ins){.op = Onop}; + } + + /* process basic blocks */ + n = fn->ntmp; + ainfo = emalloc(n * sizeof ainfo[0]); + for (b=fn->start; b; b=b->link) { + curi = &insb[NIns]; + for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++) + for (p=(*sb)->phi; p; p=p->link) { + for (a=0; p->blk[a] != b; a++) + assert(a+1 < p->narg); + fixarg(&p->arg[a], p->cls, 1, fn); + } + memset(ainfo, 0, n * sizeof ainfo[0]); + anumber(ainfo, b, fn->con); + seljmp(b, fn); + for (i=&b->ins[b->nins]; i!=b->ins;) + sel(*--i, ainfo, fn); + b->nins = &insb[NIns] - curi; + idup(&b->ins, curi, b->nins); + } + free(ainfo); + + if (debug['I']) { + fprintf(stderr, "\n> After instruction selection:\n"); + printfn(fn, stderr); + } +} diff --git a/amd64/sysv.c b/amd64/sysv.c new file mode 100644 index 0000000..dcaa812 --- /dev/null +++ b/amd64/sysv.c @@ -0,0 +1,701 @@ 
+#include "all.h" + +typedef struct AClass AClass; +typedef struct RAlloc RAlloc; + +struct AClass { + int inmem; + int align; + uint size; + int cls[2]; + Ref ref[2]; +}; + +struct RAlloc { + Ins i; + RAlloc *link; +}; + +static void +classify(AClass *a, Typ *t, int *pn, int *pe) +{ + Seg *seg; + int n, s, *cls; + + for (n=0; nnunion; n++) { + seg = t->seg[n]; + for (s=0; *pe<2; (*pe)++) { + cls = &a->cls[*pe]; + for (; *pn<8; s++) { + switch (seg[s].type) { + case SEnd: + goto Done; + case SPad: + /* don't change anything */ + break; + case SFlt: + if (*cls == Kx) + *cls = Kd; + break; + case SInt: + *cls = Kl; + break; + case STyp: + classify(a, &typ[seg[s].len], pn, pe); + continue; + } + *pn += seg[s].len; + } + Done: + assert(*pn <= 8); + *pn = 0; + } + } +} + +static void +typclass(AClass *a, Typ *t) +{ + int e, n; + uint sz, al; + + sz = t->size; + al = 1u << t->align; + + /* the ABI requires sizes to be rounded + * up to the nearest multiple of 8, moreover + * it makes it easy load and store structures + * in registers + */ + if (al < 8) + al = 8; + sz = (sz + al-1) & -al; + + a->size = sz; + a->align = t->align; + + if (t->dark || sz > 16 || sz == 0) { + /* large or unaligned structures are + * required to be passed in memory + */ + a->inmem = 1; + return; + } + + a->cls[0] = Kx; + a->cls[1] = Kx; + a->inmem = 0; + n = 0; + e = 0; + classify(a, t, &n, &e); +} + +static int +retr(Ref reg[2], AClass *aret) +{ + static int retreg[2][2] = {{RAX, RDX}, {XMM0, XMM0+1}}; + int n, k, ca, nr[2]; + + nr[0] = nr[1] = 0; + ca = 0; + for (n=0; (uint)n*8size; n++) { + k = KBASE(aret->cls[n]); + reg[n] = TMP(retreg[k][nr[k]++]); + ca += 1 << (2 * k); + } + return ca; +} + +static void +selret(Blk *b, Fn *fn) +{ + int j, k, ca; + Ref r, r0, reg[2]; + AClass aret; + + j = b->jmp.type; + + if (!isret(j) || j == Jret0) + return; + + r0 = b->jmp.arg; + b->jmp.type = Jret0; + + if (j == Jretc) { + typclass(&aret, &typ[fn->retty]); + if (aret.inmem) { + assert(rtype(fn->retr) 
== RTmp); + emit(Ocopy, Kl, TMP(RAX), fn->retr, R); + blit(fn->retr, 0, r0, aret.size, fn); + ca = 1; + } else { + ca = retr(reg, &aret); + if (aret.size > 8) { + r = newtmp("abi", Kl, fn); + emit(Oload, Kl, reg[1], r, R); + emit(Oadd, Kl, r, r0, getcon(8, fn)); + } + emit(Oload, Kl, reg[0], r0, R); + } + } else { + k = j - Jretw; + if (KBASE(k) == 0) { + emit(Ocopy, k, TMP(RAX), r0, R); + ca = 1; + } else { + emit(Ocopy, k, TMP(XMM0), r0, R); + ca = 1 << 2; + } + } + + b->jmp.arg = CALL(ca); +} + +static int +argsclass(Ins *i0, Ins *i1, AClass *ac, int op, AClass *aret, Ref *env) +{ + int nint, ni, nsse, ns, n, *pn; + AClass *a; + Ins *i; + + if (aret && aret->inmem) + nint = 5; /* hidden argument */ + else + nint = 6; + nsse = 8; + for (i=i0, a=ac; iop - op + Oarg) { + case Oarg: + if (KBASE(i->cls) == 0) + pn = &nint; + else + pn = &nsse; + if (*pn > 0) { + --*pn; + a->inmem = 0; + } else + a->inmem = 2; + a->align = 3; + a->size = 8; + a->cls[0] = i->cls; + break; + case Oargc: + n = i->arg[0].val; + typclass(a, &typ[n]); + if (a->inmem) + continue; + ni = ns = 0; + for (n=0; (uint)n*8size; n++) + if (KBASE(a->cls[n]) == 0) + ni++; + else + ns++; + if (nint >= ni && nsse >= ns) { + nint -= ni; + nsse -= ns; + } else + a->inmem = 1; + break; + case Oarge: + if (op == Opar) + *env = i->to; + else + *env = i->arg[0]; + break; + } + + return ((6-nint) << 4) | ((8-nsse) << 8); +} + +int amd64_sysv_rsave[] = { + RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, -1 +}; +int amd64_sysv_rclob[] = {RBX, R12, R13, R14, R15, -1}; + +MAKESURE(sysv_arrays_ok, + sizeof amd64_sysv_rsave == (NGPS+NFPS+1) * sizeof(int) && + sizeof amd64_sysv_rclob == (NCLR+1) * sizeof(int) +); + +/* layout of call's second argument (RCall) + * + * 29 12 8 4 3 0 + * |0...00|x|xxxx|xxxx|xx|xx| range + * | | | | ` gp regs returned (0..2) + * | | | ` sse regs returned (0..2) + * | | ` gp regs passed (0..6) + 
* | ` sse regs passed (0..8) + * ` 1 if rax is used to pass data (0..1) + */ + +bits +amd64_sysv_retregs(Ref r, int p[2]) +{ + bits b; + int ni, nf; + + assert(rtype(r) == RCall); + b = 0; + ni = r.val & 3; + nf = (r.val >> 2) & 3; + if (ni >= 1) + b |= BIT(RAX); + if (ni >= 2) + b |= BIT(RDX); + if (nf >= 1) + b |= BIT(XMM0); + if (nf >= 2) + b |= BIT(XMM1); + if (p) { + p[0] = ni; + p[1] = nf; + } + return b; +} + +bits +amd64_sysv_argregs(Ref r, int p[2]) +{ + bits b; + int j, ni, nf, ra; + + assert(rtype(r) == RCall); + b = 0; + ni = (r.val >> 4) & 15; + nf = (r.val >> 8) & 15; + ra = (r.val >> 12) & 1; + for (j=0; jarg[1], R)) { + assert(rtype(i1->arg[1]) == RType); + typclass(&aret, &typ[i1->arg[1].val]); + ca = argsclass(i0, i1, ac, Oarg, &aret, &env); + } else + ca = argsclass(i0, i1, ac, Oarg, 0, &env); + + for (stk=0, a=&ac[i1-i0]; a>ac;) + if ((--a)->inmem) { + if (a->align > 4) + err("sysv abi requires alignments of 16 or less"); + stk += a->size; + if (a->align == 4) + stk += stk & 15; + } + stk += stk & 15; + if (stk) { + r = getcon(-(int64_t)stk, fn); + emit(Osalloc, Kl, R, r, R); + } + + if (!req(i1->arg[1], R)) { + if (aret.inmem) { + /* get the return location from eax + * it saves one callee-save reg */ + r1 = newtmp("abi", Kl, fn); + emit(Ocopy, Kl, i1->to, TMP(RAX), R); + ca += 1; + } else { + if (aret.size > 8) { + r = newtmp("abi", Kl, fn); + aret.ref[1] = newtmp("abi", aret.cls[1], fn); + emit(Ostorel, 0, R, aret.ref[1], r); + emit(Oadd, Kl, r, i1->to, getcon(8, fn)); + } + aret.ref[0] = newtmp("abi", aret.cls[0], fn); + emit(Ostorel, 0, R, aret.ref[0], i1->to); + ca += retr(reg, &aret); + if (aret.size > 8) + emit(Ocopy, aret.cls[1], aret.ref[1], reg[1], R); + emit(Ocopy, aret.cls[0], aret.ref[0], reg[0], R); + r1 = i1->to; + } + /* allocate return pad */ + ra = alloc(sizeof *ra); + /* specific to NAlign == 3 */ + al = aret.align >= 2 ? 
aret.align - 2 : 0; + ra->i = (Ins){Oalloc+al, r1, {getcon(aret.size, fn)}, Kl}; + ra->link = (*rap); + *rap = ra; + } else { + ra = 0; + if (KBASE(i1->cls) == 0) { + emit(Ocopy, i1->cls, i1->to, TMP(RAX), R); + ca += 1; + } else { + emit(Ocopy, i1->cls, i1->to, TMP(XMM0), R); + ca += 1 << 2; + } + } + envc = !req(R, env); + varc = i1->op == Ovacall; + if (varc && envc) + err("sysv abi does not support variadic env calls"); + ca |= (varc | envc) << 12; + emit(Ocall, i1->cls, R, i1->arg[0], CALL(ca)); + if (envc) + emit(Ocopy, Kl, TMP(RAX), env, R); + if (varc) + emit(Ocopy, Kw, TMP(RAX), getcon((ca >> 8) & 15, fn), R); + + ni = ns = 0; + if (ra && aret.inmem) + emit(Ocopy, Kl, rarg(Kl, &ni, &ns), ra->i.to, R); /* pass hidden argument */ + for (i=i0, a=ac; iinmem) + continue; + r1 = rarg(a->cls[0], &ni, &ns); + if (i->op == Oargc) { + if (a->size > 8) { + r2 = rarg(a->cls[1], &ni, &ns); + r = newtmp("abi", Kl, fn); + emit(Oload, a->cls[1], r2, r, R); + emit(Oadd, Kl, r, i->arg[1], getcon(8, fn)); + } + emit(Oload, a->cls[0], r1, i->arg[1], R); + } else + emit(Ocopy, i->cls, r1, i->arg[0], R); + } + + if (!stk) + return; + + r = newtmp("abi", Kl, fn); + for (i=i0, a=ac, off=0; iinmem) + continue; + if (i->op == Oargc) { + if (a->align == 4) + off += off & 15; + blit(r, off, i->arg[1], a->size, fn); + } else { + r1 = newtmp("abi", Kl, fn); + emit(Ostorel, 0, R, i->arg[0], r1); + emit(Oadd, Kl, r1, r, getcon(off, fn)); + } + off += a->size; + } + emit(Osalloc, Kl, r, getcon(stk, fn), R); +} + +static int +selpar(Fn *fn, Ins *i0, Ins *i1) +{ + AClass *ac, *a, aret; + Ins *i; + int ni, ns, s, al, fa; + Ref r, env; + + env = R; + ac = alloc((i1-i0) * sizeof ac[0]); + curi = &insb[NIns]; + ni = ns = 0; + + if (fn->retty >= 0) { + typclass(&aret, &typ[fn->retty]); + fa = argsclass(i0, i1, ac, Opar, &aret, &env); + } else + fa = argsclass(i0, i1, ac, Opar, 0, &env); + + for (i=i0, a=ac; iop != Oparc || a->inmem) + continue; + if (a->size > 8) { + r = newtmp("abi", Kl, fn); + 
a->ref[1] = newtmp("abi", Kl, fn); + emit(Ostorel, 0, R, a->ref[1], r); + emit(Oadd, Kl, r, i->to, getcon(8, fn)); + } + a->ref[0] = newtmp("abi", Kl, fn); + emit(Ostorel, 0, R, a->ref[0], i->to); + /* specific to NAlign == 3 */ + al = a->align >= 2 ? a->align - 2 : 0; + emit(Oalloc+al, Kl, i->to, getcon(a->size, fn), R); + } + + if (fn->retty >= 0 && aret.inmem) { + r = newtmp("abi", Kl, fn); + emit(Ocopy, Kl, r, rarg(Kl, &ni, &ns), R); + fn->retr = r; + } + + for (i=i0, a=ac, s=4; iinmem) { + case 1: + if (a->align > 4) + err("sysv abi requires alignments of 16 or less"); + if (a->align == 4) + s = (s+3) & -4; + fn->tmp[i->to.val].slot = -s; + s += a->size / 4; + continue; + case 2: + emit(Oload, i->cls, i->to, SLOT(-s), R); + s += 2; + continue; + } + r = rarg(a->cls[0], &ni, &ns); + if (i->op == Oparc) { + emit(Ocopy, Kl, a->ref[0], r, R); + if (a->size > 8) { + r = rarg(a->cls[1], &ni, &ns); + emit(Ocopy, Kl, a->ref[1], r, R); + } + } else + emit(Ocopy, i->cls, i->to, r, R); + } + + if (!req(R, env)) + emit(Ocopy, Kl, env, TMP(RAX), R); + + return fa | (s*4)<<12; +} + +static Blk * +split(Fn *fn, Blk *b) +{ + Blk *bn; + + ++fn->nblk; + bn = blknew(); + bn->nins = &insb[NIns] - curi; + idup(&bn->ins, curi, bn->nins); + curi = &insb[NIns]; + bn->visit = ++b->visit; + snprintf(bn->name, NString, "%s.%d", b->name, b->visit); + bn->loop = b->loop; + bn->link = b->link; + b->link = bn; + return bn; +} + +static void +chpred(Blk *b, Blk *bp, Blk *bp1) +{ + Phi *p; + uint a; + + for (p=b->phi; p; p=p->link) { + for (a=0; p->blk[a]!=bp; a++) + assert(a+1narg); + p->blk[a] = bp1; + } +} + +static void +selvaarg(Fn *fn, Blk *b, Ins *i) +{ + Ref loc, lreg, lstk, nr, r0, r1, c4, c8, c16, c, ap; + Blk *b0, *bstk, *breg; + int isint; + + c4 = getcon(4, fn); + c8 = getcon(8, fn); + c16 = getcon(16, fn); + ap = i->arg[0]; + isint = KBASE(i->cls) == 0; + + /* @b [...] 
+ r0 =l add ap, (0 or 4) + nr =l loadsw r0 + r1 =w cultw nr, (48 or 176) + jnz r1, @breg, @bstk + @breg + r0 =l add ap, 16 + r1 =l loadl r0 + lreg =l add r1, nr + r0 =w add nr, (8 or 16) + r1 =l add ap, (0 or 4) + storew r0, r1 + @bstk + r0 =l add ap, 8 + lstk =l loadl r0 + r1 =l add lstk, 8 + storel r1, r0 + @b0 + %loc =l phi @breg %lreg, @bstk %lstk + i->to =(i->cls) load %loc + */ + + loc = newtmp("abi", Kl, fn); + emit(Oload, i->cls, i->to, loc, R); + b0 = split(fn, b); + b0->jmp = b->jmp; + b0->s1 = b->s1; + b0->s2 = b->s2; + if (b->s1) + chpred(b->s1, b, b0); + if (b->s2 && b->s2 != b->s1) + chpred(b->s2, b, b0); + + lreg = newtmp("abi", Kl, fn); + nr = newtmp("abi", Kl, fn); + r0 = newtmp("abi", Kw, fn); + r1 = newtmp("abi", Kl, fn); + emit(Ostorew, Kw, R, r0, r1); + emit(Oadd, Kl, r1, ap, isint ? CON_Z : c4); + emit(Oadd, Kw, r0, nr, isint ? c8 : c16); + r0 = newtmp("abi", Kl, fn); + r1 = newtmp("abi", Kl, fn); + emit(Oadd, Kl, lreg, r1, nr); + emit(Oload, Kl, r1, r0, R); + emit(Oadd, Kl, r0, ap, c16); + breg = split(fn, b); + breg->jmp.type = Jjmp; + breg->s1 = b0; + + lstk = newtmp("abi", Kl, fn); + r0 = newtmp("abi", Kl, fn); + r1 = newtmp("abi", Kl, fn); + emit(Ostorel, Kw, R, r1, r0); + emit(Oadd, Kl, r1, lstk, c8); + emit(Oload, Kl, lstk, r0, R); + emit(Oadd, Kl, r0, ap, c8); + bstk = split(fn, b); + bstk->jmp.type = Jjmp; + bstk->s1 = b0; + + b0->phi = alloc(sizeof *b0->phi); + *b0->phi = (Phi){ + .cls = Kl, .to = loc, + .narg = 2, + .blk = {bstk, breg}, + .arg = {lstk, lreg}, + }; + r0 = newtmp("abi", Kl, fn); + r1 = newtmp("abi", Kw, fn); + b->jmp.type = Jjnz; + b->jmp.arg = r1; + b->s1 = breg; + b->s2 = bstk; + c = getcon(isint ? 48 : 176, fn); + emit(Ocmpw+Ciult, Kw, r1, nr, c); + emit(Oloadsw, Kl, nr, r0, R); + emit(Oadd, Kl, r0, ap, isint ? 
CON_Z : c4); +} + +static void +selvastart(Fn *fn, int fa, Ref ap) +{ + Ref r0, r1; + int gp, fp, sp; + + gp = ((fa >> 4) & 15) * 8; + fp = 48 + ((fa >> 8) & 15) * 16; + sp = fa >> 12; + r0 = newtmp("abi", Kl, fn); + r1 = newtmp("abi", Kl, fn); + emit(Ostorel, Kw, R, r1, r0); + emit(Oadd, Kl, r1, TMP(RBP), getcon(-176, fn)); + emit(Oadd, Kl, r0, ap, getcon(16, fn)); + r0 = newtmp("abi", Kl, fn); + r1 = newtmp("abi", Kl, fn); + emit(Ostorel, Kw, R, r1, r0); + emit(Oadd, Kl, r1, TMP(RBP), getcon(sp, fn)); + emit(Oadd, Kl, r0, ap, getcon(8, fn)); + r0 = newtmp("abi", Kl, fn); + emit(Ostorew, Kw, R, getcon(fp, fn), r0); + emit(Oadd, Kl, r0, ap, getcon(4, fn)); + emit(Ostorew, Kw, R, getcon(gp, fn), ap); +} + +void +amd64_sysv_abi(Fn *fn) +{ + Blk *b; + Ins *i, *i0, *ip; + RAlloc *ral; + int n, fa; + + for (b=fn->start; b; b=b->link) + b->visit = 0; + + /* lower parameters */ + for (b=fn->start, i=b->ins; i-b->insnins; i++) + if (!ispar(i->op)) + break; + fa = selpar(fn, b->ins, i); + n = b->nins - (i - b->ins) + (&insb[NIns] - curi); + i0 = alloc(n * sizeof(Ins)); + ip = icpy(ip = i0, curi, &insb[NIns] - curi); + ip = icpy(ip, i, &b->ins[b->nins] - i); + b->nins = n; + b->ins = i0; + + /* lower calls, returns, and vararg instructions */ + ral = 0; + b = fn->start; + do { + if (!(b = b->link)) + b = fn->start; /* do it last */ + if (b->visit) + continue; + curi = &insb[NIns]; + selret(b, fn); + for (i=&b->ins[b->nins]; i!=b->ins;) + switch ((--i)->op) { + default: + emiti(*i); + break; + case Ocall: + case Ovacall: + for (i0=i; i0>b->ins; i0--) + if (!isarg((i0-1)->op)) + break; + selcall(fn, i0, i, &ral); + i = i0; + break; + case Ovastart: + selvastart(fn, fa, i->arg[0]); + break; + case Ovaarg: + selvaarg(fn, b, i); + break; + case Oarg: + case Oargc: + die("unreachable"); + } + if (b == fn->start) + for (; ral; ral=ral->link) + emiti(ral->i); + b->nins = &insb[NIns] - curi; + idup(&b->ins, curi, b->nins); + } while (b != fn->start); + + if (debug['A']) { + 
fprintf(stderr, "\n> After ABI lowering:\n"); + printfn(fn, stderr); + } +} diff --git a/amd64/targ.c b/amd64/targ.c new file mode 100644 index 0000000..e227574 --- /dev/null +++ b/amd64/targ.c @@ -0,0 +1,30 @@ +#include "all.h" + +Amd64Op amd64_op[NOp] = { +#define O(op, t, x) [O##op] = +#define X(nm, zf, lf) { nm, zf, lf, }, + #include "../ops.h" +}; + +static int +amd64_memargs(int op) +{ + return amd64_op[op].nmem; +} + +Target T_amd64_sysv = { + .gpr0 = RAX, + .ngpr = NGPR, + .fpr0 = XMM0, + .nfpr = NFPR, + .rglob = BIT(RBP) | BIT(RSP), + .nrglob = 2, + .rsave = amd64_sysv_rsave, + .nrsave = {NGPS, NFPS}, + .retregs = amd64_sysv_retregs, + .argregs = amd64_sysv_argregs, + .memargs = amd64_memargs, + .abi = amd64_sysv_abi, + .isel = amd64_isel, + .emitfn = amd64_emitfn, +}; diff --git a/cfg.c b/cfg.c index dff0765..ea1ae12 100644 --- a/cfg.c +++ b/cfg.c @@ -312,8 +312,8 @@ simpljmp(Fn *fn) uffind(&b->s1, uf); if (b->s2) uffind(&b->s2, uf); - c = b->jmp.type - Jxjc; - if (0 <= c && c <= NXICmp) + c = b->jmp.type - Jjf; + if (0 <= c && c <= NCmp) if (b->s1 == b->s2) { b->jmp.type = Jjmp; b->s2 = 0; diff --git a/emit.c b/emit.c deleted file mode 100644 index 138bc1d..0000000 --- a/emit.c +++ /dev/null @@ -1,696 +0,0 @@ -#include "all.h" - -char *locprefix, *symprefix; - -enum { - SLong = 0, - SWord = 1, - SShort = 2, - SByte = 3, - - Ki = -1, /* matches Kw and Kl */ - Ka = -2, /* matches all classes */ -}; - -/* Instruction format strings: - * - * if the format string starts with -, the instruction - * is assumed to be 3-address and is put in 2-address - * mode using an extra mov if necessary - * - * if the format string starts with +, the same as the - * above applies, but commutativity is also assumed - * - * %k is used to set the class of the instruction, - * it'll expand to "l", "q", "ss", "sd", depending - * on the instruction class - * %0 designates the first argument - * %1 designates the second argument - * %= designates the result - * - * if %k is not 
used, a prefix to 0, 1, or = must be - * added, it can be: - * M - memory reference - * L - long (64 bits) - * W - word (32 bits) - * H - short (16 bits) - * B - byte (8 bits) - * S - single precision float - * D - double precision float - */ -static struct { - short op; - short cls; - char *asm; -} omap[] = { - { Oadd, Ka, "+add%k %1, %=" }, - { Osub, Ka, "-sub%k %1, %=" }, - { Oand, Ki, "+and%k %1, %=" }, - { Oor, Ki, "+or%k %1, %=" }, - { Oxor, Ki, "+xor%k %1, %=" }, - { Osar, Ki, "-sar%k %B1, %=" }, - { Oshr, Ki, "-shr%k %B1, %=" }, - { Oshl, Ki, "-shl%k %B1, %=" }, - { Omul, Ki, "+imul%k %1, %=" }, - { Omul, Ks, "+mulss %1, %=" }, - { Omul, Kd, "+mulsd %1, %=" }, - { Odiv, Ka, "-div%k %1, %=" }, - { Ostorel, Ka, "movq %L0, %M1" }, - { Ostorew, Ka, "movl %W0, %M1" }, - { Ostoreh, Ka, "movw %H0, %M1" }, - { Ostoreb, Ka, "movb %B0, %M1" }, - { Ostores, Ka, "movss %S0, %M1" }, - { Ostored, Ka, "movsd %D0, %M1" }, - { Oload, Ka, "mov%k %M0, %=" }, - { Oloadsw, Kl, "movslq %M0, %L=" }, - { Oloadsw, Kw, "movl %M0, %W=" }, - { Oloaduw, Ki, "movl %M0, %W=" }, - { Oloadsh, Ki, "movsw%k %M0, %=" }, - { Oloaduh, Ki, "movzw%k %M0, %=" }, - { Oloadsb, Ki, "movsb%k %M0, %=" }, - { Oloadub, Ki, "movzb%k %M0, %=" }, - { Oextsw, Kl, "movslq %W0, %L=" }, - { Oextuw, Kl, "movl %W0, %W=" }, - { Oextsh, Ki, "movsw%k %H0, %=" }, - { Oextuh, Ki, "movzw%k %H0, %=" }, - { Oextsb, Ki, "movsb%k %B0, %=" }, - { Oextub, Ki, "movzb%k %B0, %=" }, - - { Oexts, Kd, "cvtss2sd %0, %=" }, - { Otruncd, Ks, "cvttsd2ss %0, %=" }, - { Ostosi, Ki, "cvttss2si%k %0, %=" }, - { Odtosi, Ki, "cvttsd2si%k %0, %=" }, - { Oswtof, Ka, "cvtsi2%k %W0, %=" }, - { Osltof, Ka, "cvtsi2%k %L0, %=" }, - { Ocast, Ki, "movq %D0, %L=" }, - { Ocast, Ka, "movq %L0, %D=" }, - - { Oaddr, Ki, "lea%k %M0, %=" }, - { Oswap, Ki, "xchg%k %0, %1" }, - { Osign, Kl, "cqto" }, - { Osign, Kw, "cltd" }, - { Oxdiv, Ki, "div%k %0" }, - { Oxidiv, Ki, "idiv%k %0" }, - { Oxcmp, Ks, "comiss %S0, %S1" }, - { Oxcmp, Kd, "comisd %D0, %D1" }, - 
{ Oxcmp, Ki, "cmp%k %0, %1" }, - { Oxtest, Ki, "test%k %0, %1" }, - { Oxset+ICule, Ki, "setbe %B=\n\tmovzb%k %B=, %=" }, - { Oxset+ICult, Ki, "setb %B=\n\tmovzb%k %B=, %=" }, - { Oxset+ICsle, Ki, "setle %B=\n\tmovzb%k %B=, %=" }, - { Oxset+ICslt, Ki, "setl %B=\n\tmovzb%k %B=, %=" }, - { Oxset+ICsgt, Ki, "setg %B=\n\tmovzb%k %B=, %=" }, - { Oxset+ICsge, Ki, "setge %B=\n\tmovzb%k %B=, %=" }, - { Oxset+ICugt, Ki, "seta %B=\n\tmovzb%k %B=, %=" }, - { Oxset+ICuge, Ki, "setae %B=\n\tmovzb%k %B=, %=" }, - { Oxset+ICeq, Ki, "setz %B=\n\tmovzb%k %B=, %=" }, - { Oxset+ICne, Ki, "setnz %B=\n\tmovzb%k %B=, %=" }, - { Oxset+ICxnp, Ki, "setnp %B=\n\tmovsb%k %B=, %=" }, - { Oxset+ICxp, Ki, "setp %B=\n\tmovsb%k %B=, %=" }, - { NOp, 0, 0 } -}; - -static char *rname[][4] = { - [RAX] = {"rax", "eax", "ax", "al"}, - [RBX] = {"rbx", "ebx", "bx", "bl"}, - [RCX] = {"rcx", "ecx", "cx", "cl"}, - [RDX] = {"rdx", "edx", "dx", "dl"}, - [RSI] = {"rsi", "esi", "si", "sil"}, - [RDI] = {"rdi", "edi", "di", "dil"}, - [RBP] = {"rbp", "ebp", "bp", "bpl"}, - [RSP] = {"rsp", "esp", "sp", "spl"}, - [R8 ] = {"r8" , "r8d", "r8w", "r8b"}, - [R9 ] = {"r9" , "r9d", "r9w", "r9b"}, - [R10] = {"r10", "r10d", "r10w", "r10b"}, - [R11] = {"r11", "r11d", "r11w", "r11b"}, - [R12] = {"r12", "r12d", "r12w", "r12b"}, - [R13] = {"r13", "r13d", "r13w", "r13b"}, - [R14] = {"r14", "r14d", "r14w", "r14b"}, - [R15] = {"r15", "r15d", "r15w", "r15b"}, -}; - - -static int -slot(int s, Fn *fn) -{ - struct { int i:29; } x; - - /* sign extend s using a bitfield */ - x.i = s; - assert(x.i <= fn->slot); - /* specific to NAlign == 3 */ - if (x.i < 0) - return -4 * x.i; - else if (fn->vararg) - return -176 + -4 * (fn->slot - x.i); - else - return -4 * (fn->slot - x.i); -} - -static void -emitcon(Con *con, FILE *f) -{ - switch (con->type) { - case CAddr: - if (con->local) - fprintf(f, "%s%s", locprefix, con->label); - else - fprintf(f, "%s%s", symprefix, con->label); - if (con->bits.i) - fprintf(f, "%+"PRId64, con->bits.i); - break; - 
case CBits: - fprintf(f, "%"PRId64, con->bits.i); - break; - default: - die("unreachable"); - } -} - -static char * -regtoa(int reg, int sz) -{ - static char buf[6]; - - if (reg >= XMM0) { - sprintf(buf, "xmm%d", reg-XMM0); - return buf; - } else - return rname[reg][sz]; -} - -static Ref -getarg(char c, Ins *i) -{ - switch (c) { - case '0': - return i->arg[0]; - case '1': - return i->arg[1]; - case '=': - return i->to; - default: - die("invalid arg letter %c", c); - } -} - -static void emitins(Ins, Fn *, FILE *); - -static void -emitcopy(Ref r1, Ref r2, int k, Fn *fn, FILE *f) -{ - Ins icp; - - icp.op = Ocopy; - icp.arg[0] = r2; - icp.to = r1; - icp.cls = k; - emitins(icp, fn, f); -} - -static void -emitf(char *s, Ins *i, Fn *fn, FILE *f) -{ - static char clstoa[][3] = {"l", "q", "ss", "sd"}; - char c; - int sz; - Ref ref; - Mem *m; - Con off; - - switch (*s) { - case '+': - if (req(i->arg[1], i->to)) { - ref = i->arg[0]; - i->arg[0] = i->arg[1]; - i->arg[1] = ref; - } - /* fall through */ - case '-': - assert((!req(i->arg[1], i->to) || req(i->arg[0], i->to)) && - "cannot convert to 2-address"); - emitcopy(i->to, i->arg[0], i->cls, fn, f); - s++; - break; - } - - fputc('\t', f); -Next: - while ((c = *s++) != '%') - if (!c) { - fputc('\n', f); - return; - } else - fputc(c, f); - switch ((c = *s++)) { - case '%': - fputc('%', f); - break; - case 'k': - fputs(clstoa[i->cls], f); - break; - case '0': - case '1': - case '=': - sz = KWIDE(i->cls) ? 
SLong : SWord; - s--; - goto Ref; - case 'D': - case 'S': - sz = SLong; /* does not matter for floats */ - Ref: - c = *s++; - ref = getarg(c, i); - switch (rtype(ref)) { - case RTmp: - assert(isreg(ref)); - fprintf(f, "%%%s", regtoa(ref.val, sz)); - break; - case RSlot: - fprintf(f, "%d(%%rbp)", slot(ref.val, fn)); - break; - case RMem: - Mem: - m = &fn->mem[ref.val]; - if (rtype(m->base) == RSlot) { - off.type = CBits; - off.bits.i = slot(m->base.val, fn); - addcon(&m->offset, &off); - m->base = TMP(RBP); - } - if (m->offset.type != CUndef) - emitcon(&m->offset, f); - fputc('(', f); - if (req(m->base, R)) - fprintf(f, "%%rip"); - else - fprintf(f, "%%%s", regtoa(m->base.val, SLong)); - if (!req(m->index, R)) - fprintf(f, ", %%%s, %d", - regtoa(m->index.val, SLong), - m->scale - ); - fputc(')', f); - break; - case RCon: - fputc('$', f); - emitcon(&fn->con[ref.val], f); - break; - default: - die("unreachable"); - } - break; - case 'L': - sz = SLong; - goto Ref; - case 'W': - sz = SWord; - goto Ref; - case 'H': - sz = SShort; - goto Ref; - case 'B': - sz = SByte; - goto Ref; - case 'M': - c = *s++; - ref = getarg(c, i); - switch (rtype(ref)) { - case RMem: - goto Mem; - case RSlot: - fprintf(f, "%d(%%rbp)", slot(ref.val, fn)); - break; - case RCon: - emitcon(&fn->con[ref.val], f); - fprintf(f, "(%%rip)"); - break; - case RTmp: - assert(isreg(ref)); - fprintf(f, "(%%%s)", regtoa(ref.val, SLong)); - break; - default: - die("unreachable"); - } - break; - default: - die("invalid format specifier %%%c", c); - } - goto Next; -} - -static void -emitins(Ins i, Fn *fn, FILE *f) -{ - Ref r; - int64_t val; - int o; - - switch (i.op) { - default: - Table: - /* most instructions are just pulled out of - * the table omap[], some special cases are - * detailed below */ - for (o=0;; o++) { - /* this linear search should really be a binary - * search */ - if (omap[o].op == NOp) - die("no match for %s(%d)", opdesc[i.op].name, i.cls); - if (omap[o].op == i.op) - if (omap[o].cls == 
i.cls - || (omap[o].cls == Ki && KBASE(i.cls) == 0) - || (omap[o].cls == Ka)) - break; - } - emitf(omap[o].asm, &i, fn, f); - break; - case Onop: - /* just do nothing for nops, they are inserted - * by some passes */ - break; - case Omul: - /* here, we try to use the 3-addresss form - * of multiplication when possible */ - if (rtype(i.arg[1]) == RCon) { - r = i.arg[0]; - i.arg[0] = i.arg[1]; - i.arg[1] = r; - } - if (KBASE(i.cls) == 0 /* only available for ints */ - && rtype(i.arg[0]) == RCon - && rtype(i.arg[1]) == RTmp) { - emitf("imul%k %0, %1, %=", &i, fn, f); - break; - } - goto Table; - case Osub: - /* we have to use the negation trick to handle - * some 3-address substractions */ - if (req(i.to, i.arg[1])) { - emitf("neg%k %=", &i, fn, f); - emitf("add%k %0, %=", &i, fn, f); - break; - } - goto Table; - case Ocopy: - /* make sure we don't emit useless copies, - * also, we can use a trick to load 64-bits - * registers, it's detailed in my note below - * http://c9x.me/art/notes.html?09/19/2015 */ - if (req(i.to, R) || req(i.arg[0], R)) - break; - if (isreg(i.to) - && rtype(i.arg[0]) == RCon - && i.cls == Kl - && fn->con[i.arg[0].val].type == CBits - && (val = fn->con[i.arg[0].val].bits.i) >= 0 - && val <= UINT32_MAX) { - emitf("movl %W0, %W=", &i, fn, f); - } else if (isreg(i.to) - && rtype(i.arg[0]) == RCon - && fn->con[i.arg[0].val].type == CAddr) { - emitf("lea%k %M0, %=", &i, fn, f); - } else if (!req(i.arg[0], i.to)) - emitf("mov%k %0, %=", &i, fn, f); - break; - case Ocall: - /* calls simply have a weird syntax in AT&T - * assembly... 
*/ - switch (rtype(i.arg[0])) { - case RCon: - fprintf(f, "\tcallq "); - emitcon(&fn->con[i.arg[0].val], f); - fprintf(f, "\n"); - break; - case RTmp: - emitf("callq *%L0", &i, fn, f); - break; - default: - die("invalid call argument"); - } - break; - case Osalloc: - /* there is no good reason why this is here - * maybe we should split Osalloc in 2 different - * instructions depending on the result - */ - emitf("subq %L0, %%rsp", &i, fn, f); - if (!req(i.to, R)) - emitcopy(i.to, TMP(RSP), Kl, fn, f); - break; - case Oswap: - if (KBASE(i.cls) == 0) - goto Table; - /* for floats, there is no swap instruction - * so we use xmm15 as a temporary - */ - emitcopy(TMP(XMM0+15), i.arg[0], i.cls, fn, f); - emitcopy(i.arg[0], i.arg[1], i.cls, fn, f); - emitcopy(i.arg[1], TMP(XMM0+15), i.cls, fn, f); - break; - } -} - -static int -cneg(int cmp) -{ - switch (cmp) { - default: die("invalid int comparison %d", cmp); - case ICule: return ICugt; - case ICult: return ICuge; - case ICsle: return ICsgt; - case ICslt: return ICsge; - case ICsgt: return ICsle; - case ICsge: return ICslt; - case ICugt: return ICule; - case ICuge: return ICult; - case ICeq: return ICne; - case ICne: return ICeq; - case ICxnp: return ICxp; - case ICxp: return ICxnp; - } -} - -static int -framesz(Fn *fn) -{ - int i, o, f; - - /* specific to NAlign == 3 */ - for (i=0, o=0; ireg >> rclob[i]); - f = fn->slot; - f = (f + 3) & -4; - return 4*f + 8*o + 176*fn->vararg; -} - -void -emitfn(Fn *fn, FILE *f) -{ - static char *ctoa[] = { - [ICeq] = "z", - [ICule] = "be", - [ICult] = "b", - [ICsle] = "le", - [ICslt] = "l", - [ICsgt] = "g", - [ICsge] = "ge", - [ICugt] = "a", - [ICuge] = "ae", - [ICne] = "nz", - [ICxnp] = "np", - [ICxp] = "p" - }; - static int id0; - Blk *b, *s; - Ins *i, itmp; - int *r, c, fs, o, n, lbl; - - fprintf(f, ".text\n"); - if (fn->export) - fprintf(f, ".globl %s%s\n", symprefix, fn->name); - fprintf(f, - "%s%s:\n" - "\tpushq %%rbp\n" - "\tmovq %%rsp, %%rbp\n", - symprefix, fn->name - ); - fs = 
framesz(fn); - if (fs) - fprintf(f, "\tsub $%d, %%rsp\n", fs); - if (fn->vararg) { - o = -176; - for (r=rsave; r-rsave<6; ++r, o+=8) - fprintf(f, "\tmovq %%%s, %d(%%rbp)\n", rname[*r][0], o); - for (n=0; n<8; ++n, o+=16) - fprintf(f, "\tmovaps %%xmm%d, %d(%%rbp)\n", n, o); - } - for (r=rclob; r-rclob < NRClob; r++) - if (fn->reg & BIT(*r)) { - itmp.arg[0] = TMP(*r); - emitf("pushq %L0", &itmp, fn, f); - } - - for (lbl=0, b=fn->start; b; b=b->link) { - if (lbl || b->npred > 1) - fprintf(f, "%sbb%d:\n", locprefix, id0+b->id); - for (i=b->ins; i!=&b->ins[b->nins]; i++) - emitins(*i, fn, f); - lbl = 1; - switch (b->jmp.type) { - case Jret0: - for (r=&rclob[NRClob]; r>rclob;) - if (fn->reg & BIT(*--r)) { - itmp.arg[0] = TMP(*r); - emitf("popq %L0", &itmp, fn, f); - } - fprintf(f, - "\tleave\n" - "\tret\n" - ); - break; - case Jjmp: - Jmp: - if (b->s1 != b->link) - fprintf(f, "\tjmp %sbb%d\n", - locprefix, id0+b->s1->id); - else - lbl = 0; - break; - default: - c = b->jmp.type - Jxjc; - if (0 <= c && c <= NXICmp) { - if (b->link == b->s2) { - s = b->s1; - b->s1 = b->s2; - b->s2 = s; - } else - c = cneg(c); - fprintf(f, "\tj%s %sbb%d\n", ctoa[c], - locprefix, id0+b->s2->id); - goto Jmp; - } - die("unhandled jump %d", b->jmp.type); - } - } - id0 += fn->nblk; -} - -void -emitdat(Dat *d, FILE *f) -{ - static int align; - static char *dtoa[] = { - [DAlign] = ".align", - [DB] = "\t.byte", - [DH] = "\t.value", - [DW] = "\t.long", - [DL] = "\t.quad" - }; - - switch (d->type) { - case DStart: - align = 0; - fprintf(f, ".data\n"); - break; - case DEnd: - break; - case DName: - if (!align) - fprintf(f, ".align 8\n"); - if (d->export) - fprintf(f, ".globl %s%s\n", symprefix, d->u.str); - fprintf(f, "%s%s:\n", symprefix, d->u.str); - break; - case DZ: - fprintf(f, "\t.fill %"PRId64",1,0\n", d->u.num); - break; - default: - if (d->type == DAlign) - align = 1; - - if (d->isstr) { - if (d->type != DB) - err("strings only supported for 'b' currently"); - fprintf(f, "\t.ascii \"%s\"\n", 
d->u.str); - } - else if (d->isref) { - fprintf(f, "%s %s%+"PRId64"\n", - dtoa[d->type], d->u.ref.nam, - d->u.ref.off); - } - else { - fprintf(f, "%s %"PRId64"\n", - dtoa[d->type], d->u.num); - } - break; - } -} - -typedef struct FBits FBits; - -struct FBits { - union { - int64_t n; - float f; - double d; - } bits; - int wide; - FBits *link; -}; - -static FBits *stash; - -int -stashfp(int64_t n, int w) -{ - FBits **pb, *b; - int i; - - /* does a dumb de-dup of fp constants - * this should be the linker's job */ - for (pb=&stash, i=0; (b=*pb); pb=&b->link, i++) - if (n == b->bits.n && w == b->wide) - return i; - b = emalloc(sizeof *b); - b->bits.n = n; - b->wide = w; - b->link = 0; - *pb = b; - return i; -} - -void -emitfin(FILE *f) -{ - FBits *b; - int i; - - if (!stash) - return; - fprintf(f, "/* floating point constants */\n"); - fprintf(f, ".data\n.align 8\n"); - for (b=stash, i=0; b; b=b->link, i++) - if (b->wide) - fprintf(f, - "%sfp%d:\n" - "\t.quad %"PRId64 - " /* %f */\n", - locprefix, i, b->bits.n, - b->bits.d - ); - for (b=stash, i=0; b; b=b->link, i++) - if (!b->wide) - fprintf(f, - "%sfp%d:\n" - "\t.long %"PRId64 - " /* %lf */\n", - locprefix, i, b->bits.n & 0xffffffff, - b->bits.f - ); - while ((b=stash)) { - stash = b->link; - free(b); - } -} diff --git a/fold.c b/fold.c index 6129421..55672dd 100644 --- a/fold.c +++ b/fold.c @@ -100,7 +100,7 @@ visitins(Ins *i, Fn *fn) if (rtype(i->to) != RTmp) return; - if (opdesc[i->op].cfold) { + if (optab[i->op].canfold) { l = latval(i->arg[0]); if (!req(i->arg[1], R)) r = latval(i->arg[1]); @@ -114,7 +114,7 @@ visitins(Ins *i, Fn *fn) v = opfold(i->op, i->cls, &fn->con[l], &fn->con[r], fn); } else v = Bot; - /* fprintf(stderr, "\nvisiting %s (%p)", opdesc[i->op].name, (void *)i); */ + /* fprintf(stderr, "\nvisiting %s (%p)", optab[i->op].name, (void *)i); */ update(i->to.val, v, fn); } @@ -360,7 +360,7 @@ foldint(Con *res, int op, int w, Con *cl, Con *cr) else if (cl->type == CAddr || cr->type == CAddr) { if 
(Ocmpl <= op && op <= Ocmpl1) return 1; - err("invalid address operand for '%s'", opdesc[op].name); + err("invalid address operand for '%s'", optab[op].name); } switch (op) { case Oadd: x = l.u + r.u; break; @@ -397,42 +397,42 @@ foldint(Con *res, int op, int w, Con *cl, Con *cr) } else op -= Ocmpl - Ocmpw; switch (op - Ocmpw) { - case ICule: x = l.u <= r.u; break; - case ICult: x = l.u < r.u; break; - case ICsle: x = l.s <= r.s; break; - case ICslt: x = l.s < r.s; break; - case ICsgt: x = l.s > r.s; break; - case ICsge: x = l.s >= r.s; break; - case ICugt: x = l.u > r.u; break; - case ICuge: x = l.u >= r.u; break; - case ICeq: x = l.u == r.u; break; - case ICne: x = l.u != r.u; break; + case Ciule: x = l.u <= r.u; break; + case Ciult: x = l.u < r.u; break; + case Cisle: x = l.s <= r.s; break; + case Cislt: x = l.s < r.s; break; + case Cisgt: x = l.s > r.s; break; + case Cisge: x = l.s >= r.s; break; + case Ciugt: x = l.u > r.u; break; + case Ciuge: x = l.u >= r.u; break; + case Cieq: x = l.u == r.u; break; + case Cine: x = l.u != r.u; break; default: die("unreachable"); } } else if (Ocmps <= op && op <= Ocmps1) { switch (op - Ocmps) { - case FCle: x = l.fs <= r.fs; break; - case FClt: x = l.fs < r.fs; break; - case FCgt: x = l.fs > r.fs; break; - case FCge: x = l.fs >= r.fs; break; - case FCne: x = l.fs != r.fs; break; - case FCeq: x = l.fs == r.fs; break; - case FCo: x = l.fs < r.fs || l.fs >= r.fs; break; - case FCuo: x = !(l.fs < r.fs || l.fs >= r.fs); break; + case Cfle: x = l.fs <= r.fs; break; + case Cflt: x = l.fs < r.fs; break; + case Cfgt: x = l.fs > r.fs; break; + case Cfge: x = l.fs >= r.fs; break; + case Cfne: x = l.fs != r.fs; break; + case Cfeq: x = l.fs == r.fs; break; + case Cfo: x = l.fs < r.fs || l.fs >= r.fs; break; + case Cfuo: x = !(l.fs < r.fs || l.fs >= r.fs); break; default: die("unreachable"); } } else if (Ocmpd <= op && op <= Ocmpd1) { switch (op - Ocmpd) { - case FCle: x = l.fd <= r.fd; break; - case FClt: x = l.fd < r.fd; break; - case 
FCgt: x = l.fd > r.fd; break; - case FCge: x = l.fd >= r.fd; break; - case FCne: x = l.fd != r.fd; break; - case FCeq: x = l.fd == r.fd; break; - case FCo: x = l.fd < r.fd || l.fd >= r.fd; break; - case FCuo: x = !(l.fd < r.fd || l.fd >= r.fd); break; + case Cfle: x = l.fd <= r.fd; break; + case Cflt: x = l.fd < r.fd; break; + case Cfgt: x = l.fd > r.fd; break; + case Cfge: x = l.fd >= r.fd; break; + case Cfne: x = l.fd != r.fd; break; + case Cfeq: x = l.fd == r.fd; break; + case Cfo: x = l.fd < r.fd || l.fd >= r.fd; break; + case Cfuo: x = !(l.fd < r.fd || l.fd >= r.fd); break; default: die("unreachable"); } } @@ -453,7 +453,7 @@ foldflt(Con *res, int op, int w, Con *cl, Con *cr) double xd, ld, rd; if (cl->type != CBits || cr->type != CBits) - err("invalid address operand for '%s'", opdesc[op].name); + err("invalid address operand for '%s'", optab[op].name); if (w) { ld = cl->bits.d; rd = cr->bits.d; @@ -495,7 +495,7 @@ opfold(int op, int cls, Con *cl, Con *cr, Fn *fn) if ((op == Odiv || op == Oudiv || op == Orem || op == Ourem) && czero(cr, KWIDE(cls))) - err("null divisor in '%s'", opdesc[op].name); + err("null divisor in '%s'", optab[op].name); if (cls == Kw || cls == Kl) { if (foldint(&c, op, cls == Kl, cl, cr)) return Bot; diff --git a/gas.c b/gas.c new file mode 100644 index 0000000..c1fd6df --- /dev/null +++ b/gas.c @@ -0,0 +1,122 @@ +#include "all.h" + + +char *gasloc, *gassym; + +void +gasemitdat(Dat *d, FILE *f) +{ + static int align; + static char *dtoa[] = { + [DAlign] = ".align", + [DB] = "\t.byte", + [DH] = "\t.short", + [DW] = "\t.int", + [DL] = "\t.quad" + }; + + switch (d->type) { + case DStart: + align = 0; + fprintf(f, ".data\n"); + break; + case DEnd: + break; + case DName: + if (!align) + fprintf(f, ".align 8\n"); + if (d->export) + fprintf(f, ".globl %s%s\n", gassym, d->u.str); + fprintf(f, "%s%s:\n", gassym, d->u.str); + break; + case DZ: + fprintf(f, "\t.fill %"PRId64",1,0\n", d->u.num); + break; + default: + if (d->type == DAlign) + align 
= 1; + + if (d->isstr) { + if (d->type != DB) + err("strings only supported for 'b' currently"); + fprintf(f, "\t.ascii \"%s\"\n", d->u.str); + } + else if (d->isref) { + fprintf(f, "%s %s%+"PRId64"\n", + dtoa[d->type], d->u.ref.nam, + d->u.ref.off); + } + else { + fprintf(f, "%s %"PRId64"\n", + dtoa[d->type], d->u.num); + } + break; + } +} + +typedef struct FBits FBits; + +struct FBits { + union { + int64_t n; + float f; + double d; + } bits; + int wide; + FBits *link; +}; + +static FBits *stash; + +int +gasstashfp(int64_t n, int w) +{ + FBits **pb, *b; + int i; + + /* does a dumb de-dup of fp constants + * this should be the linker's job */ + for (pb=&stash, i=0; (b=*pb); pb=&b->link, i++) + if (n == b->bits.n && w == b->wide) + return i; + b = emalloc(sizeof *b); + b->bits.n = n; + b->wide = w; + b->link = 0; + *pb = b; + return i; +} + +void +gasemitfin(FILE *f) +{ + FBits *b; + int i; + + if (!stash) + return; + fprintf(f, "/* floating point constants */\n"); + fprintf(f, ".data\n.align 8\n"); + for (b=stash, i=0; b; b=b->link, i++) + if (b->wide) + fprintf(f, + "%sfp%d:\n" + "\t.quad %"PRId64 + " /* %f */\n", + gasloc, i, b->bits.n, + b->bits.d + ); + for (b=stash, i=0; b; b=b->link, i++) + if (!b->wide) + fprintf(f, + "%sfp%d:\n" + "\t.long %"PRId64 + " /* %lf */\n", + gasloc, i, b->bits.n & 0xffffffff, + b->bits.f + ); + while ((b=stash)) { + stash = b->link; + free(b); + } +} diff --git a/isel.c b/isel.c deleted file mode 100644 index 6ce5dd0..0000000 --- a/isel.c +++ /dev/null @@ -1,649 +0,0 @@ -#include "all.h" -#include - -/* For x86_64, do the following: - * - * - check that constants are used only in - * places allowed - * - ensure immediates always fit in 32b - * - expose machine register contraints - * on instructions like division. - * - implement fast locals (the streak of - * constant allocX in the first basic block) - * - recognize complex addressing modes - * - * Invariant: the use counts that are used - * in sel() must be sound. 
This - * is not so trivial, maybe the - * dce should be moved out... - */ - -typedef struct ANum ANum; - -struct ANum { - char n, l, r; - Ins *i; -}; - -static void amatch(Addr *, Ref, ANum *, Fn *, int); - -static int -fcmptoi(int fc) -{ - switch (fc) { - default: die("invalid fp comparison %d", fc); - case FCle: return ICule; - case FClt: return ICult; - case FCgt: return ICugt; - case FCge: return ICuge; - case FCne: return ICne; - case FCeq: return ICeq; - case FCo: return ICxnp; - case FCuo: return ICxp; - } -} - -static int -iscmp(int op, int *pk, int *pc) -{ - if (Ocmpw <= op && op <= Ocmpw1) { - *pc = op - Ocmpw; - *pk = Kw; - } - else if (Ocmpl <= op && op <= Ocmpl1) { - *pc = op - Ocmpl; - *pk = Kl; - } - else if (Ocmps <= op && op <= Ocmps1) { - *pc = fcmptoi(op - Ocmps); - *pk = Ks; - } - else if (Ocmpd <= op && op <= Ocmpd1) { - *pc = fcmptoi(op - Ocmpd); - *pk = Kd; - } - else - return 0; - return 1; -} - -static int -noimm(Ref r, Fn *fn) -{ - int64_t val; - - if (rtype(r) != RCon) - return 0; - switch (fn->con[r.val].type) { - case CAddr: - /* we only support the 'small' - * code model of the ABI, this - * means that we can always - * address data with 32bits - */ - return 0; - case CBits: - val = fn->con[r.val].bits.i; - return (val < INT32_MIN || val > INT32_MAX); - default: - die("invalid constant"); - } -} - -static int -rslot(Ref r, Fn *fn) -{ - if (rtype(r) != RTmp) - return -1; - return fn->tmp[r.val].slot; -} - -static int -argcls(Ins *i, int n) -{ - return opdesc[i->op].argcls[n][i->cls]; -} - -static void -fixarg(Ref *r, int k, int phi, Fn *fn) -{ - Addr a, *m; - Ref r0, r1; - int s, n; - - r1 = r0 = *r; - s = rslot(r0, fn); - if (KBASE(k) == 1 && rtype(r0) == RCon) { - /* load floating points from memory - * slots, they can't be used as - * immediates - */ - r1 = MEM(fn->nmem); - vgrow(&fn->mem, ++fn->nmem); - memset(&a, 0, sizeof a); - a.offset.type = CAddr; - a.offset.local = 1; - n = stashfp(fn->con[r0.val].bits.i, KWIDE(k)); - 
sprintf(a.offset.label, "fp%d", n); - fn->mem[fn->nmem-1] = a; - } - else if (!phi && k == Kl && noimm(r0, fn)) { - /* load constants that do not fit in - * a 32bit signed integer into a - * long temporary - */ - r1 = newtmp("isel", Kl, fn); - emit(Ocopy, Kl, r1, r0, R); - } - else if (s != -1) { - /* load fast locals' addresses into - * temporaries right before the - * instruction - */ - r1 = newtmp("isel", Kl, fn); - emit(Oaddr, Kl, r1, SLOT(s), R); - } - else if (rtype(r0) == RMem) { - /* apple asm fix */ - m = &fn->mem[r0.val]; - if (req(m->base, R)) { - n = fn->ncon; - vgrow(&fn->con, ++fn->ncon); - fn->con[n] = m->offset; - m->offset.type = CUndef; - r0 = newtmp("isel", Kl, fn); - emit(Oaddr, Kl, r0, CON(n), R); - m->base = r0; - } - } - *r = r1; -} - -static void -seladdr(Ref *r, ANum *an, Fn *fn) -{ - Addr a; - Ref r0; - - r0 = *r; - if (rtype(r0) == RTmp) { - amatch(&a, r0, an, fn, 1); - if (req(a.base, r0)) - return; - if (a.offset.type == CAddr) - if (!req(a.base, R)) { - /* apple asm fix */ - if (!req(a.index, R)) - return; - else { - a.index = a.base; - a.scale = 1; - a.base = R; - } - } - chuse(r0, -1, fn); - vgrow(&fn->mem, ++fn->nmem); - fn->mem[fn->nmem-1] = a; - chuse(a.base, +1, fn); - chuse(a.index, +1, fn); - *r = MEM(fn->nmem-1); - } -} - -static int -selcmp(Ref arg[2], int k, Fn *fn) -{ - int swap; - Ref r, *iarg; - - swap = rtype(arg[0]) == RCon; - if (swap) { - r = arg[1]; - arg[1] = arg[0]; - arg[0] = r; - } - emit(Oxcmp, k, R, arg[1], arg[0]); - iarg = curi->arg; - if (rtype(arg[0]) == RCon) { - assert(k == Kl); - iarg[1] = newtmp("isel", k, fn); - emit(Ocopy, k, iarg[1], arg[0], R); - } - fixarg(&iarg[0], k, 0, fn); - fixarg(&iarg[1], k, 0, fn); - return swap; -} - -static void -sel(Ins i, ANum *an, Fn *fn) -{ - Ref r0, r1, *iarg; - int x, k, kc; - int64_t sz; - Ins *i0, *i1; - - if (rtype(i.to) == RTmp) - if (!isreg(i.to) && !isreg(i.arg[0]) && !isreg(i.arg[1])) - if (fn->tmp[i.to.val].nuse == 0) { - chuse(i.arg[0], -1, fn); - 
chuse(i.arg[1], -1, fn); - return; - } - i0 = curi; - k = i.cls; - switch (i.op) { - case Odiv: - case Orem: - case Oudiv: - case Ourem: - if (i.op == Odiv || i.op == Oudiv) - r0 = TMP(RAX), r1 = TMP(RDX); - else - r0 = TMP(RDX), r1 = TMP(RAX); - emit(Ocopy, k, i.to, r0, R); - emit(Ocopy, k, R, r1, R); - if (rtype(i.arg[1]) == RCon) { - /* immediates not allowed for - * divisions in x86 - */ - r0 = newtmp("isel", k, fn); - } else - r0 = i.arg[1]; - if (fn->tmp[r0.val].slot != -1) - err("unlikely argument %%%s in %s", - fn->tmp[r0.val].name, opdesc[i.op].name); - if (i.op == Odiv || i.op == Orem) { - emit(Oxidiv, k, R, r0, R); - emit(Osign, k, TMP(RDX), TMP(RAX), R); - } else { - emit(Oxdiv, k, R, r0, R); - emit(Ocopy, k, TMP(RDX), CON_Z, R); - } - emit(Ocopy, k, TMP(RAX), i.arg[0], R); - fixarg(&curi->arg[0], k, 0, fn); - if (rtype(i.arg[1]) == RCon) - emit(Ocopy, k, r0, i.arg[1], R); - break; - case Osar: - case Oshr: - case Oshl: - if (rtype(i.arg[1]) == RCon) - goto Emit; - r0 = i.arg[1]; - i.arg[1] = TMP(RCX); - emit(Ocopy, Kw, R, TMP(RCX), R); - emiti(i); - emit(Ocopy, Kw, TMP(RCX), r0, R); - break; - case Onop: - break; - case Ostored: - case Ostores: - case Ostorel: - case Ostorew: - case Ostoreh: - case Ostoreb: - if (rtype(i.arg[0]) == RCon) { - if (i.op == Ostored) - i.op = Ostorel; - if (i.op == Ostores) - i.op = Ostorew; - } - seladdr(&i.arg[1], an, fn); - goto Emit; - case_Oload: - seladdr(&i.arg[0], an, fn); - goto Emit; - case Ocall: - case Osalloc: - case Ocopy: - case Oadd: - case Osub: - case Omul: - case Oand: - case Oor: - case Oxor: - case Oxtest: - case Ostosi: - case Odtosi: - case Oswtof: - case Osltof: - case Oexts: - case Otruncd: - case Ocast: - case_OExt: -Emit: - emiti(i); - iarg = curi->arg; /* fixarg() can change curi */ - fixarg(&iarg[0], argcls(&i, 0), 0, fn); - fixarg(&iarg[1], argcls(&i, 1), 0, fn); - break; - case Oalloc: - case Oalloc+1: - case Oalloc+2: /* == Oalloc1 */ - /* we need to make sure - * the stack remains aligned - 
* (rsp = 0) mod 16 - */ - if (rtype(i.arg[0]) == RCon) { - sz = fn->con[i.arg[0].val].bits.i; - if (sz < 0 || sz >= INT_MAX-15) - err("invalid alloc size %"PRId64, sz); - sz = (sz + 15) & -16; - emit(Osalloc, Kl, i.to, getcon(sz, fn), R); - } else { - /* r0 = (i.arg[0] + 15) & -16 */ - r0 = newtmp("isel", Kl, fn); - r1 = newtmp("isel", Kl, fn); - emit(Osalloc, Kl, i.to, r0, R); - emit(Oand, Kl, r0, r1, getcon(-16, fn)); - emit(Oadd, Kl, r1, i.arg[0], getcon(15, fn)); - if (fn->tmp[i.arg[0].val].slot != -1) - err("unlikely argument %%%s in %s", - fn->tmp[i.arg[0].val].name, opdesc[i.op].name); - } - break; - default: - if (isext(i.op)) - goto case_OExt; - if (isload(i.op)) - goto case_Oload; - if (iscmp(i.op, &kc, &x)) { - emit(Oxset+x, k, i.to, R, R); - i1 = curi; - if (selcmp(i.arg, kc, fn)) - i1->op = Oxset + icmpop(x); - break; - } - die("unknown instruction %s", opdesc[i.op].name); - } - - while (i0 > curi && --i0) { - assert(rslot(i0->arg[0], fn) == -1); - assert(rslot(i0->arg[1], fn) == -1); - } -} - -static Ins * -flagi(Ins *i0, Ins *i) -{ - while (i>i0) { - i--; - if (opdesc[i->op].sflag) - return i; - if (opdesc[i->op].lflag) - continue; - return 0; - } - return 0; -} - -static void -seljmp(Blk *b, Fn *fn) -{ - Ref r; - int c, k; - Ins *fi; - Tmp *t; - - if (b->jmp.type == Jret0 || b->jmp.type == Jjmp) - return; - assert(b->jmp.type == Jjnz); - r = b->jmp.arg; - t = &fn->tmp[r.val]; - b->jmp.arg = R; - assert(!req(r, R) && rtype(r) != RCon); - if (b->s1 == b->s2) { - chuse(r, -1, fn); - b->jmp.type = Jjmp; - b->s2 = 0; - return; - } - fi = flagi(b->ins, &b->ins[b->nins]); - if (!fi || !req(fi->to, r)) { - selcmp((Ref[2]){r, CON_Z}, Kw, fn); /* todo, long jnz */ - b->jmp.type = Jxjc + ICne; - } - else if (iscmp(fi->op, &k, &c)) { - if (t->nuse == 1) { - if (selcmp(fi->arg, k, fn)) - c = icmpop(c); - *fi = (Ins){.op = Onop}; - } - b->jmp.type = Jxjc + c; - } - else if (fi->op == Oand && t->nuse == 1 - && (rtype(fi->arg[0]) == RTmp || - rtype(fi->arg[1]) == 
RTmp)) { - fi->op = Oxtest; - fi->to = R; - b->jmp.type = Jxjc + ICne; - if (rtype(fi->arg[1]) == RCon) { - r = fi->arg[1]; - fi->arg[1] = fi->arg[0]; - fi->arg[0] = r; - } - } - else { - /* since flags are not tracked in liveness, - * the result of the flag-setting instruction - * has to be marked as live - */ - if (t->nuse == 1) - emit(Ocopy, Kw, R, r, R); - b->jmp.type = Jxjc + ICne; - } -} - -static int -aref(Ref r, ANum *ai) -{ - switch (rtype(r)) { - case RCon: - return 2; - case RTmp: - return ai[r.val].n; - default: - die("constant or temporary expected"); - } -} - -static int -ascale(Ref r, Con *con) -{ - int64_t n; - - if (rtype(r) != RCon) - return 0; - if (con[r.val].type != CBits) - return 0; - n = con[r.val].bits.i; - return n == 1 || n == 2 || n == 4 || n == 8; -} - -static void -anumber(ANum *ai, Blk *b, Con *con) -{ - /* This should be made obsolete by a proper - * reassoc pass. - * - * Rules: - * - * RTmp(_) -> 0 tmp - * ( RTmp(_) -> 1 slot ) - * RCon(_) -> 2 con - * 0 * 2 -> 3 s * i (when constant is 1,2,4,8) - */ - static char add[10][10] = { - [2] [2] = 2, /* folding */ - [2] [5] = 5, [5] [2] = 5, - [2] [6] = 6, [6] [2] = 6, - [2] [7] = 7, [7] [2] = 7, - [0] [0] = 4, /* 4: b + s * i */ - [0] [3] = 4, [3] [0] = 4, - [2] [3] = 5, [3] [2] = 5, /* 5: o + s * i */ - [0] [2] = 6, [2] [0] = 6, /* 6: o + b */ - [2] [4] = 7, [4] [2] = 7, /* 7: o + b + s * i */ - [0] [5] = 7, [5] [0] = 7, - [6] [3] = 7, [3] [6] = 7, - - }; - int a, a1, a2, n1, n2, t1, t2; - Ins *i; - - for (i=b->ins; i-b->ins < b->nins; i++) { - if (rtype(i->to) == RTmp) - ai[i->to.val].i = i; - if (i->op != Oadd && i->op != Omul) - continue; - a1 = aref(i->arg[0], ai); - a2 = aref(i->arg[1], ai); - t1 = a1 != 1 && a1 != 2; - t2 = a2 != 1 && a2 != 2; - if (i->op == Oadd) { - a = add[n1 = a1][n2 = a2]; - if (t1 && a < add[0][a2]) - a = add[n1 = 0][n2 = a2]; - if (t2 && a < add[a1][0]) - a = add[n1 = a1][n2 = 0]; - if (t1 && t2 && a < add[0][0]) - a = add[n1 = 0][n2 = 0]; - } else { - n1 = 
n2 = a = 0; - if (ascale(i->arg[0], con) && t2) - a = 3, n1 = 2, n2 = 0; - if (t1 && ascale(i->arg[1], con)) - a = 3, n1 = 0, n2 = 2; - } - ai[i->to.val].n = a; - ai[i->to.val].l = n1; - ai[i->to.val].r = n2; - } -} - -static void -amatch(Addr *a, Ref r, ANum *ai, Fn *fn, int top) -{ - Ins *i; - int nl, nr, t, s; - Ref al, ar; - - if (top) - memset(a, 0, sizeof *a); - if (rtype(r) == RCon) { - addcon(&a->offset, &fn->con[r.val]); - return; - } - assert(rtype(r) == RTmp); - i = ai[r.val].i; - nl = ai[r.val].l; - nr = ai[r.val].r; - if (i) { - if (nl > nr) { - al = i->arg[1]; - ar = i->arg[0]; - t = nl, nl = nr, nr = t; - } else { - al = i->arg[0]; - ar = i->arg[1]; - } - } - switch (ai[r.val].n) { - case 3: /* s * i */ - if (!top) { - a->index = al; - a->scale = fn->con[ar.val].bits.i; - } else - a->base = r; - break; - case 4: /* b + s * i */ - switch (nr) { - case 0: - if (fn->tmp[ar.val].slot != -1) { - al = i->arg[1]; - ar = i->arg[0]; - } - a->index = ar; - a->scale = 1; - break; - case 3: - amatch(a, ar, ai, fn, 0); - break; - } - r = al; - case 0: - s = fn->tmp[r.val].slot; - if (s != -1) - r = SLOT(s); - a->base = r; - break; - case 2: /* constants */ - case 5: /* o + s * i */ - case 6: /* o + b */ - case 7: /* o + b + s * i */ - amatch(a, ar, ai, fn, 0); - amatch(a, al, ai, fn, 0); - break; - default: - die("unreachable"); - } -} - -/* instruction selection - * requires use counts (as given by parsing) - */ -void -isel(Fn *fn) -{ - Blk *b, **sb; - Ins *i; - Phi *p; - uint a; - int n, al; - int64_t sz; - ANum *ainfo; - - /* assign slots to fast allocs */ - b = fn->start; - /* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */ - for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2) - for (i=b->ins; i-b->ins < b->nins; i++) - if (i->op == al) { - if (rtype(i->arg[0]) != RCon) - break; - sz = fn->con[i->arg[0].val].bits.i; - if (sz < 0 || sz >= INT_MAX-15) - err("invalid alloc size %"PRId64, sz); - sz = (sz + n-1) & -n; - sz /= 4; - 
fn->tmp[i->to.val].slot = fn->slot; - fn->slot += sz; - *i = (Ins){.op = Onop}; - } - - /* process basic blocks */ - n = fn->ntmp; - ainfo = emalloc(n * sizeof ainfo[0]); - for (b=fn->start; b; b=b->link) { - curi = &insb[NIns]; - for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++) - for (p=(*sb)->phi; p; p=p->link) { - for (a=0; p->blk[a] != b; a++) - assert(a+1 < p->narg); - fixarg(&p->arg[a], p->cls, 1, fn); - } - memset(ainfo, 0, n * sizeof ainfo[0]); - anumber(ainfo, b, fn->con); - seljmp(b, fn); - for (i=&b->ins[b->nins]; i!=b->ins;) - sel(*--i, ainfo, fn); - b->nins = &insb[NIns] - curi; - idup(&b->ins, curi, b->nins); - } - free(ainfo); - - if (debug['I']) { - fprintf(stderr, "\n> After instruction selection:\n"); - printfn(fn, stderr); - } -} diff --git a/live.c b/live.c index 18c9b63..6e63705 100644 --- a/live.c +++ b/live.c @@ -104,31 +104,39 @@ Again: memset(phi, 0, f->ntmp * sizeof phi[0]); memset(nlv, 0, sizeof nlv); - b->out->t[0] |= RGLOB; + b->out->t[0] |= T.rglob; bscopy(b->in, b->out); for (t=0; bsiter(b->in, &t); t++) { phifix(t, phi, f->tmp); nlv[KBASE(f->tmp[t].cls)]++; } if (rtype(b->jmp.arg) == RCall) { - assert(bscount(b->in) == NRGlob && nlv[0] == NRGlob && nlv[1] == 0); - b->in->t[0] |= retregs(b->jmp.arg, nlv); + assert((int)bscount(b->in) == T.nrglob && + nlv[0] == T.nrglob && + nlv[1] == 0); + b->in->t[0] |= T.retregs(b->jmp.arg, nlv); } else bset(b->jmp.arg, b, nlv, phi, f->tmp); for (k=0; k<2; k++) b->nlive[k] = nlv[k]; for (i=&b->ins[b->nins]; i!=b->ins;) { if ((--i)->op == Ocall && rtype(i->arg[1]) == RCall) { - b->in->t[0] &= ~retregs(i->arg[1], m); - for (k=0; k<2; k++) + b->in->t[0] &= ~T.retregs(i->arg[1], m); + for (k=0; k<2; k++) { nlv[k] -= m[k]; - if (nlv[0] + NISave > b->nlive[0]) - b->nlive[0] = nlv[0] + NISave; - if (nlv[1] + NFSave > b->nlive[1]) - b->nlive[1] = nlv[1] + NFSave; - b->in->t[0] |= argregs(i->arg[1], m); - for (k=0; k<2; k++) + /* caller-save registers are used + * by the callee, in that sense, + * right in the 
middle of the call, + * they are live: */ + nlv[k] += T.nrsave[k]; + if (nlv[k] > b->nlive[k]) + b->nlive[k] = nlv[k]; + } + b->in->t[0] |= T.argregs(i->arg[1], m); + for (k=0; k<2; k++) { + nlv[k] -= T.nrsave[k]; nlv[k] += m[k]; + } } if (!req(i->to, R)) { assert(rtype(i->to) == RTmp); diff --git a/main.c b/main.c index 4d2e6bd..6098dee 100644 --- a/main.c +++ b/main.c @@ -3,6 +3,18 @@ #include #include +Target T; + +extern Target T_amd64_sysv; + +static struct TMap { + char *name; + Target *T; +} tmap[] = { + { "amd64_sysv", &T_amd64_sysv }, + { 0, 0 } +}; + enum Asm { Gasmacho, Gaself, @@ -33,7 +45,7 @@ data(Dat *d) fputs("/* end data */\n\n", outf); freeall(); } - emitdat(d, outf); + gasemitdat(d, outf); } static void @@ -62,10 +74,10 @@ func(Fn *fn) copy(fn); filluse(fn); fold(fn); - abi(fn); + T.abi(fn); fillpreds(fn); filluse(fn); - isel(fn); + T.isel(fn); fillrpo(fn); filllive(fn); fillcost(fn); @@ -83,7 +95,7 @@ func(Fn *fn) } else fn->rpo[n]->link = fn->rpo[n+1]; if (!dbg) { - emitfn(fn, outf); + T.emitfn(fn, outf); fprintf(outf, "/* end function %s */\n\n", fn->name); } else fprintf(stderr, "\n"); @@ -93,13 +105,15 @@ func(Fn *fn) int main(int ac, char *av[]) { - FILE *inf; - char *f; + struct TMap *tm; + FILE *inf, *hf; + char *f, *sep; int c, asm; - asm = Defaultasm; + asm = Defasm; + T = Deftgt; outf = stdout; - while ((c = getopt(ac, av, "hd:o:G:")) != -1) + while ((c = getopt(ac, av, "hd:o:G:t:")) != -1) switch (c) { case 'd': for (; *optarg; optarg++) @@ -112,6 +126,18 @@ main(int ac, char *av[]) if (strcmp(optarg, "-") != 0) outf = fopen(optarg, "w"); break; + case 't': + for (tm=tmap;; tm++) { + if (!tm->name) { + fprintf(stderr, "unknown target '%s'\n", optarg); + exit(1); + } + if (strcmp(optarg, tm->name) == 0) { + T = *tm->T; + break; + } + } + break; case 'G': if (strcmp(optarg, "e") == 0) asm = Gaself; @@ -124,22 +150,28 @@ main(int ac, char *av[]) break; case 'h': default: - fprintf(stderr, "%s [OPTIONS] {file.ssa, -}\n", av[0]); - 
fprintf(stderr, "\t%-10s prints this help\n", "-h"); - fprintf(stderr, "\t%-10s output to file\n", "-o file"); - fprintf(stderr, "\t%-10s generate gas (e) or osx (m) asm\n", "-G {e,m}"); - fprintf(stderr, "\t%-10s dump debug information\n", "-d "); + hf = c != 'h' ? stderr : stdout; + fprintf(hf, "%s [OPTIONS] {file.ssa, -}\n", av[0]); + fprintf(hf, "\t%-11s prints this help\n", "-h"); + fprintf(hf, "\t%-11s output to file\n", "-o file"); + fprintf(hf, "\t%-11s generate for a target among:\n", "-t "); + fprintf(hf, "\t%-11s ", ""); + for (tm=tmap, sep=""; tm->name; tm++, sep=", ") + fprintf(hf, "%s%s", sep, tm->name); + fprintf(hf, "\n"); + fprintf(hf, "\t%-11s generate gas (e) or osx (m) asm\n", "-G {e,m}"); + fprintf(hf, "\t%-11s dump debug information\n", "-d "); exit(c != 'h'); } switch (asm) { case Gaself: - locprefix = ".L"; - symprefix = ""; + gasloc = ".L"; + gassym = ""; break; case Gasmacho: - locprefix = "L"; - symprefix = "_"; + gasloc = "L"; + gassym = "_"; break; } @@ -159,7 +191,7 @@ main(int ac, char *av[]) } while (++optind < ac); if (!dbg) - emitfin(outf); + gasemitfin(outf); exit(0); } diff --git a/mem.c b/mem.c index fd6ee16..eda3d18 100644 --- a/mem.c +++ b/mem.c @@ -34,9 +34,9 @@ memopt(Fn *fn) if (isstore(l->op)) if (req(i->to, l->arg[1]) && !req(i->to, l->arg[0])) if (s == -1 || s == storesz(l)) - if (k == -1 || k == opdesc[l->op].argcls[0][0]) { + if (k == -1 || k == optab[l->op].argcls[0][0]) { s = storesz(l); - k = opdesc[l->op].argcls[0][0]; + k = optab[l->op].argcls[0][0]; continue; } goto Skip; diff --git a/ops.h b/ops.h new file mode 100644 index 0000000..9b357a5 --- /dev/null +++ b/ops.h @@ -0,0 +1,167 @@ +#ifndef X /* amd64 */ + #define X(NMemArgs, SetsZeroFlag, LeavesFlags) +#endif + +#define T(a,b,c,d,e,f,g,h) { \ + {[Kw]=K##a, [Kl]=K##b, [Ks]=K##c, [Kd]=K##d}, \ + {[Kw]=K##e, [Kl]=K##f, [Ks]=K##g, [Kd]=K##h} \ +} + + +/*********************/ +/* PUBLIC OPERATIONS */ +/*********************/ + +/* Arithmetic and Bits */ +O(add, 
T(w,l,s,d, w,l,s,d), 1) X(2, 1, 0) +O(sub, T(w,l,s,d, w,l,s,d), 1) X(2, 1, 0) +O(div, T(w,l,s,d, w,l,s,d), 1) X(0, 0, 0) +O(rem, T(w,l,e,e, w,l,e,e), 1) X(0, 0, 0) +O(udiv, T(w,l,e,e, w,l,e,e), 1) X(0, 0, 0) +O(urem, T(w,l,e,e, w,l,e,e), 1) X(0, 0, 0) +O(mul, T(w,l,s,d, w,l,s,d), 1) X(2, 0, 0) +O(and, T(w,l,e,e, w,l,e,e), 1) X(2, 1, 0) +O(or, T(w,l,e,e, w,l,e,e), 1) X(2, 1, 0) +O(xor, T(w,l,e,e, w,l,e,e), 1) X(2, 1, 0) +O(sar, T(w,l,e,e, w,w,e,e), 1) X(1, 1, 0) +O(shr, T(w,l,e,e, w,w,e,e), 1) X(1, 1, 0) +O(shl, T(w,l,e,e, w,w,e,e), 1) X(1, 1, 0) + +/* Comparisons */ +O(ceqw, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0) +O(cnew, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0) +O(csgew, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0) +O(csgtw, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0) +O(cslew, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0) +O(csltw, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0) +O(cugew, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0) +O(cugtw, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0) +O(culew, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0) +O(cultw, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0) + +O(ceql, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0) +O(cnel, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0) +O(csgel, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0) +O(csgtl, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0) +O(cslel, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0) +O(csltl, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0) +O(cugel, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0) +O(cugtl, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0) +O(culel, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0) +O(cultl, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0) + +O(ceqs, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0) +O(cges, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0) +O(cgts, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0) +O(cles, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0) +O(clts, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0) +O(cnes, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0) +O(cos, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0) +O(cuos, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0) + +O(ceqd, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0) +O(cged, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0) +O(cgtd, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0) +O(cled, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 
0) +O(cltd, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0) +O(cned, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0) +O(cod, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0) +O(cuod, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0) + +/* Memory */ +O(storeb, T(w,e,e,e, m,e,e,e), 0) X(0, 0, 1) +O(storeh, T(w,e,e,e, m,e,e,e), 0) X(0, 0, 1) +O(storew, T(w,e,e,e, m,e,e,e), 0) X(0, 0, 1) +O(storel, T(l,e,e,e, m,e,e,e), 0) X(0, 0, 1) +O(stores, T(s,e,e,e, m,e,e,e), 0) X(0, 0, 1) +O(stored, T(d,e,e,e, m,e,e,e), 0) X(0, 0, 1) + +O(loadsb, T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1) +O(loadub, T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1) +O(loadsh, T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1) +O(loaduh, T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1) +O(loadsw, T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1) +O(loaduw, T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1) +O(load, T(m,m,m,m, x,x,x,x), 0) X(0, 0, 1) + +/* Extensions and Truncations */ +O(extsb, T(w,w,e,e, x,x,e,e), 1) X(0, 0, 1) +O(extub, T(w,w,e,e, x,x,e,e), 1) X(0, 0, 1) +O(extsh, T(w,w,e,e, x,x,e,e), 1) X(0, 0, 1) +O(extuh, T(w,w,e,e, x,x,e,e), 1) X(0, 0, 1) +O(extsw, T(e,w,e,e, e,x,e,e), 1) X(0, 0, 1) +O(extuw, T(e,w,e,e, e,x,e,e), 1) X(0, 0, 1) + +O(exts, T(e,e,e,s, e,e,e,x), 1) X(0, 0, 1) +O(truncd, T(e,e,d,e, e,e,x,e), 1) X(0, 0, 1) +O(stosi, T(s,s,e,e, x,x,e,e), 1) X(0, 0, 1) +O(dtosi, T(d,d,e,e, x,x,e,e), 1) X(0, 0, 1) +O(swtof, T(e,e,w,w, e,e,x,x), 1) X(0, 0, 1) +O(sltof, T(e,e,l,l, e,e,x,x), 1) X(0, 0, 1) +O(cast, T(s,d,w,l, x,x,x,x), 1) X(0, 0, 1) + +/* Stack Allocation */ +O(alloc4, T(e,l,e,e, e,x,e,e), 0) X(0, 0, 0) +O(alloc8, T(e,l,e,e, e,x,e,e), 0) X(0, 0, 0) +O(alloc16, T(e,l,e,e, e,x,e,e), 0) X(0, 0, 0) + +/* Variadic Function Helpers */ +O(vaarg, T(m,m,m,m, x,x,x,x), 0) X(0, 0, 0) +O(vastart, T(m,e,e,e, x,e,e,e), 0) X(0, 0, 0) + +O(copy, T(w,l,s,d, x,x,x,x), 0) X(0, 0, 1) + + +/****************************************/ +/* INTERNAL OPERATIONS (keep nop first) */ +/****************************************/ + +/* Miscellaneous and Architecture-Specific Operations */ +O(nop, T(x,x,x,x, x,x,x,x), 0) X(0, 0, 1) +O(addr, 
T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1) +O(swap, T(w,l,s,d, w,l,s,d), 0) X(1, 0, 0) +O(sign, T(w,l,e,e, x,x,e,e), 0) X(0, 0, 0) +O(salloc, T(e,l,e,e, e,x,e,e), 0) X(0, 0, 0) +O(xidiv, T(w,l,e,e, x,x,e,e), 0) X(1, 0, 0) +O(xdiv, T(w,l,e,e, x,x,e,e), 0) X(1, 0, 0) +O(xcmp, T(w,l,s,d, w,l,s,d), 0) X(1, 1, 0) +O(xtest, T(w,l,e,e, w,l,e,e), 0) X(1, 1, 0) + +/* Arguments, Parameters, and Calls */ +O(par, T(x,x,x,x, x,x,x,x), 0) X(0, 0, 0) +O(parc, T(e,x,e,e, e,x,e,e), 0) X(0, 0, 0) +O(pare, T(x,x,x,x, x,x,x,x), 0) X(0, 0, 0) +O(arg, T(w,l,s,d, x,x,x,x), 0) X(0, 0, 0) +O(argc, T(e,x,e,e, e,l,e,e), 0) X(0, 0, 0) +O(arge, T(w,l,s,d, x,x,x,x), 0) X(0, 0, 0) +O(call, T(m,m,m,m, x,x,x,x), 0) X(0, 0, 0) +O(vacall, T(m,m,m,m, x,x,x,x), 0) X(0, 0, 0) + +/* Flags Setting */ +O(flagieq, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) +O(flagine, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) +O(flagisge, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) +O(flagisgt, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) +O(flagisle, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) +O(flagislt, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) +O(flagiuge, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) +O(flagiugt, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) +O(flagiule, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) +O(flagiult, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) +O(flagfeq, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) +O(flagfge, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) +O(flagfgt, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) +O(flagfle, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) +O(flagflt, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) +O(flagfne, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) +O(flagfo, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) +O(flagfuo, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) + + +#undef T +#undef X +#undef O + +/* +| column -t -o ' ' +*/ diff --git a/parse.c b/parse.c index b393fc2..69bd74e 100644 --- a/parse.c +++ b/parse.c @@ -4,91 +4,13 @@ enum { Ke = -2, /* Erroneous mode */ - Km = Kl, /* Memory pointer (for x64) */ + Km = Kl, /* Memory pointer */ }; -OpDesc opdesc[NOp] = { -#define A(a,b,c,d) {[Kw]=K##a, [Kl]=K##b, [Ks]=K##c, [Kd]=K##d} - - /* 
NAME NM ARGCLS0 ARGCLS1 SF LF FLD*/ - [Oadd] = { "add", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0, 1 }, - [Osub] = { "sub", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0, 1 }, - [Odiv] = { "div", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0, 1 }, - [Orem] = { "rem", 2, {A(w,l,e,e), A(w,l,e,e)}, 0, 0, 1 }, - [Oudiv] = { "udiv", 2, {A(w,l,e,e), A(w,l,e,e)}, 0, 0, 1 }, - [Ourem] = { "urem", 2, {A(w,l,e,e), A(w,l,e,e)}, 0, 0, 1 }, - [Omul] = { "mul", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0, 1 }, - [Oand] = { "and", 2, {A(w,l,e,e), A(w,l,e,e)}, 1, 0, 1 }, - [Oor] = { "or", 2, {A(w,l,e,e), A(w,l,e,e)}, 1, 0, 1 }, - [Oxor] = { "xor", 2, {A(w,l,e,e), A(w,l,e,e)}, 1, 0, 1 }, - [Osar] = { "sar", 1, {A(w,l,e,e), A(w,w,e,e)}, 1, 0, 1 }, - [Oshr] = { "shr", 1, {A(w,l,e,e), A(w,w,e,e)}, 1, 0, 1 }, - [Oshl] = { "shl", 1, {A(w,l,e,e), A(w,w,e,e)}, 1, 0, 1 }, - [Ostored] = { "stored", 0, {A(d,e,e,e), A(m,e,e,e)}, 0, 1, 0 }, - [Ostores] = { "stores", 0, {A(s,e,e,e), A(m,e,e,e)}, 0, 1, 0 }, - [Ostorel] = { "storel", 0, {A(l,e,e,e), A(m,e,e,e)}, 0, 1, 0 }, - [Ostorew] = { "storew", 0, {A(w,e,e,e), A(m,e,e,e)}, 0, 1, 0 }, - [Ostoreh] = { "storeh", 0, {A(w,e,e,e), A(m,e,e,e)}, 0, 1, 0 }, - [Ostoreb] = { "storeb", 0, {A(w,e,e,e), A(m,e,e,e)}, 0, 1, 0 }, - [Oload] = { "load", 0, {A(m,m,m,m), A(x,x,x,x)}, 0, 1, 0 }, - [Oloadsw] = { "loadsw", 0, {A(m,m,e,e), A(x,x,e,e)}, 0, 1, 0 }, - [Oloaduw] = { "loaduw", 0, {A(m,m,e,e), A(x,x,e,e)}, 0, 1, 0 }, - [Oloadsh] = { "loadsh", 0, {A(m,m,e,e), A(x,x,e,e)}, 0, 1, 0 }, - [Oloaduh] = { "loaduh", 0, {A(m,m,e,e), A(x,x,e,e)}, 0, 1, 0 }, - [Oloadsb] = { "loadsb", 0, {A(m,m,e,e), A(x,x,e,e)}, 0, 1, 0 }, - [Oloadub] = { "loadub", 0, {A(m,m,e,e), A(x,x,e,e)}, 0, 1, 0 }, - [Oextsw] = { "extsw", 0, {A(e,w,e,e), A(e,x,e,e)}, 0, 1, 1 }, - [Oextuw] = { "extuw", 0, {A(e,w,e,e), A(e,x,e,e)}, 0, 1, 1 }, - [Oextsh] = { "extsh", 0, {A(w,w,e,e), A(x,x,e,e)}, 0, 1, 1 }, - [Oextuh] = { "extuh", 0, {A(w,w,e,e), A(x,x,e,e)}, 0, 1, 1 }, - [Oextsb] = { "extsb", 0, {A(w,w,e,e), A(x,x,e,e)}, 0, 1, 1 }, - 
[Oextub] = { "extub", 0, {A(w,w,e,e), A(x,x,e,e)}, 0, 1, 1 }, - [Oexts] = { "exts", 0, {A(e,e,e,s), A(e,e,e,x)}, 0, 1, 1 }, - [Otruncd] = { "truncd", 0, {A(e,e,d,e), A(e,e,x,e)}, 0, 1, 1 }, - [Ostosi] = { "stosi", 0, {A(s,s,e,e), A(x,x,e,e)}, 0, 1, 1 }, - [Odtosi] = { "dtosi", 0, {A(d,d,e,e), A(x,x,e,e)}, 0, 1, 1 }, - [Oswtof] = { "swtof", 0, {A(e,e,w,w), A(e,e,x,x)}, 0, 1, 1 }, - [Osltof] = { "sltof", 0, {A(e,e,l,l), A(e,e,x,x)}, 0, 1, 1 }, - [Ocast] = { "cast", 0, {A(s,d,w,l), A(x,x,x,x)}, 0, 1, 1 }, - [Ocopy] = { "copy", 1, {A(w,l,s,d), A(x,x,x,x)}, 0, 1, 0 }, - [Onop] = { "nop", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 1, 0 }, - [Oswap] = { "swap", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0, 0 }, - [Osign] = { "sign", 0, {A(w,l,e,e), A(x,x,e,e)}, 0, 0, 0 }, - [Osalloc] = { "salloc", 0, {A(e,l,e,e), A(e,x,e,e)}, 0, 0, 0 }, - [Oxidiv] = { "xidiv", 1, {A(w,l,e,e), A(x,x,e,e)}, 0, 0, 0 }, - [Oxdiv] = { "xdiv", 1, {A(w,l,e,e), A(x,x,e,e)}, 0, 0, 0 }, - [Oxcmp] = { "xcmp", 1, {A(w,l,s,d), A(w,l,s,d)}, 1, 0, 0 }, - [Oxtest] = { "xtest", 1, {A(w,l,e,e), A(w,l,e,e)}, 1, 0, 0 }, - [Oaddr] = { "addr", 0, {A(m,m,e,e), A(x,x,e,e)}, 0, 1, 0 }, - [Opar] = { "par", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0, 0 }, - [Opare] = { "pare", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0, 0 }, - [Oparc] = { "parc", 0, {A(e,x,e,e), A(e,x,e,e)}, 0, 0, 0 }, - [Oarg] = { "arg", 0, {A(w,l,s,d), A(x,x,x,x)}, 0, 0, 0 }, - [Oarge] = { "arge", 0, {A(w,l,s,d), A(x,x,x,x)}, 0, 0, 0 }, - [Oargc] = { "argc", 0, {A(e,x,e,e), A(e,l,e,e)}, 0, 0, 0 }, - [Ocall] = { "call", 0, {A(m,m,m,m), A(x,x,x,x)}, 0, 0, 0 }, - [Ovacall] = { "vacall", 0, {A(m,m,m,m), A(x,x,x,x)}, 0, 0, 0 }, - [Oxsetnp] = { "xsetnp", 0, {A(x,x,e,e), A(x,x,e,e)}, 0, 0, 0 }, - [Oxsetp] = { "xsetp", 0, {A(x,x,e,e), A(x,x,e,e)}, 0, 0, 0 }, - [Oalloc] = { "alloc4", 1, {A(e,l,e,e), A(e,x,e,e)}, 0, 0, 0 }, - [Oalloc+1] = { "alloc8", 1, {A(e,l,e,e), A(e,x,e,e)}, 0, 0, 0 }, - [Oalloc+2] = { "alloc16", 1, {A(e,l,e,e), A(e,x,e,e)}, 0, 0, 0 }, - [Ovaarg] = { "vaarg", 0, {A(m,m,m,m), 
A(x,x,x,x)}, 0, 0, 0 }, - [Ovastart] = { "vastart", 0, {A(m,e,e,e), A(x,e,e,e)}, 0, 0, 0 }, -#define X(c) \ - [Ocmpw+IC##c] = { "c" #c "w", 0, {A(w,w,e,e), A(w,w,e,e)}, 1, 0, 1 }, \ - [Ocmpl+IC##c] = { "c" #c "l", 0, {A(l,l,e,e), A(l,l,e,e)}, 1, 0, 1 }, \ - [Oxset+IC##c] = { "xset" #c, 0, {A(x,x,e,e), A(x,x,e,e)}, 0, 1, 0 }, - ICMPS(X) -#undef X -#define X(c) \ - [Ocmps+FC##c] = { "c" #c "s", 0, {A(s,s,e,e), A(s,s,e,e)}, 1, 0, 1 }, \ - [Ocmpd+FC##c] = { "c" #c "d", 0, {A(d,d,e,e), A(d,d,e,e)}, 1, 0, 1 }, - FCMPS(X) -#undef X - +Op optab[NOp] = { +#define O(op, t, cf) [O##op]={#op, t, cf}, + #include "ops.h" }; -#undef A typedef enum { PXXX, @@ -242,8 +164,8 @@ lexinit() if (done) return; for (i=0; iins; i-b->ins < b->nins; i++) for (n=0; n<2; n++) { - k = opdesc[i->op].argcls[n][i->cls]; + k = optab[i->op].argcls[n][i->cls]; r = i->arg[n]; t = &fn->tmp[r.val]; if (k == Ke) err("invalid instruction type in %s", - opdesc[i->op].name); + optab[i->op].name); if (rtype(r) == RType) continue; if (rtype(r) != -1 && k == Kx) err("no %s operand expected in %s", n == 1 ? "second" : "first", - opdesc[i->op].name); + optab[i->op].name); if (rtype(r) == -1 && k != Kx) err("missing %s operand in %s", n == 1 ? "second" : "first", - opdesc[i->op].name); + optab[i->op].name); if (!usecheck(r, k, fn)) err("invalid type for %s operand %%%s in %s", n == 1 ? 
"second" : "first", - t->name, opdesc[i->op].name); + t->name, optab[i->op].name); } r = b->jmp.arg; if (isret(b->jmp.type)) { @@ -866,7 +788,10 @@ parsefn(int export) curf->tmp = vnew(curf->ntmp, sizeof curf->tmp[0], Pfn); curf->con = vnew(curf->ncon, sizeof curf->con[0], Pfn); for (i=0; icon[0].type = CBits; curf->export = export; blink = &curf->start; @@ -1228,29 +1153,12 @@ printref(Ref r, Fn *fn, FILE *f) void printfn(Fn *fn, FILE *f) { + static char ktoc[] = "wlsd"; static char *jtoa[NJmp] = { - [Jret0] = "ret", - [Jretw] = "retw", - [Jretl] = "retl", - [Jretc] = "retc", - [Jrets] = "rets", - [Jretd] = "retd", - [Jjnz] = "jnz", - [Jxjnp] = "xjnp", - [Jxjp] = "xjp", - #define X(c) [Jxjc+IC##c] = "xj" #c, - ICMPS(X) + #define X(j) [J##j] = #j, + JMPS(X) #undef X }; - static char prcls[NOp] = { - [Oarg] = 1, - [Oswap] = 1, - [Oxcmp] = 1, - [Oxtest] = 1, - [Oxdiv] = 1, - [Oxidiv] = 1, - }; - static char ktoc[] = "wlsd"; Blk *b; Phi *p; Ins *i; @@ -1282,10 +1190,18 @@ printfn(Fn *fn, FILE *f) printref(i->to, fn, f); fprintf(f, " =%c ", ktoc[i->cls]); } - assert(opdesc[i->op].name); - fprintf(f, "%s", opdesc[i->op].name); - if (req(i->to, R) && prcls[i->op]) - fputc(ktoc[i->cls], f); + assert(optab[i->op].name); + fprintf(f, "%s", optab[i->op].name); + if (req(i->to, R)) + switch (i->op) { + case Oarg: + case Oswap: + case Oxcmp: + case Oxtest: + case Oxdiv: + case Oxidiv: + fputc(ktoc[i->cls], f); + } if (!req(i->arg[0], R)) { fprintf(f, " "); printref(i->arg[0], fn, f); diff --git a/rega.c b/rega.c index 3d83327..02429a6 100644 --- a/rega.c +++ b/rega.c @@ -8,8 +8,8 @@ typedef struct RMap RMap; struct RMap { - int t[NIReg+NFReg]; - int r[NIReg+NFReg]; + int t[Tmp0]; + int r[Tmp0]; BSet b[1]; int n; }; @@ -78,10 +78,12 @@ static void radd(RMap *m, int t, int r) { assert((t >= Tmp0 || t == r) && "invalid temporary"); - assert(((RAX <= r && r < RAX + NIReg) || (XMM0 <= r && r < XMM0 + NFReg)) && "invalid register"); + assert(((T.gpr0 <= r && r < T.gpr0 + T.ngpr) + 
|| (T.fpr0 <= r && r < T.fpr0 + T.nfpr)) + && "invalid register"); assert(!bshas(m->b, t) && "temporary has mapping"); assert(!bshas(m->b, r) && "register already allocated"); - assert(m->n <= NIReg+NFReg && "too many mappings"); + assert(m->n <= T.ngpr+T.nfpr && "too many mappings"); bsset(m->b, t); bsset(m->b, r); m->t[m->n] = t; @@ -110,11 +112,11 @@ ralloc(RMap *m, int t) regs = tmp[phicls(t, tmp)].hint.m; regs |= m->b->t[0]; if (KBASE(tmp[t].cls) == 0) { - r0 = RAX; - r1 = RAX + NIReg; + r0 = T.gpr0; + r1 = r0 + T.ngpr; } else { - r0 = XMM0; - r1 = XMM0 + NFReg; + r0 = T.fpr0; + r1 = r0 + T.nfpr; } for (r=r0; r= Tmp0 || !(BIT(t) & RGLOB)); + assert(t >= Tmp0 || !(BIT(t) & T.rglob)); if (!bshas(m->b, t)) return -1; for (i=0; m->t[i] != t; i++) @@ -295,10 +297,10 @@ dopm(Blk *b, Ins *i, RMap *m) } while (i != b->ins && regcpy(i-1)); assert(m0.n <= m->n); if (i != b->ins && (i-1)->op == Ocall) { - def = retregs((i-1)->arg[1], 0); - for (r=0; rarg[1], 0) | T.rglob; + for (r=0; T.rsave[r]>=0; r++) + if (!(BIT(T.rsave[r]) & def)) + move(T.rsave[r], R, m); } for (npm=0, n=0; nn; n++) { t = m->t[n]; @@ -370,10 +372,10 @@ doblk(Blk *b, RMap *cur) for (i=&b->ins[b->nins]; i!=b->ins;) { switch ((--i)->op) { case Ocall: - rs = argregs(i->arg[1], 0); - for (r=0; rarg[1], 0) | T.rglob; + for (r=0; T.rsave[r]>=0; r++) + if (!(BIT(T.rsave[r]) & rs)) + rfree(cur, T.rsave[r]); break; case Ocopy: if (isreg(i->arg[0])) { @@ -388,7 +390,7 @@ doblk(Blk *b, RMap *cur) if (!req(i->to, R)) { assert(rtype(i->to) == RTmp); r = i->to.val; - if (r >= Tmp0 || !(BIT(r) & RGLOB)) + if (r >= Tmp0 || !(BIT(r) & T.rglob)) r = rfree(cur, r); if (r == -1) { assert(!isreg(i->to)); diff --git a/spill.c b/spill.c index 0872fd5..3871247 100644 --- a/spill.c +++ b/spill.c @@ -196,8 +196,8 @@ limit2(BSet *b1, int k1, int k2, BSet *fst) bscopy(b2, b1); bsinter(b1, mask[0]); bsinter(b2, mask[1]); - limit(b1, NIReg - k1, fst); - limit(b2, NFReg - k2, fst); + limit(b1, T.ngpr - k1, fst); + limit(b2, T.nfpr 
- k2, fst); bsunion(b1, b2); } @@ -265,11 +265,11 @@ dopm(Blk *b, Ins *i, BSet *v) } while (i != b->ins && regcpy(i-1)); bscopy(u, v); if (i != b->ins && (i-1)->op == Ocall) { - v->t[0] &= ~retregs((i-1)->arg[1], 0); - limit2(v, NISave, NFSave, 0); - for (r=0, n=0; nt[0] |= argregs((i-1)->arg[1], 0); + v->t[0] &= ~T.retregs((i-1)->arg[1], 0); + limit2(v, T.nrsave[0], T.nrsave[1], 0); + for (n=0, r=0; T.rsave[n]>=0; n++) + r |= BIT(T.rsave[n]); + v->t[0] |= T.argregs((i-1)->arg[1], 0); } else { limit2(v, 0, 0, 0); r = v->t[0]; @@ -318,9 +318,9 @@ spill(Fn *fn) slot8 = 0; for (t=0; t= XMM0 && t < XMM0 + NFReg) + if (t >= T.fpr0 && t < T.fpr0 + T.nfpr) k = 1; - else if (t >= Tmp0) + if (t >= Tmp0) k = KBASE(tmp[t].cls); bsset(mask[k], t); } @@ -344,9 +344,9 @@ spill(Fn *fn) if (hd) { /* back-edge */ bszero(v); - hd->gen->t[0] |= RGLOB; /* don't spill registers */ + hd->gen->t[0] |= T.rglob; /* don't spill registers */ for (k=0; k<2; k++) { - n = k == 0 ? NIReg : NFReg; + n = k == 0 ? T.ngpr : T.nfpr; bscopy(u, b->out); bsinter(u, mask[k]); bscopy(w, u); @@ -373,7 +373,7 @@ spill(Fn *fn) } else { bscopy(v, b->out); if (rtype(b->jmp.arg) == RCall) - v->t[0] |= retregs(b->jmp.arg, 0); + v->t[0] |= T.retregs(b->jmp.arg, 0); } for (t=Tmp0; bsiter(b->out, &t); t++) if (!bshas(v, t)) @@ -381,7 +381,7 @@ spill(Fn *fn) bscopy(b->out, v); /* 2. 
process the block instructions */ - r = v->t[0] & (BIT(Tmp0)-1); + r = v->t[0]; curi = &insb[NIns]; for (i=&b->ins[b->nins]; i!=b->ins;) { i--; @@ -402,7 +402,7 @@ spill(Fn *fn) bsset(w, t); } } - j = opdesc[i->op].nmem; + j = T.memargs(i->op); for (n=0; n<2; n++) if (rtype(i->arg[n]) == RMem) j--; @@ -449,11 +449,11 @@ spill(Fn *fn) bsclr(v, t); } emiti(*i); - r = v->t[0] & (BIT(Tmp0)-1); + r = v->t[0]; /* Tmp0 is NBit */ if (r) sethint(v, r); } - assert(r == RGLOB || b == fn->start); + assert(r == T.rglob || b == fn->start); for (p=b->phi; p; p=p->link) { assert(rtype(p->to) == RTmp); diff --git a/sysv.c b/sysv.c deleted file mode 100644 index a88b044..0000000 --- a/sysv.c +++ /dev/null @@ -1,718 +0,0 @@ -#include "all.h" - -typedef struct AClass AClass; -typedef struct RAlloc RAlloc; - -struct AClass { - int inmem; - int align; - uint size; - int cls[2]; - Ref ref[2]; -}; - -struct RAlloc { - Ins i; - RAlloc *link; -}; - -static void -classify(AClass *a, Typ *t, int *pn, int *pe) -{ - Seg *seg; - int n, s, *cls; - - for (n=0; nnunion; n++) { - seg = t->seg[n]; - for (s=0; *pe<2; (*pe)++) { - cls = &a->cls[*pe]; - for (; *pn<8; s++) { - switch (seg[s].type) { - case SEnd: - goto Done; - case SPad: - /* don't change anything */ - break; - case SFlt: - if (*cls == Kx) - *cls = Kd; - break; - case SInt: - *cls = Kl; - break; - case STyp: - classify(a, &typ[seg[s].len], pn, pe); - continue; - } - *pn += seg[s].len; - } - Done: - assert(*pn <= 8); - *pn = 0; - } - } -} - -static void -typclass(AClass *a, Typ *t) -{ - int e, n; - uint sz, al; - - sz = t->size; - al = 1u << t->align; - - /* the ABI requires sizes to be rounded - * up to the nearest multiple of 8, moreover - * it makes it easy load and store structures - * in registers - */ - if (al < 8) - al = 8; - sz = (sz + al-1) & -al; - - a->size = sz; - a->align = t->align; - - if (t->dark || sz > 16 || sz == 0) { - /* large or unaligned structures are - * required to be passed in memory - */ - a->inmem = 1; - 
return; - } - - a->cls[0] = Kx; - a->cls[1] = Kx; - a->inmem = 0; - n = 0; - e = 0; - classify(a, t, &n, &e); -} - -static void -blit(Ref rstk, uint soff, Ref rsrc, uint sz, Fn *fn) -{ - Ref r, r1; - uint boff; - - /* it's an impolite blit, we might go across the end - * of the source object a little bit... */ - for (boff=0; sz>0; sz-=8, soff+=8, boff+=8) { - r = newtmp("abi", Kl, fn); - r1 = newtmp("abi", Kl, fn); - emit(Ostorel, 0, R, r, r1); - emit(Oadd, Kl, r1, rstk, getcon(soff, fn)); - r1 = newtmp("abi", Kl, fn); - emit(Oload, Kl, r, r1, R); - emit(Oadd, Kl, r1, rsrc, getcon(boff, fn)); - } -} - -static int -retr(Ref reg[2], AClass *aret) -{ - static int retreg[2][2] = {{RAX, RDX}, {XMM0, XMM0+1}}; - int n, k, ca, nr[2]; - - nr[0] = nr[1] = 0; - ca = 0; - for (n=0; (uint)n*8size; n++) { - k = KBASE(aret->cls[n]); - reg[n] = TMP(retreg[k][nr[k]++]); - ca += 1 << (2 * k); - } - return ca; -} - -static void -selret(Blk *b, Fn *fn) -{ - int j, k, ca; - Ref r, r0, reg[2]; - AClass aret; - - j = b->jmp.type; - - if (!isret(j) || j == Jret0) - return; - - r0 = b->jmp.arg; - b->jmp.type = Jret0; - - if (j == Jretc) { - typclass(&aret, &typ[fn->retty]); - if (aret.inmem) { - assert(rtype(fn->retr) == RTmp); - emit(Ocopy, Kl, TMP(RAX), fn->retr, R); - blit(fn->retr, 0, r0, aret.size, fn); - ca = 1; - } else { - ca = retr(reg, &aret); - if (aret.size > 8) { - r = newtmp("abi", Kl, fn); - emit(Oload, Kl, reg[1], r, R); - emit(Oadd, Kl, r, r0, getcon(8, fn)); - } - emit(Oload, Kl, reg[0], r0, R); - } - } else { - k = j - Jretw; - if (KBASE(k) == 0) { - emit(Ocopy, k, TMP(RAX), r0, R); - ca = 1; - } else { - emit(Ocopy, k, TMP(XMM0), r0, R); - ca = 1 << 2; - } - } - - b->jmp.arg = CALL(ca); -} - -static int -argsclass(Ins *i0, Ins *i1, AClass *ac, int op, AClass *aret, Ref *env) -{ - int nint, ni, nsse, ns, n, *pn; - AClass *a; - Ins *i; - - if (aret && aret->inmem) - nint = 5; /* hidden argument */ - else - nint = 6; - nsse = 8; - for (i=i0, a=ac; iop - op + Oarg) { - 
case Oarg: - if (KBASE(i->cls) == 0) - pn = &nint; - else - pn = &nsse; - if (*pn > 0) { - --*pn; - a->inmem = 0; - } else - a->inmem = 2; - a->align = 3; - a->size = 8; - a->cls[0] = i->cls; - break; - case Oargc: - n = i->arg[0].val; - typclass(a, &typ[n]); - if (a->inmem) - continue; - ni = ns = 0; - for (n=0; (uint)n*8size; n++) - if (KBASE(a->cls[n]) == 0) - ni++; - else - ns++; - if (nint >= ni && nsse >= ns) { - nint -= ni; - nsse -= ns; - } else - a->inmem = 1; - break; - case Oarge: - if (op == Opar) - *env = i->to; - else - *env = i->arg[0]; - break; - } - - return ((6-nint) << 4) | ((8-nsse) << 8); -} - -int rsave[] = { - RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX, - XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14 -}; -int rclob[] = {RBX, R12, R13, R14, R15}; - -MAKESURE(rsave_has_correct_size, sizeof rsave == NRSave * sizeof(int)); -MAKESURE(rclob_has_correct_size, sizeof rclob == NRClob * sizeof(int)); - -/* layout of call's second argument (RCall) - * - * 29 12 8 4 3 0 - * |0...00|x|xxxx|xxxx|xx|xx| range - * | | | | ` gp regs returned (0..2) - * | | | ` sse regs returned (0..2) - * | | ` gp regs passed (0..6) - * | ` sse regs passed (0..8) - * ` 1 if rax is used to pass data (0..1) - */ - -bits -retregs(Ref r, int p[2]) -{ - bits b; - int ni, nf; - - assert(rtype(r) == RCall); - b = 0; - ni = r.val & 3; - nf = (r.val >> 2) & 3; - if (ni >= 1) - b |= BIT(RAX); - if (ni >= 2) - b |= BIT(RDX); - if (nf >= 1) - b |= BIT(XMM0); - if (nf >= 2) - b |= BIT(XMM1); - if (p) { - p[0] = ni; - p[1] = nf; - } - return b; -} - -bits -argregs(Ref r, int p[2]) -{ - bits b; - int j, ni, nf, ra; - - assert(rtype(r) == RCall); - b = 0; - ni = (r.val >> 4) & 15; - nf = (r.val >> 8) & 15; - ra = (r.val >> 12) & 1; - for (j=0; jarg[1], R)) { - assert(rtype(i1->arg[1]) == RType); - typclass(&aret, &typ[i1->arg[1].val]); - ca = argsclass(i0, i1, ac, Oarg, &aret, &env); - } else - ca = argsclass(i0, i1, ac, Oarg, 0, &env); - - 
for (stk=0, a=&ac[i1-i0]; a>ac;) - if ((--a)->inmem) { - if (a->align > 4) - err("sysv abi requires alignments of 16 or less"); - stk += a->size; - if (a->align == 4) - stk += stk & 15; - } - stk += stk & 15; - if (stk) { - r = getcon(-(int64_t)stk, fn); - emit(Osalloc, Kl, R, r, R); - } - - if (!req(i1->arg[1], R)) { - if (aret.inmem) { - /* get the return location from eax - * it saves one callee-save reg */ - r1 = newtmp("abi", Kl, fn); - emit(Ocopy, Kl, i1->to, TMP(RAX), R); - ca += 1; - } else { - if (aret.size > 8) { - r = newtmp("abi", Kl, fn); - aret.ref[1] = newtmp("abi", aret.cls[1], fn); - emit(Ostorel, 0, R, aret.ref[1], r); - emit(Oadd, Kl, r, i1->to, getcon(8, fn)); - } - aret.ref[0] = newtmp("abi", aret.cls[0], fn); - emit(Ostorel, 0, R, aret.ref[0], i1->to); - ca += retr(reg, &aret); - if (aret.size > 8) - emit(Ocopy, aret.cls[1], aret.ref[1], reg[1], R); - emit(Ocopy, aret.cls[0], aret.ref[0], reg[0], R); - r1 = i1->to; - } - /* allocate return pad */ - ra = alloc(sizeof *ra); - /* specific to NAlign == 3 */ - al = aret.align >= 2 ? 
aret.align - 2 : 0; - ra->i = (Ins){Oalloc+al, r1, {getcon(aret.size, fn)}, Kl}; - ra->link = (*rap); - *rap = ra; - } else { - ra = 0; - if (KBASE(i1->cls) == 0) { - emit(Ocopy, i1->cls, i1->to, TMP(RAX), R); - ca += 1; - } else { - emit(Ocopy, i1->cls, i1->to, TMP(XMM0), R); - ca += 1 << 2; - } - } - envc = !req(R, env); - varc = i1->op == Ovacall; - if (varc && envc) - err("sysv abi does not support variadic env calls"); - ca |= (varc | envc) << 12; - emit(Ocall, i1->cls, R, i1->arg[0], CALL(ca)); - if (envc) - emit(Ocopy, Kl, TMP(RAX), env, R); - if (varc) - emit(Ocopy, Kw, TMP(RAX), getcon((ca >> 8) & 15, fn), R); - - ni = ns = 0; - if (ra && aret.inmem) - emit(Ocopy, Kl, rarg(Kl, &ni, &ns), ra->i.to, R); /* pass hidden argument */ - for (i=i0, a=ac; iinmem) - continue; - r1 = rarg(a->cls[0], &ni, &ns); - if (i->op == Oargc) { - if (a->size > 8) { - r2 = rarg(a->cls[1], &ni, &ns); - r = newtmp("abi", Kl, fn); - emit(Oload, a->cls[1], r2, r, R); - emit(Oadd, Kl, r, i->arg[1], getcon(8, fn)); - } - emit(Oload, a->cls[0], r1, i->arg[1], R); - } else - emit(Ocopy, i->cls, r1, i->arg[0], R); - } - - if (!stk) - return; - - r = newtmp("abi", Kl, fn); - for (i=i0, a=ac, off=0; iinmem) - continue; - if (i->op == Oargc) { - if (a->align == 4) - off += off & 15; - blit(r, off, i->arg[1], a->size, fn); - } else { - r1 = newtmp("abi", Kl, fn); - emit(Ostorel, 0, R, i->arg[0], r1); - emit(Oadd, Kl, r1, r, getcon(off, fn)); - } - off += a->size; - } - emit(Osalloc, Kl, r, getcon(stk, fn), R); -} - -static int -selpar(Fn *fn, Ins *i0, Ins *i1) -{ - AClass *ac, *a, aret; - Ins *i; - int ni, ns, s, al, fa; - Ref r, env; - - env = R; - ac = alloc((i1-i0) * sizeof ac[0]); - curi = &insb[NIns]; - ni = ns = 0; - - if (fn->retty >= 0) { - typclass(&aret, &typ[fn->retty]); - fa = argsclass(i0, i1, ac, Opar, &aret, &env); - } else - fa = argsclass(i0, i1, ac, Opar, 0, &env); - - for (i=i0, a=ac; iop != Oparc || a->inmem) - continue; - if (a->size > 8) { - r = newtmp("abi", Kl, fn); - 
a->ref[1] = newtmp("abi", Kl, fn); - emit(Ostorel, 0, R, a->ref[1], r); - emit(Oadd, Kl, r, i->to, getcon(8, fn)); - } - a->ref[0] = newtmp("abi", Kl, fn); - emit(Ostorel, 0, R, a->ref[0], i->to); - /* specific to NAlign == 3 */ - al = a->align >= 2 ? a->align - 2 : 0; - emit(Oalloc+al, Kl, i->to, getcon(a->size, fn), R); - } - - if (fn->retty >= 0 && aret.inmem) { - r = newtmp("abi", Kl, fn); - emit(Ocopy, Kl, r, rarg(Kl, &ni, &ns), R); - fn->retr = r; - } - - for (i=i0, a=ac, s=4; iinmem) { - case 1: - if (a->align > 4) - err("sysv abi requires alignments of 16 or less"); - if (a->align == 4) - s = (s+3) & -4; - fn->tmp[i->to.val].slot = -s; - s += a->size / 4; - continue; - case 2: - emit(Oload, i->cls, i->to, SLOT(-s), R); - s += 2; - continue; - } - r = rarg(a->cls[0], &ni, &ns); - if (i->op == Oparc) { - emit(Ocopy, Kl, a->ref[0], r, R); - if (a->size > 8) { - r = rarg(a->cls[1], &ni, &ns); - emit(Ocopy, Kl, a->ref[1], r, R); - } - } else - emit(Ocopy, i->cls, i->to, r, R); - } - - if (!req(R, env)) - emit(Ocopy, Kl, env, TMP(RAX), R); - - return fa | (s*4)<<12; -} - -static Blk * -split(Fn *fn, Blk *b) -{ - Blk *bn; - - ++fn->nblk; - bn = blknew(); - bn->nins = &insb[NIns] - curi; - idup(&bn->ins, curi, bn->nins); - curi = &insb[NIns]; - bn->visit = ++b->visit; - snprintf(bn->name, NString, "%s.%d", b->name, b->visit); - bn->loop = b->loop; - bn->link = b->link; - b->link = bn; - return bn; -} - -static void -chpred(Blk *b, Blk *bp, Blk *bp1) -{ - Phi *p; - uint a; - - for (p=b->phi; p; p=p->link) { - for (a=0; p->blk[a]!=bp; a++) - assert(a+1narg); - p->blk[a] = bp1; - } -} - -void -selvaarg(Fn *fn, Blk *b, Ins *i) -{ - Ref loc, lreg, lstk, nr, r0, r1, c4, c8, c16, c, ap; - Blk *b0, *bstk, *breg; - int isint; - - c4 = getcon(4, fn); - c8 = getcon(8, fn); - c16 = getcon(16, fn); - ap = i->arg[0]; - isint = KBASE(i->cls) == 0; - - /* @b [...] 
- r0 =l add ap, (0 or 4) - nr =l loadsw r0 - r1 =w cultw nr, (48 or 176) - jnz r1, @breg, @bstk - @breg - r0 =l add ap, 16 - r1 =l loadl r0 - lreg =l add r1, nr - r0 =w add nr, (8 or 16) - r1 =l add ap, (0 or 4) - storew r0, r1 - @bstk - r0 =l add ap, 8 - lstk =l loadl r0 - r1 =l add lstk, 8 - storel r1, r0 - @b0 - %loc =l phi @breg %lreg, @bstk %lstk - i->to =(i->cls) load %loc - */ - - loc = newtmp("abi", Kl, fn); - emit(Oload, i->cls, i->to, loc, R); - b0 = split(fn, b); - b0->jmp = b->jmp; - b0->s1 = b->s1; - b0->s2 = b->s2; - if (b->s1) - chpred(b->s1, b, b0); - if (b->s2 && b->s2 != b->s1) - chpred(b->s2, b, b0); - - lreg = newtmp("abi", Kl, fn); - nr = newtmp("abi", Kl, fn); - r0 = newtmp("abi", Kw, fn); - r1 = newtmp("abi", Kl, fn); - emit(Ostorew, Kw, R, r0, r1); - emit(Oadd, Kl, r1, ap, isint ? CON_Z : c4); - emit(Oadd, Kw, r0, nr, isint ? c8 : c16); - r0 = newtmp("abi", Kl, fn); - r1 = newtmp("abi", Kl, fn); - emit(Oadd, Kl, lreg, r1, nr); - emit(Oload, Kl, r1, r0, R); - emit(Oadd, Kl, r0, ap, c16); - breg = split(fn, b); - breg->jmp.type = Jjmp; - breg->s1 = b0; - - lstk = newtmp("abi", Kl, fn); - r0 = newtmp("abi", Kl, fn); - r1 = newtmp("abi", Kl, fn); - emit(Ostorel, Kw, R, r1, r0); - emit(Oadd, Kl, r1, lstk, c8); - emit(Oload, Kl, lstk, r0, R); - emit(Oadd, Kl, r0, ap, c8); - bstk = split(fn, b); - bstk->jmp.type = Jjmp; - bstk->s1 = b0; - - b0->phi = alloc(sizeof *b0->phi); - *b0->phi = (Phi){ - .cls = Kl, .to = loc, - .narg = 2, - .blk = {bstk, breg}, - .arg = {lstk, lreg}, - }; - r0 = newtmp("abi", Kl, fn); - r1 = newtmp("abi", Kw, fn); - b->jmp.type = Jjnz; - b->jmp.arg = r1; - b->s1 = breg; - b->s2 = bstk; - c = getcon(isint ? 48 : 176, fn); - emit(Ocmpw+ICult, Kw, r1, nr, c); - emit(Oloadsw, Kl, nr, r0, R); - emit(Oadd, Kl, r0, ap, isint ? 
CON_Z : c4); -} - -void -selvastart(Fn *fn, int fa, Ref ap) -{ - Ref r0, r1; - int gp, fp, sp; - - gp = ((fa >> 4) & 15) * 8; - fp = 48 + ((fa >> 8) & 15) * 16; - sp = fa >> 12; - r0 = newtmp("abi", Kl, fn); - r1 = newtmp("abi", Kl, fn); - emit(Ostorel, Kw, R, r1, r0); - emit(Oadd, Kl, r1, TMP(RBP), getcon(-176, fn)); - emit(Oadd, Kl, r0, ap, getcon(16, fn)); - r0 = newtmp("abi", Kl, fn); - r1 = newtmp("abi", Kl, fn); - emit(Ostorel, Kw, R, r1, r0); - emit(Oadd, Kl, r1, TMP(RBP), getcon(sp, fn)); - emit(Oadd, Kl, r0, ap, getcon(8, fn)); - r0 = newtmp("abi", Kl, fn); - emit(Ostorew, Kw, R, getcon(fp, fn), r0); - emit(Oadd, Kl, r0, ap, getcon(4, fn)); - emit(Ostorew, Kw, R, getcon(gp, fn), ap); -} - -void -abi(Fn *fn) -{ - Blk *b; - Ins *i, *i0, *ip; - RAlloc *ral; - int n, fa; - - for (b=fn->start; b; b=b->link) - b->visit = 0; - - /* lower parameters */ - for (b=fn->start, i=b->ins; i-b->insnins; i++) - if (!ispar(i->op)) - break; - fa = selpar(fn, b->ins, i); - n = b->nins - (i - b->ins) + (&insb[NIns] - curi); - i0 = alloc(n * sizeof(Ins)); - ip = icpy(ip = i0, curi, &insb[NIns] - curi); - ip = icpy(ip, i, &b->ins[b->nins] - i); - b->nins = n; - b->ins = i0; - - /* lower calls, returns, and vararg instructions */ - ral = 0; - b = fn->start; - do { - if (!(b = b->link)) - b = fn->start; /* do it last */ - if (b->visit) - continue; - curi = &insb[NIns]; - selret(b, fn); - for (i=&b->ins[b->nins]; i!=b->ins;) - switch ((--i)->op) { - default: - emiti(*i); - break; - case Ocall: - case Ovacall: - for (i0=i; i0>b->ins; i0--) - if (!isarg((i0-1)->op)) - break; - selcall(fn, i0, i, &ral); - i = i0; - break; - case Ovastart: - selvastart(fn, fa, i->arg[0]); - break; - case Ovaarg: - selvaarg(fn, b, i); - break; - case Oarg: - case Oargc: - die("unreachable"); - } - if (b == fn->start) - for (; ral; ral=ral->link) - emiti(ral->i); - b->nins = &insb[NIns] - curi; - idup(&b->ins, curi, b->nins); - } while (b != fn->start); - - if (debug['A']) { - fprintf(stderr, "\n> After 
ABI lowering:\n"); - printfn(fn, stderr); - } -} diff --git a/util.c b/util.c index 9b73771..aae1481 100644 --- a/util.c +++ b/util.c @@ -87,6 +87,36 @@ freeall() nptr = 1; } +int +iscmp(int op, int *pk, int *pc) +{ + if (Ocmpw <= op && op <= Ocmpw1) { + *pc = op - Ocmpw; + *pk = Kw; + } + else if (Ocmpl <= op && op <= Ocmpl1) { + *pc = op - Ocmpl; + *pk = Kl; + } + else if (Ocmps <= op && op <= Ocmps1) { + *pc = NCmpI + op - Ocmps; + *pk = Ks; + } + else if (Ocmpd <= op && op <= Ocmpd1) { + *pc = NCmpI + op - Ocmpd; + *pk = Kd; + } + else + return 0; + return 1; +} + +int +argcls(Ins *i, int n) +{ + return optab[i->op].argcls[n][i->cls]; +} + void emit(int op, int k, Ref to, Ref arg0, Ref arg1) { @@ -165,6 +195,42 @@ vgrow(void *vp, ulong len) *(Vec **)vp = v1; } +static int cmptab[][2] ={ + /* negation swap */ + [Ciule] = {Ciugt, Ciuge}, + [Ciult] = {Ciuge, Ciugt}, + [Ciugt] = {Ciule, Ciult}, + [Ciuge] = {Ciult, Ciule}, + [Cisle] = {Cisgt, Cisge}, + [Cislt] = {Cisge, Cisgt}, + [Cisgt] = {Cisle, Cislt}, + [Cisge] = {Cislt, Cisle}, + [Cieq] = {Cine, Cieq}, + [Cine] = {Cieq, Cine}, + [NCmpI+Cfle] = {NCmpI+Cfgt, NCmpI+Cfge}, + [NCmpI+Cflt] = {NCmpI+Cfge, NCmpI+Cfgt}, + [NCmpI+Cfgt] = {NCmpI+Cfle, NCmpI+Cflt}, + [NCmpI+Cfge] = {NCmpI+Cflt, NCmpI+Cfle}, + [NCmpI+Cfeq] = {NCmpI+Cfne, NCmpI+Cfeq}, + [NCmpI+Cfne] = {NCmpI+Cfeq, NCmpI+Cfne}, + [NCmpI+Cfo] = {NCmpI+Cfuo, NCmpI+Cfo}, + [NCmpI+Cfuo] = {NCmpI+Cfo, NCmpI+Cfuo}, +}; + +int +cmpneg(int c) +{ + assert(0 <= c && c < NCmp); + return cmptab[c][0]; +} + +int +cmpop(int c) +{ + assert(0 <= c && c < NCmp); + return cmptab[c][1]; +} + int clsmerge(short *pk, short k) { @@ -256,6 +322,30 @@ addcon(Con *c0, Con *c1) } } +void +blit(Ref rdst, uint doff, Ref rsrc, uint sz, Fn *fn) +{ + struct { int st, ld, cls, size; } *p, tbl[] = { + { Ostorel, Oload, Kl, 8 }, + { Ostorew, Oload, Kw, 8 }, + { Ostoreh, Oloaduh, Kw, 2 }, + { Ostoreb, Oloadub, Kw, 1 } + }; + Ref r, r1; + uint boff, s; + + for (boff=0, p=tbl; sz; p++) + for 
(s=p->size; sz>=s; sz-=s, doff+=s, boff+=s) { + r = newtmp("blt", Kl, fn); + r1 = newtmp("blt", Kl, fn); + emit(p->st, 0, R, r, r1); + emit(Oadd, Kl, r1, rdst, getcon(doff, fn)); + r1 = newtmp("blt", Kl, fn); + emit(p->ld, p->cls, r, r1, R); + emit(Oadd, Kl, r1, rsrc, getcon(boff, fn)); + } +} + void bsinit(BSet *bs, uint n) { -- cgit 1.4.1