diff options
author | Quentin Carbonneaux <quentin.carbonneaux@yale.edu> | 2016-03-25 14:02:43 -0400 |
---|---|---|
committer | Quentin Carbonneaux <quentin.carbonneaux@yale.edu> | 2016-03-25 14:02:43 -0400 |
commit | 62e238a6ef151d56b79e1f076a57463f2e1fb020 (patch) | |
tree | 29c858054c62230eb73330f165cf30ff20e14d86 /src | |
parent | 97b58def96d47d937d86849380d8316ddb16bed8 (diff) | |
download | roux-62e238a6ef151d56b79e1f076a57463f2e1fb020.tar.gz |
great renaming campain!
Diffstat (limited to 'src')
53 files changed, 8122 insertions, 0 deletions
diff --git a/src/.gitignore b/src/.gitignore new file mode 100644 index 0000000..0416fa9 --- /dev/null +++ b/src/.gitignore @@ -0,0 +1,5 @@ +qbe +doc +.comfile +*.o +*.out diff --git a/src/.tag b/src/.tag new file mode 100644 index 0000000..5b8c210 --- /dev/null +++ b/src/.tag @@ -0,0 +1,11 @@ +Look slot( + +Get lisc.h +Get parse.c +Get isel.c +Get spill.c +Get rega.c +Get emit.c + +New +|fmt diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 0000000..b9c87df --- /dev/null +++ b/src/Makefile @@ -0,0 +1,17 @@ +BIN = qbe +OBJ = main.o util.o parse.o mem.o ssa.o copy.o live.o isel.o spill.o rega.o emit.o + +CFLAGS = -Wall -Wextra -std=c99 -g -pedantic + +$(BIN): $(OBJ) + $(CC) $(LDFLAGS) $(OBJ) -o $@ + +$(OBJ): all.h + +.PHONY: clean check syndoc +clean: + rm -f $(BIN) $(OBJ) +check: $(BIN) + test/go.sh all +syndoc: + unison -auto doc ssh://qcar@h/data/d/ssa-doc diff --git a/src/all.h b/src/all.h new file mode 100644 index 0000000..e0542da --- /dev/null +++ b/src/all.h @@ -0,0 +1,554 @@ +#include <assert.h> +#include <inttypes.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define MAKESURE(what, x) typedef char make_sure_##what[(x)?1:-1] + +typedef unsigned int uint; +typedef unsigned short ushort; +typedef unsigned long ulong; +typedef unsigned long bits; + +typedef struct BSet BSet; +typedef struct Ref Ref; +typedef struct OpDesc OpDesc; +typedef struct Ins Ins; +typedef struct Phi Phi; +typedef struct Blk Blk; +typedef struct Use Use; +typedef struct Tmp Tmp; +typedef struct Con Con; +typedef struct Addr Mem; +typedef struct Fn Fn; +typedef struct Typ Typ; +typedef struct Dat Dat; + +enum Reg { + RXX, + + RAX, /* caller-save */ + RCX, + RDX, + RSI, + RDI, + R8, + R9, + R10, + R11, + + RBX, /* callee-save */ + R12, + R13, + R14, + R15, + + RBP, /* reserved */ + RSP, + + XMM0, /* sse */ + XMM1, + XMM2, + XMM3, + XMM4, + XMM5, + XMM6, + XMM7, + XMM8, + XMM9, + XMM10, + XMM11, + XMM12, + XMM13, + XMM14, + XMM15, + + Tmp0, /* first non-reg temporary */ + + NIReg = R12 - RAX + 1, + NFReg = XMM14 - XMM0 + 1, + NISave = 9, + NFSave = NFReg, + NRSave = NISave + NFSave, + NRClob = 5, +}; + +enum { + NString = 32, + NPred = 63, + NIns = 8192, + NAlign = 3, + NSeg = 32, + NTyp = 128, + NBit = CHAR_BIT * sizeof(bits), +}; + +MAKESURE(NBit_is_enough, NBit >= (int)Tmp0); + +#define BIT(n) ((bits)1 << (n)) + +struct BSet { + uint nt; + bits *t; +}; + +struct Ref { + uint16_t type:2; + uint16_t val:14; +}; + +enum Alt { + AType, + ACall, + AMem, + + AShift = 12, + AMask = (1<<AShift) - 1 +}; + +enum { + RTmp, + RCon, + RSlot, + RAlt, + + RAType = RAlt + AType, + RACall = RAlt + ACall, + RAMem = RAlt + AMem, + + NRef = (1<<14) - 1 +}; + +#define R (Ref){0, 0} +#define TMP(x) (Ref){RTmp, x} +#define CON(x) (Ref){RCon, x} +#define CON_Z CON(0) /* reserved zero constant */ +#define SLOT(x) (Ref){RSlot, x} +#define TYPE(x) (Ref){RAlt, (x)|(AType<<AShift)} +#define CALL(x) (Ref){RAlt, (x)|(ACall<<AShift)} +#define MEM(x) (assert(x<(1<<AShift) && "too many mems"), \ + (Ref){RAlt, (x)|(AMem<<AShift)}) + +static inline int req(Ref a, Ref b) +{ + return a.type == b.type && a.val == b.val; +} + +static inline int rtype(Ref r) +{ + if (req(r, R)) + return -1; + if (r.type == RAlt) + return RAlt + (r.val >> AShift); + return r.type; +} + +static inline int isreg(Ref r) +{ + return rtype(r) == RTmp && r.val < Tmp0; +} + +enum ICmp { +#define ICMPS(X) \ + X(ule) \ + X(ult) \ + X(sle) \ + X(slt) \ + X(sgt) \ + X(sge) \ + X(ugt) \ + X(uge) \ + X(eq) \ + X(ne) /* make sure icmpop() below works! */ + +#define X(c) IC##c, + ICMPS(X) +#undef X + NICmp, + + ICXnp = NICmp, /* x64 specific */ + ICXp, + NXICmp +}; + +static inline int icmpop(int c) +{ + return c >= ICeq ? c : ICuge - c; +} + +enum FCmp { +#define FCMPS(X) \ + X(le) \ + X(lt) \ + X(gt) \ + X(ge) \ + X(ne) \ + X(eq) \ + X(o) \ + X(uo) + +#define X(c) FC##c, + FCMPS(X) +#undef X + NFCmp +}; + +enum Class { + Kw, + Kl, + Ks, + Kd +}; + +#define KWIDE(k) ((k)&1) +#define KBASE(k) ((k)>>1) + +enum Op { + OXXX, + + /* public instructions */ + OAdd, + OSub, + ODiv, + ORem, + OUDiv, + OURem, + OMul, + OAnd, + OOr, + OXor, + OSar, + OShr, + OShl, + OCmpw, + OCmpw1 = OCmpw + NICmp-1, + OCmpl, + OCmpl1 = OCmpl + NICmp-1, + OCmps, + OCmps1 = OCmps + NFCmp-1, + OCmpd, + OCmpd1 = OCmpd + NFCmp-1, + + OStored, + OStores, + OStorel, + OStorew, + OStoreh, + OStoreb, +#define isstore(o) (OStored <= o && o <= OStoreb) + OLoadsw, /* needs to match OExt (mem.c) */ + OLoaduw, + OLoadsh, + OLoaduh, + OLoadsb, + OLoadub, + OLoad, +#define isload(o) (OLoadsw <= o && o <= OLoad) + OExtsw, + OExtuw, + OExtsh, + OExtuh, + OExtsb, + OExtub, +#define isext(o) (OExtsw <= o && o <= OExtub) + + OExts, + OTruncd, + OFtosi, + OSitof, + OCast, + + OAlloc, + OAlloc1 = OAlloc + NAlign-1, + + OCopy, + NPubOp, + + /* function instructions */ + OPar = NPubOp, + OParc, + OArg, + OArgc, + OCall, + + /* reserved instructions */ + ONop, + OAddr, + OSwap, + OSign, + OSAlloc, + OXIDiv, + OXDiv, + OXCmp, + OXSet, + OXSetnp = OXSet + ICXnp, + OXSetp = OXSet + ICXp, + OXTest, + NOp +}; + +enum Jmp { + JXXX, + JRet0, + JRetw, + JRetl, + JRets, + JRetd, + JRetc, +#define isret(j) (JRet0 <= j && j <= JRetc) + JJmp, + JJnz, + JXJc, + JXJnp = JXJc + ICXnp, + JXJp = JXJc + ICXp, + NJmp +}; + +struct OpDesc { + char *name; + int nmem; + char argcls[2][4]; + uint sflag:1; /* sets the zero flag */ + uint lflag:1; /* leaves flags */ +}; + +struct Ins { + ushort op:14; + Ref to; + Ref arg[2]; + ushort cls:2; +}; + +struct Phi { + Ref to; + Ref arg[NPred]; + Blk *blk[NPred]; + uint narg; + int cls; + Phi *link; +}; + +struct Blk { + Phi *phi; + Ins *ins; + uint nins; + struct { + short type; + Ref arg; + } jmp; + Blk *s1; + Blk *s2; + Blk *link; + + int id; + int visit; + + Blk *idom; + Blk *dom, *dlink; + Blk **fron; + int nfron; + + Blk **pred; + uint npred; + BSet in[1], out[1], gen[1]; + int nlive[2]; + int loop; + char name[NString]; +}; + +struct Use { + enum { + UXXX, + UPhi, + UIns, + UJmp, + } type; + int bid; + union { + Ins *ins; + Phi *phi; + } u; +}; + +struct Tmp { + char name[NString]; + Use *use; + uint ndef, nuse; + uint cost; + short slot; + short cls; + struct { + int r; + bits m; + } hint; + int phi; + int visit; +}; + +struct Con { + enum { + CUndef, + CBits, + CAddr, + } type; + char label[NString]; + union { + int64_t i; + double d; + float s; + } bits; + char flt; /* for printing, see parse.c */ +}; + +typedef struct Addr Addr; + +struct Addr { /* x64 addressing */ + Con offset; + Ref base; + Ref index; + int scale; +}; + +struct Fn { + Blk *start; + Tmp *tmp; + Con *con; + Mem *mem; + int ntmp; + int ncon; + int nmem; + int nblk; + int retty; /* index in typ[], -1 if no aggregate return */ + Ref retr; + Blk **rpo; + bits reg; + int slot; + char name[NString]; +}; + +struct Typ { + char name[NString]; + int dark; + uint size; + int align; + + struct { + uint isflt:1; + uint ispad:1; + uint len:30; + } seg[NSeg+1]; +}; + +struct Dat { + enum { + DStart, + DEnd, + DName, + DAlign, + DB, + DH, + DW, + DL, + DZ + } type; + union { + int64_t num; + double fltd; + float flts; + char *str; + struct { + char *nam; + int64_t off; + } ref; + } u; + char isref; + char isstr; +}; + + +/* main.c */ +extern char debug['Z'+1]; + +/* util.c */ +extern Typ typ[NTyp]; +extern Ins insb[NIns], *curi; +void diag(char *) __attribute__((noreturn)); +void *emalloc(size_t); +void *alloc(size_t); +void freeall(void); +Blk *blknew(void); +void emit(int, int, Ref, Ref, Ref); +void emiti(Ins); +void idup(Ins **, Ins *, ulong); +Ins *icpy(Ins *, Ins *, ulong); +void *vnew(ulong, size_t); +void vgrow(void *, ulong); +int phicls(int, Tmp *); +Ref newtmp(char *, int, Fn *); +Ref getcon(int64_t, Fn *); +void addcon(Con *, Con *); +void dumpts(BSet *, Tmp *, FILE *); + +void bsinit(BSet *, uint); +void bszero(BSet *); +uint bscount(BSet *); +void bsset(BSet *, uint); +void bsclr(BSet *, uint); +void bscopy(BSet *, BSet *); +void bsunion(BSet *, BSet *); +void bsinter(BSet *, BSet *); +void bsdiff(BSet *, BSet *); +int bsequal(BSet *, BSet *); +int bsiter(BSet *, uint *); + +static inline int +bshas(BSet *bs, uint elt) +{ + assert(elt < bs->nt * NBit); + return (bs->t[elt/NBit] & BIT(elt%NBit)) != 0; +} + +/* parse.c */ +extern OpDesc opdesc[NOp]; +void parse(FILE *, char *, void (Dat *), void (Fn *)); +void printfn(Fn *, FILE *); +void printref(Ref, Fn *, FILE *); +void err(char *, ...); + +/* mem.c */ +void memopt(Fn *); + +/* ssa.c */ +void filluse(Fn *); +void fillpreds(Fn *); +void fillrpo(Fn *); +void ssa(Fn *); + +/* copy.c */ +void copy(Fn *); + +/* live.c */ +void liveon(BSet *, Blk *, Blk *); +void filllive(Fn *); + +/* isel.c */ +extern int rsave[/* NRSave */]; +extern int rclob[/* NRClob */]; +bits retregs(Ref, int[2]); +bits argregs(Ref, int[2]); +void isel(Fn *); + +/* spill.c */ +void fillcost(Fn *); +void spill(Fn *); + +/* rega.c */ +void rega(Fn *); + +/* emit.c */ +void emitfn(Fn *, FILE *); +void emitdat(Dat *, FILE *); +int stashfp(int64_t, int); +void emitfin(FILE *); diff --git a/src/copy.c b/src/copy.c new file mode 100644 index 0000000..ef2d01d --- /dev/null +++ b/src/copy.c @@ -0,0 +1,159 @@ +#include "all.h" + +typedef struct RList RList; +struct RList { + int t; + RList *l; +}; + +static Ref +copyof(Ref r, Ref *cp) +{ + if (rtype(r) == RTmp) + return cp[r.val]; + else + return r; +} + +static void +update(Ref r, Ref rcp, Ref *cp, RList **w) +{ + RList *l; + + if (!req(cp[r.val], rcp)) { + cp[r.val] = rcp; + l = emalloc(sizeof *l); + l->t = r.val; + l->l = *w; + *w = l; + } +} + +static void +visitphi(Phi *p, Ref *cp, RList **w) +{ + uint a; + Ref r, r1; + + r = R; + for (a=0; a<p->narg; a++) { + r1 = copyof(p->arg[a], cp); + if (req(r1, R)) + continue; + if (req(r, R) || req(r, r1)) + r = r1; + else { + r = p->to; + break; + } + } + assert(!req(r, R)); + update(p->to, r, cp, w); +} + +static void +visitins(Ins *i, Ref *cp, RList **w) +{ + Ref r; + + if (i->op == OCopy) { + r = copyof(i->arg[0], cp); + update(i->to, r, cp, w); + } else if (!req(i->to, R)) { + assert(rtype(i->to) == RTmp); + update(i->to, i->to, cp, w); + } +} + +void +copy(Fn *fn) +{ + Blk *b; + Ref *cp, r; + RList *w, *w1; + Use *u, *u1; + Ins *i; + Phi *p, **pp; + uint a; + int t; + + w = 0; + cp = emalloc(fn->ntmp * sizeof cp[0]); + for (b=fn->start; b; b=b->link) { + for (p=b->phi; p; p=p->link) + visitphi(p, cp, &w); + for (i=b->ins; i-b->ins < b->nins; i++) + visitins(i, cp, &w); + } + while ((w1=w)) { + t = w->t; + w = w->l; + free(w1); + u = fn->tmp[t].use; + u1 = u + fn->tmp[t].nuse; + for (; u<u1; u++) + switch (u->type) { + default: + diag("copy: invalid use"); + case UPhi: + visitphi(u->u.phi, cp, &w); + break; + case UIns: + visitins(u->u.ins, cp, &w); + break; + case UJmp: + break; + } + } + for (b=fn->start; b; b=b->link) { + for (pp=&b->phi; (p=*pp);) { + r = cp[p->to.val]; + if (!req(r, p->to)) { + *pp = p->link; + continue; + } + for (a=0; a<p->narg; a++) + if (rtype(p->arg[a]) == RTmp) { + r = cp[p->arg[a].val]; + assert(!req(r, R)); + p->arg[a] = r; + } + pp=&p->link; + } + for (i=b->ins; i-b->ins < b->nins; i++) { + r = cp[i->to.val]; + if (!req(r, i->to)) { + *i = (Ins){.op = ONop}; + continue; + } + for (a=0; a<2; a++) + if (rtype(i->arg[a]) == RTmp) { + r = cp[i->arg[a].val]; + assert(!req(r, R)); + i->arg[a] = r; + } + } + if (rtype(b->jmp.arg) == RTmp) { + r = cp[b->jmp.arg.val]; + assert(!req(r, R)); + b->jmp.arg = r; + } + } + if (debug['C']) { + fprintf(stderr, "\n> Copy information:"); + for (t=Tmp0; t<fn->ntmp; t++) { + if (req(cp[t], R)) { + fprintf(stderr, "\n%10s not seen!", + fn->tmp[t].name); + } + else if (!req(cp[t], TMP(t))) { + fprintf(stderr, "\n%10s copy of ", + fn->tmp[t].name); + printref(cp[t], fn, stderr); + } + } + fprintf(stderr, "\n\n> After copy elimination:\n"); + printfn(fn, stderr); + } + free(cp); +} diff --git a/src/emit.c b/src/emit.c new file mode 100644 index 0000000..b9dc782 --- /dev/null +++ b/src/emit.c @@ -0,0 +1,666 @@ +#include "all.h" + +enum { + SLong = 0, + SWord = 1, + SShort = 2, + SByte = 3, + + Ki = -1, /* matches Kw and Kl */ + Ka = -2, /* matches all classes */ +}; + +/* Instruction format strings: + * + * if the format string starts with -, the instruction + * is assumed to be 3-address and is put in 2-address + * mode using an extra mov if necessary + * + * if the format string starts with +, the same as the + * above applies, but commutativity is also assumed + * + * %k is used to set the class of the instruction, + * it'll expand to "l", "q", "ss", "sd", depending + * on the instruction class + * %0 designates the first argument + * %1 designates the second argument + * %= designates the result + * + * if %k is not used, a prefix to 0, 1, or = must be + * added, it can be: + * M - memory reference + * L - long (64 bits) + * W - word (32 bits) + * H - short (16 bits) + * B - byte (8 bits) + * S - single precision float + * D - double precision float + */ +static struct { + short op; + short cls; + char *asm; +} omap[] = { + { OAdd, Ka, "+add%k %1, %=" }, + { OSub, Ka, "-sub%k %1, %=" }, + { OAnd, Ki, "+and%k %1, %=" }, + { OOr, Ki, "+or%k %1, %=" }, + { OXor, Ki, "+xor%k %1, %=" }, + { OSar, Ki, "-sar%k %B1, %=" }, + { OShr, Ki, "-shr%k %B1, %=" }, + { OShl, Ki, "-shl%k %B1, %=" }, + { OMul, Ki, "+imul%k %1, %=" }, + { OMul, Ks, "+mulss %1, %=" }, /* fixme */ + { OMul, Kd, "+mulsd %1, %=" }, + { ODiv, Ka, "-div%k %1, %=" }, + { OStorel, Ka, "movq %L0, %M1" }, + { OStorew, Ka, "movl %W0, %M1" }, + { OStoreh, Ka, "movw %H0, %M1" }, + { OStoreb, Ka, "movb %B0, %M1" }, + { OStores, Ka, "movss %S0, %M1" }, + { OStored, Ka, "movsd %D0, %M1" }, + { OLoad, Ka, "mov%k %M0, %=" }, + { OLoadsw, Kl, "movslq %M0, %L=" }, + { OLoadsw, Kw, "movl %M0, %W=" }, + { OLoaduw, Ki, "movl %M0, %W=" }, + { OLoadsh, Ki, "movsw%k %M0, %=" }, + { OLoaduh, Ki, "movzw%k %M0, %=" }, + { OLoadsb, Ki, "movsb%k %M0, %=" }, + { OLoadub, Ki, "movzb%k %M0, %=" }, + { OExtsw, Kl, "movslq %W0, %L=" }, + { OExtuw, Kl, "movl %W0, %W=" }, + { OExtsh, Ki, "movsw%k %H0, %=" }, + { OExtuh, Ki, "movzw%k %H0, %=" }, + { OExtsb, Ki, "movsb%k %B0, %=" }, + { OExtub, Ki, "movzb%k %B0, %=" }, + + { OExts, Kd, "cvtss2sd %0, %=" }, /* see if factorization is possible */ + { OTruncd, Ks, "cvttsd2ss %0, %=" }, + { OFtosi, Kw, "cvttss2si %0, %=" }, + { OFtosi, Kl, "cvttsd2si %0, %=" }, + { OSitof, Ks, "cvtsi2ss %W0, %=" }, + { OSitof, Kd, "cvtsi2sd %L0, %=" }, + { OCast, Ki, "movq %D0, %L=" }, + { OCast, Ka, "movq %L0, %D=" }, + + { OAddr, Ki, "lea%k %M0, %=" }, + { OSwap, Ki, "xchg%k %0, %1" }, + { OSign, Kl, "cqto" }, + { OSign, Kw, "cltd" }, + { OXDiv, Ki, "div%k %0" }, + { OXIDiv, Ki, "idiv%k %0" }, + { OXCmp, Ks, "comiss %S0, %S1" }, /* fixme, Kf */ + { OXCmp, Kd, "comisd %D0, %D1" }, + { OXCmp, Ki, "cmp%k %0, %1" }, + { OXTest, Ki, "test%k %0, %1" }, + { OXSet+ICeq, Ki, "setz %B=\n\tmovzb%k %B=, %=" }, + { OXSet+ICsle, Ki, "setle %B=\n\tmovzb%k %B=, %=" }, + { OXSet+ICslt, Ki, "setl %B=\n\tmovzb%k %B=, %=" }, + { OXSet+ICsgt, Ki, "setg %B=\n\tmovzb%k %B=, %=" }, + { OXSet+ICsge, Ki, "setge %B=\n\tmovzb%k %B=, %=" }, + { OXSet+ICne, Ki, "setnz %B=\n\tmovzb%k %B=, %=" }, + { OXSet+ICXnp, Ki, "setnp %B=\n\tmovsb%k %B=, %=" }, + { OXSet+ICXp, Ki, "setp %B=\n\tmovsb%k %B=, %=" }, + { NOp, 0, 0 } +}; + +static char *rname[][4] = { + [RAX] = {"rax", "eax", "ax", "al"}, + [RBX] = {"rbx", "ebx", "bx", "bl"}, + [RCX] = {"rcx", "ecx", "cx", "cl"}, + [RDX] = {"rdx", "edx", "dx", "dl"}, + [RSI] = {"rsi", "esi", "si", "sil"}, + [RDI] = {"rdi", "edi", "di", "dil"}, + [RBP] = {"rbp", "ebp", "bp", "bpl"}, + [RSP] = {"rsp", "esp", "sp", "spl"}, + [R8 ] = {"r8" , "r8d", "r8w", "r8b"}, + [R9 ] = {"r9" , "r9d", "r9w", "r9b"}, + [R10] = {"r10", "r10d", "r10w", "r10b"}, + [R11] = {"r11", "r11d", "r11w", "r11b"}, + [R12] = {"r12", "r12d", "r12w", "r12b"}, + [R13] = {"r13", "r13d", "r13w", "r13b"}, + [R14] = {"r14", "r14d", "r14w", "r14b"}, + [R15] = {"r15", "r15d", "r15w", "r15b"}, +}; + + +static int +slot(int s, Fn *fn) +{ + struct { int i:14; } x; + + /* sign extend s using a bitfield */ + x.i = s; + assert(NAlign == 3); + if (x.i < 0) + return -4 * x.i; + else { + assert(fn->slot >= x.i); + return -4 * (fn->slot - x.i); + } +} + +static void +emitcon(Con *con, FILE *f) +{ + switch (con->type) { + default: + diag("emit: invalid constant"); + case CAddr: + fputs(con->label, f); + if (con->bits.i) + fprintf(f, "%+"PRId64, con->bits.i); + break; + case CBits: + fprintf(f, "%"PRId64, con->bits.i); + break; + } +} + +static char * +regtoa(int reg, int sz) +{ + static char buf[6]; + + if (reg >= XMM0) { + sprintf(buf, "xmm%d", reg-XMM0); + return buf; + } else + return rname[reg][sz]; +} + +static Ref +getarg(char c, Ins *i) +{ + switch (c) { + default: + diag("emit: 0, 1, = expected in format"); + case '0': + return i->arg[0]; + case '1': + return i->arg[1]; + case '=': + return i->to; + } +} + +static void emitins(Ins, Fn *, FILE *); + +static void +emitcopy(Ref r1, Ref r2, int k, Fn *fn, FILE *f) +{ + Ins icp; + + icp.op = OCopy; + icp.arg[0] = r2; + icp.to = r1; + icp.cls = k; + emitins(icp, fn, f); +} + +static void +emitf(char *s, Ins *i, Fn *fn, FILE *f) +{ + static char clstoa[][3] = {"l", "q", "ss", "sd"}; + char c; + int sz; + Ref ref; + Mem *m; + Con off; + + switch (*s) { + case '+': + if (req(i->arg[1], i->to)) { + ref = i->arg[0]; + i->arg[0] = i->arg[1]; + i->arg[1] = ref; + } + /* fall through */ + case '-': + if (req(i->arg[1], i->to) && !req(i->arg[0], i->to)) + diag("emit: cannot convert to 2-address"); + emitcopy(i->to, i->arg[0], i->cls, fn, f); + s++; + break; + } + + fputc('\t', f); +Next: + while ((c = *s++) != '%') + if (!c) { + fputc('\n', f); + return; + } else + fputc(c, f); + switch ((c = *s++)) { + default: + diag("emit: invalid escape"); + case '%': + fputc('%', f); + break; + case 'k': + fputs(clstoa[i->cls], f); + break; + case '0': + case '1': + case '=': + sz = KWIDE(i->cls) ? SLong : SWord; + s--; + /* fall through */ + case 'D': + case 'S': + Ref: + c = *s++; + ref = getarg(c, i); + switch (rtype(ref)) { + default: + diag("emit: invalid reference"); + case RTmp: + assert(isreg(ref)); + fprintf(f, "%%%s", regtoa(ref.val, sz)); + break; + case RSlot: + fprintf(f, "%d(%%rbp)", slot(ref.val, fn)); + break; + case RAMem: + Mem: + m = &fn->mem[ref.val & AMask]; + if (rtype(m->base) == RSlot) { + off.type = CBits; + off.bits.i = slot(m->base.val, fn); + addcon(&m->offset, &off); + m->base = TMP(RBP); + } + if (m->offset.type != CUndef) + emitcon(&m->offset, f); + if (req(m->base, R) && req(m->index, R)) + break; + fputc('(', f); + if (!req(m->base, R)) + fprintf(f, "%%%s", regtoa(m->base.val, SLong)); + if (!req(m->index, R)) + fprintf(f, ", %%%s, %d", + regtoa(m->index.val, SLong), + m->scale + ); + fputc(')', f); + break; + case RCon: + fputc('$', f); + emitcon(&fn->con[ref.val], f); + break; + } + break; + case 'L': + sz = SLong; + goto Ref; + case 'W': + sz = SWord; + goto Ref; + case 'H': + sz = SShort; + goto Ref; + case 'B': + sz = SByte; + goto Ref; + case 'M': + c = *s++; + ref = getarg(c, i); + switch (rtype(ref)) { + default: + diag("emit: invalid memory reference"); + case RAMem: + goto Mem; + case RSlot: + fprintf(f, "%d(%%rbp)", slot(ref.val, fn)); + break; + case RCon: + emitcon(&fn->con[ref.val], f); + fprintf(f, "(%%rip)"); + break; + case RTmp: + assert(isreg(ref)); + fprintf(f, "(%%%s)", regtoa(ref.val, SLong)); + break; + } + break; + } + goto Next; +} + +static void +emitins(Ins i, Fn *fn, FILE *f) +{ + Ref r; + int64_t val; + int o; + + switch (i.op) { + default: + Table: + /* most instructions are just pulled out of + * the table omap[], some special cases are + * detailed below */ + for (o=0;; o++) { + /* this linear search should really be a binary + * search */ + if (omap[o].op == NOp) + diag("emit: no entry found for instruction"); + if (omap[o].op == i.op) + if (omap[o].cls == i.cls + || (omap[o].cls == Ki && KBASE(i.cls) == 0) + || (omap[o].cls == Ka)) + break; + } + emitf(omap[o].asm, &i, fn, f); + break; + case ONop: + /* just do nothing for nops, they are inserted + * by some passes */ + break; + case OMul: + /* here, we try to use the 3-addresss form + * of multiplication when possible */ + if (rtype(i.arg[1]) == RCon) { + r = i.arg[0]; + i.arg[0] = i.arg[1]; + i.arg[1] = r; + } + if (KBASE(i.cls) == 0 /* only available for ints */ + && rtype(i.arg[0]) == RCon + && rtype(i.arg[1]) == RTmp) { + emitf("imul%k %0, %1, %=", &i, fn, f); + break; + } + goto Table; + case OSub: + /* we have to use the negation trick to handle + * some 3-address substractions */ + if (req(i.to, i.arg[1])) { + emitf("neg%k %=", &i, fn, f); + emitf("add%k %0, %=", &i, fn, f); + break; + } + goto Table; + case OCopy: + /* make sure we don't emit useless copies, + * also, we can use a trick to load 64-bits + * registers, it's detailed in my note below + * http://c9x.me/art/notes.html?09/19/2015 */ + if (req(i.to, R) || req(i.arg[0], R)) + break; + if (isreg(i.to) + && rtype(i.arg[0]) == RCon + && i.cls == Kl + && fn->con[i.arg[0].val].type == CBits + && (val = fn->con[i.arg[0].val].bits.i) >= 0 + && val <= UINT32_MAX) { + emitf("movl %W0, %W=", &i, fn, f); + } else if (!req(i.arg[0], i.to)) + emitf("mov%k %0, %=", &i, fn, f); + break; + case OCall: + /* calls simply have a weird syntax in AT&T + * assembly... */ + switch (rtype(i.arg[0])) { + default: + diag("emit: invalid call instruction"); + case RCon: + fprintf(f, "\tcallq "); + emitcon(&fn->con[i.arg[0].val], f); + fprintf(f, "\n"); + break; + case RTmp: + emitf("callq *%L0", &i, fn, f); + break; + } + break; + case OSAlloc: + /* there is no good reason why this is here + * maybe we should split OSAlloc in 2 different + * instructions depending on the result + */ + emitf("subq %L0, %%rsp", &i, fn, f); + if (!req(i.to, R)) + emitcopy(i.to, TMP(RSP), Kl, fn, f); + break; + case OSwap: + if (KBASE(i.cls) == 0) + goto Table; + /* for floats, there is no swap instruction + * so we use xmm15 as a temporary + */ + emitcopy(TMP(XMM0+15), i.arg[0], i.cls, fn, f); + emitcopy(i.arg[0], i.arg[1], i.cls, fn, f); + emitcopy(i.arg[1], TMP(XMM0+15), i.cls, fn, f); + break; + } +} + +static int +cneg(int cmp) +{ + switch (cmp) { + default: diag("emit: cneg() unhandled comparison"); + case ICule: return ICugt; + case ICult: return ICuge; + case ICsle: return ICsgt; + case ICslt: return ICsge; + case ICsgt: return ICsle; + case ICsge: return ICslt; + case ICugt: return ICule; + case ICuge: return ICult; + case ICeq: return ICne; + case ICne: return ICeq; + case ICXnp: return ICXp; + case ICXp: return ICXnp; + } +} + +static int +framesz(Fn *fn) +{ + int i, o, f; + + assert(NAlign == 3); + for (i=0, o=0; i<NRClob; i++) + o ^= 1 & (fn->reg >> rclob[i]); + f = fn->slot; + f = (f + 3) & -4; + return 4*f + 8*o; +} + +void +emitfn(Fn *fn, FILE *f) +{ + static char *ctoa[] = { + [ICeq] = "z", + [ICule] = "be", + [ICult] = "b", + [ICsle] = "le", + [ICslt] = "l", + [ICsgt] = "g", + [ICsge] = "ge", + [ICugt] = "a", + [ICuge] = "ae", + [ICne] = "nz", + [ICXnp] = "np", + [ICXp] = "p" + }; + Blk *b, *s; + Ins *i, itmp; + int *r, c, fs; + + fprintf(f, + ".text\n" + ".globl %s\n" + ".type %s, @function\n" + "%s:\n" + "\tpush %%rbp\n" + "\tmov %%rsp, %%rbp\n", + fn->name, fn->name, fn->name + ); + fs = framesz(fn); + if (fs) + fprintf(f, "\tsub $%d, %%rsp\n", fs); + for (r=rclob; r-rclob < NRClob; r++) + if (fn->reg & BIT(*r)) { + itmp.arg[0] = TMP(*r); + emitf("pushq %L0", &itmp, fn, f); + } + + for (b=fn->start; b; b=b->link) { + fprintf(f, ".L%s:\n", b->name); + for (i=b->ins; i!=&b->ins[b->nins]; i++) + emitins(*i, fn, f); + switch (b->jmp.type) { + case JRet0: + for (r=&rclob[NRClob]; r>rclob;) + if (fn->reg & BIT(*--r)) { + itmp.arg[0] = TMP(*r); + emitf("popq %L0", &itmp, fn, f); + } + fprintf(f, + "\tleave\n" + "\tret\n" + ); + break; + case JJmp: + if (b->s1 != b->link) + fprintf(f, "\tjmp .L%s\n", b->s1->name); + break; + default: + c = b->jmp.type - JXJc; + if (0 <= c && c <= NXICmp) { + if (b->link == b->s2) { + s = b->s1; + } else if (b->link == b->s1) { + c = cneg(c); + s = b->s2; + } else + diag("emit: unhandled jump (1)"); + fprintf(f, "\tj%s .L%s\n", ctoa[c], s->name); + break; + } + diag("emit: unhandled jump (2)"); + } + } + +} + +void +emitdat(Dat *d, FILE *f) +{ + static int align; + static char *dtoa[] = { + [DAlign] = ".align", + [DB] = "\t.byte", + [DH] = "\t.value", + [DW] = "\t.long", + [DL] = "\t.quad" + }; + + switch (d->type) { + case DStart: + align = 0; + fprintf(f, ".data\n"); + break; + case DEnd: + break; + case DName: + if (!align) + fprintf(f, ".align 8\n"); + fprintf(f, + ".globl %s\n" + ".type %s, @object\n" + "%s:\n", + d->u.str, d->u.str, d->u.str + ); + break; + case DZ: + fprintf(f, "\t.fill %"PRId64",1,0\n", d->u.num); + break; + default: + if (d->type == DAlign) + align = 1; + + if (d->isstr) { + if (d->type != DB) + err("strings only supported for 'b' currently"); + fprintf(f, "\t.ascii \"%s\"\n", d->u.str); + } + else if (d->isref) { + fprintf(f, "%s %s%+"PRId64"\n", + dtoa[d->type], d->u.ref.nam, + d->u.ref.off); + } + else { + fprintf(f, "%s %"PRId64"\n", + dtoa[d->type], d->u.num); + } + break; + } +} + +typedef struct FBits FBits; + +struct FBits { + int64_t bits; + int wide; + FBits *link; +}; + +static FBits *stash; + +int +stashfp(int64_t n, int w) +{ + FBits **pb, *b; + int i; + + /* does a dumb de-dup of fp constants + * this should be the linker's job */ + for (pb=&stash, i=0; (b=*pb); pb=&b->link, i++) + if (n == b->bits && w == b->wide) + return i; + b = emalloc(sizeof *b); + b->bits = n; + b->wide = w; + b->link = 0; + *pb = b; + return i; +} + +void +emitfin(FILE *f) +{ + FBits *b; + int i; + + if (!stash) + return; + fprintf(f, "/* floating point constants */\n"); + fprintf(f, ".data\n.align 8\n"); + for (b=stash, i=0; b; b=b->link, i++) + if (b->wide) + fprintf(f, + ".Lfp%d:\n" + "\t.quad %"PRId64 + " /* %f */\n", + i, b->bits, + *(double *)&b->bits + ); + for (b=stash, i=0; b; b=b->link, i++) + if (!b->wide) + fprintf(f, + ".Lfp%d:\n" + "\t.long %"PRId64 + " /* %lf */\n", + i, b->bits & 0xffffffff, + *(float *)&b->bits + ); + while ((b=stash)) { + stash = b->link; + free(b); + } +} diff --git a/src/isel.c b/src/isel.c new file mode 100644 index 0000000..48e29ef --- /dev/null +++ b/src/isel.c @@ -0,0 +1,1135 @@ +#include "all.h" +#include <limits.h> + +/* For x86_64, do the following: + * + * - lower calls + * - check that constants are used only in + * places allowed + * - ensure immediates always fit in 32b + * - explicit machine register contraints + * on instructions like division. + * - implement fast locals (the streak of + * constant allocX in the first basic block) + * - recognize complex addressing modes + * + * Invariant: the use counts that are used + * in sel() must be sound. This + * is not so trivial, maybe the + * dce should be moved out... + */ + +typedef struct ANum ANum; +typedef struct AClass AClass; +typedef struct RAlloc RAlloc; + +struct ANum { + char n, l, r; + Ins *i; + Ref mem; +}; + +static void amatch(Addr *, Ref, ANum *, Fn *, int); + +static int +fcmptoi(int fc) +{ + switch (fc) { + default: diag("isel: fcmptoi defaulted"); + case FCle: return ICule; + case FClt: return ICult; + case FCgt: return ICugt; + case FCge: return ICuge; + case FCne: return ICne; + case FCeq: return ICeq; + case FCo: return ICXnp; + case FCuo: return ICXp; + } +} + +static int +iscmp(int op, int *pk, int *pc) +{ + int k, c; + + if (OCmpw <= op && op <= OCmpw1) { + c = op - OCmpw; + k = Kw; + } + else if (OCmpl <= op && op <= OCmpl1) { + c = op - OCmpl; + k = Kl; + } + else if (OCmps <= op && op <= OCmps1) { + c = fcmptoi(op - OCmps); + k = Ks; + } + else if (OCmpd <= op && op <= OCmpd1) { + c = fcmptoi(op - OCmpd); + k = Kd; + } + else + return 0; + if (pk) + *pk = k; + if (pc) + *pc = c; + return 1; +} + +static int +noimm(Ref r, Fn *fn) +{ + int64_t val; + + if (rtype(r) != RCon) + return 0; + switch (fn->con[r.val].type) { + default: + diag("isel: invalid constant"); + case CAddr: + /* we only support the 'small' + * code model of the ABI, this + * means that we can always + * address data with 32bits + */ + return 0; + case CBits: + val = fn->con[r.val].bits.i; + return (val < INT32_MIN || val > INT32_MAX); + } +} + +static int +rslot(Ref r, Fn *fn) +{ + if (rtype(r) != RTmp) + return -1; + return fn->tmp[r.val].slot; +} + +static int +argcls(Ins *i, int n) +{ + return opdesc[i->op].argcls[n][i->cls]; +} + +static void +fixarg(Ref *r, int k, int phi, Fn *fn) +{ + Addr a; + Ref r0, r1; + int s, n; + + r1 = r0 = *r; + s = rslot(r0, fn); + if (KBASE(k) == 1 && rtype(r0) == RCon) { + /* load floating points from memory + * slots, they can't be used as + * immediates + */ + r1 = MEM(fn->nmem); + vgrow(&fn->mem, ++fn->nmem); + memset(&a, 0, sizeof a); + a.offset.type = CAddr; + n = stashfp(fn->con[r0.val].bits.i, KWIDE(k)); + sprintf(a.offset.label, ".Lfp%d", n); + fn->mem[fn->nmem-1] = a; + } + else if (!phi && k == Kl && noimm(r0, fn)) { + /* load constants that do not fit in + * a 32bit signed integer into a + * long temporary + */ + r1 = newtmp("isel", Kl, fn); + emit(OCopy, Kl, r1, r0, R); + } + else if (s != -1) { + /* load fast locals' addresses into + * temporaries right before the + * instruction + */ + r1 = newtmp("isel", Kl, fn); + emit(OAddr, Kl, r1, SLOT(s), R); + } + *r = r1; +} + +static void +chuse(Ref r, int du, Fn *fn) +{ + if (rtype(r) == RTmp) + fn->tmp[r.val].nuse += du; +} + +static void +seladdr(Ref *r, ANum *an, Fn *fn) +{ + Addr a; + Ref r0, r1; + + r0 = *r; + if (rtype(r0) == RTmp) { + chuse(r0, -1, fn); + r1 = an[r0.val].mem; + if (req(r1, R)) { + amatch(&a, r0, an, fn, 1); + vgrow(&fn->mem, ++fn->nmem); + fn->mem[fn->nmem-1] = a; + r1 = MEM(fn->nmem-1); + chuse(a.base, +1, fn); + chuse(a.index, +1, fn); + if (rtype(a.base) != RTmp) + if (rtype(a.index) != RTmp) + an[r0.val].mem = r1; + } + *r = r1; + } +} + +static void +selcmp(Ref arg[2], int k, Fn *fn) +{ + Ref r; + + if (rtype(arg[0]) == RCon) { + r = arg[1]; + arg[1] = arg[0]; + arg[0] = r; + } + assert(rtype(arg[0]) != RCon); + emit(OXCmp, k, R, arg[1], arg[0]); + fixarg(&curi->arg[0], k, 0, fn); +} + +static void +sel(Ins i, ANum *an, Fn *fn) +{ + Ref r0, r1; + int x, k, kc; + int64_t val; + Ins *i0; + + if (rtype(i.to) == RTmp) + if (!isreg(i.to) && !isreg(i.arg[0]) && !isreg(i.arg[1])) + if (fn->tmp[i.to.val].nuse == 0) { + chuse(i.arg[0], -1, fn); + chuse(i.arg[1], -1, fn); + return; + } + i0 = curi; + k = i.cls; + switch (i.op) { + case ODiv: + case ORem: + case OUDiv: + case OURem: + if (i.op == ODiv || i.op == OUDiv) + r0 = TMP(RAX), r1 = TMP(RDX); + else + r0 = TMP(RDX), r1 = TMP(RAX); + emit(OCopy, k, i.to, r0, R); + emit(OCopy, k, R, r1, R); + if (rtype(i.arg[1]) == RCon) { + /* immediates not allowed for + * divisions in x86 + */ + r0 = newtmp("isel", k, fn); + } else + r0 = i.arg[1]; + if (i.op == ODiv || i.op == ORem) { + emit(OXIDiv, k, R, r0, R); + emit(OSign, k, TMP(RDX), TMP(RAX), R); + } else { + emit(OXDiv, k, R, r0, R); + emit(OCopy, k, TMP(RDX), CON_Z, R); + } + emit(OCopy, k, TMP(RAX), i.arg[0], R); + if (rtype(i.arg[1]) == RCon) + emit(OCopy, k, r0, i.arg[1], R); + break; + case OSar: + case OShr: + case OShl: + if (rtype(i.arg[1]) == RCon) + goto Emit; + r0 = i.arg[1]; + i.arg[1] = TMP(RCX); + emit(OCopy, Kw, R, TMP(RCX), R); + emiti(i); + emit(OCopy, Kw, TMP(RCX), r0, R); + break; + case ONop: + break; + case OStored: + case OStores: + case OStorel: + case OStorew: + case OStoreh: + case OStoreb: + if (rtype(i.arg[0]) == RCon) { + if (i.op == OStored) + i.op = OStorel; + if (i.op == OStores) + i.op = OStorew; + } + seladdr(&i.arg[1], an, fn); + goto Emit; + case_OLoad: + seladdr(&i.arg[0], an, fn); + goto Emit; + case OCall: + case OSAlloc: + case OCopy: + case OAdd: + case OSub: + case OMul: + case OAnd: + case OOr: + case OXor: + case OXTest: + case OFtosi: + case OSitof: + case OExts: + case OTruncd: + case OCast: + case_OExt: +Emit: + emiti(i); + fixarg(&curi->arg[0], argcls(curi, 0), 0, fn); + fixarg(&curi->arg[1], argcls(curi, 1), 0, fn); + break; + case OAlloc: + case OAlloc+1: + case OAlloc+2: /* == OAlloc1 */ + /* we need to make sure + * the stack remains aligned + * (rsp = 0) mod 16 + */ + if (rtype(i.arg[0]) == RCon) { + assert(fn->con[i.arg[0].val].type == CBits); + val = fn->con[i.arg[0].val].bits.i; + val = (val + 15) & ~INT64_C(15); + if (val < 0 || val > INT32_MAX) + diag("isel: alloc too large"); + emit(OSAlloc, Kl, i.to, getcon(val, fn), R); + } else { + /* r0 = (i.arg[0] + 15) & -16 */ + r0 = newtmp("isel", Kl, fn); + r1 = newtmp("isel", Kl, fn); + emit(OSAlloc, Kl, i.to, r0, R); + emit(OAnd, Kl, r0, r1, getcon(-16, fn)); + emit(OAdd, Kl, r1, i.arg[0], getcon(15, fn)); + } + break; + default: + if (isext(i.op)) + goto case_OExt; + if (isload(i.op)) + goto case_OLoad; + if (iscmp(i.op, &kc, &x)) { + if (rtype(i.arg[0]) == RCon) + x = icmpop(x); + emit(OXSet+x, k, i.to, R, R); + selcmp(i.arg, kc, fn); + break; + } + diag("isel: non-exhaustive implementation"); + } + + while (i0 > curi && --i0) + if (rslot(i0->arg[0], fn) != -1 + || rslot(i0->arg[1], fn) != -1) + diag("isel: usupported address argument"); +} + +static Ins * +flagi(Ins *i0, Ins *i) +{ + while (i>i0) { + i--; + if (opdesc[i->op].sflag) + return i; + if (opdesc[i->op].lflag) + continue; + return 0; + } + return 0; +} + +struct AClass { + int inmem; + int align; + uint size; + int cls[2]; +}; + +static void +aclass(AClass *a, Typ *t) +{ + int e, s, n, cls; + uint sz, al; + + sz = t->size; + al = 1u << t->align; + + /* the ABI requires sizes to be rounded + * up to the nearest multiple of 8, moreover + * it makes it easy load and store structures + * in registers + */ + if (al < 8) + al = 8; + sz = (sz + al-1) & -al; + + a->size = sz; + a->align = t->align; + + if (t->dark || sz > 16) { + /* large or unaligned structures are + * required to be passed in memory + */ + a->inmem = 1; + return; + } + + a->inmem = 0; + for (e=0, s=0; e<2; e++) { + cls = -1; + for (n=0; n<8 && t->seg[s].len; s++) { + if (t->seg[s].ispad) { + /* don't change anything */ + } + else if (t->seg[s].isflt) { + if (cls == -1) + cls = Kd; + } + else + cls = Kl; + n += t->seg[s].len; + } + assert(n <= 8); + a->cls[e] = cls; + } +} + +static void +blit(Ref rstk, uint soff, Ref rsrc, uint sz, Fn *fn) +{ + Ref r, r1; + uint boff; + + /* it's an impolite blit, we might go across the end + * of the source object a little bit... */ + for (boff=0; sz>0; sz-=8, soff+=8, boff+=8) { + r = newtmp("abi", Kl, fn); + r1 = newtmp("abi", Kl, fn); + emit(OStorel, 0, R, r, r1); + emit(OAdd, Kl, r1, rstk, getcon(soff, fn)); + r1 = newtmp("abi", Kl, fn); + emit(OLoad, Kl, r, r1, R); + emit(OAdd, Kl, r1, rsrc, getcon(boff, fn)); + chuse(rsrc, +1, fn); + chuse(rstk, +1, fn); + } +} + +static int +retr(Ref reg[2], AClass *aret) +{ + static int retreg[2][2] = {{RAX, RDX}, {XMM0, XMM0+1}}; + int n, k, ca, nr[2]; + + nr[0] = nr[1] = 0; + ca = 0; + for (n=0; aret->cls[n]>=0 && n<2; n++) { + k = KBASE(aret->cls[n]); + reg[n] = TMP(retreg[k][nr[k]++]); + ca += 1 << (2 * k); + } + return ca; +} + +static void +selret(Blk *b, Fn *fn) +{ + int j, k, ca; + Ref r, r0, reg[2]; + AClass aret; + + j = b->jmp.type; + + if (!isret(j) || j == JRet0) + return; + + r0 = b->jmp.arg; + b->jmp.type = JRet0; + + if (j == JRetc) { + aclass(&aret, &typ[fn->retty]); + if (aret.inmem) { + assert(rtype(fn->retr) == RTmp); + emit(OCopy, Kl, TMP(RAX), fn->retr, R); + chuse(fn->retr, +1, fn); + blit(fn->retr, 0, r0, aret.size, fn); + ca = 1; + } else { + ca = retr(reg, &aret); + if (aret.size > 8) { + r = newtmp("abi", Kl, fn); + emit(OLoad, Kl, reg[1], r, R); + emit(OAdd, Kl, r, r0, getcon(8, fn)); + chuse(r0, +1, fn); + } + emit(OLoad, Kl, reg[0], r0, R); + } + } else { + k = j - JRetw; + if (KBASE(k) == 0) { + emit(OCopy, k, TMP(RAX), r0, R); + ca = 1; + } else { + emit(OCopy, k, TMP(XMM0), r0, R); + ca = 1 << 2; + } + } + + b->jmp.arg = CALL(ca); +} + +static void +seljmp(Blk *b, Fn *fn) +{ + Ref r; + int c, k; + Ins *fi; + + if (b->jmp.type == JRet0 || b->jmp.type == JJmp) + return; + assert(b->jmp.type == JJnz); + r = b->jmp.arg; + b->jmp.arg = R; + assert(!req(r, R)); + if (rtype(r) == RCon) { + b->jmp.type = JJmp; + if (req(r, CON_Z)) + b->s1 = b->s2; + b->s2 = 0; + return; + } + fi = flagi(b->ins, &b->ins[b->nins]); + if (fi && req(fi->to, r)) { + if (iscmp(fi->op, &k, &c)) { + if (rtype(fi->arg[0]) == RCon) + c = icmpop(c); + b->jmp.type = JXJc + c; + if (fn->tmp[r.val].nuse == 1) { + assert(fn->tmp[r.val].ndef == 1); + selcmp(fi->arg, k, fn); + *fi = (Ins){.op = ONop}; + } + return; + } + if (fi->op == OAnd && fn->tmp[r.val].nuse == 1 + && (rtype(fi->arg[0]) == RTmp || + rtype(fi->arg[1]) == RTmp)) { + fi->op = OXTest; + fi->to = R; + b->jmp.type = JXJc + ICne; + if (rtype(fi->arg[1]) == RCon) { + r = fi->arg[1]; + fi->arg[1] = fi->arg[0]; + fi->arg[0] = r; + } + return; + } + /* since flags are not tracked in liveness, + * the result of the flag-setting instruction + * has to be marked as live + */ + if (fn->tmp[r.val].nuse == 1) + emit(OCopy, Kw, R, r, R); + b->jmp.type = JXJc + ICne; + return; + } + selcmp((Ref[2]){r, CON_Z}, Kw, fn); /* todo, add long branch if non-zero */ + b->jmp.type = JXJc + ICne; +} + +static int +classify(Ins *i0, Ins *i1, AClass *ac, int op, AClass *aret) +{ + int nint, ni, nsse, ns, n, *pn; + AClass *a; + Ins *i; + + if (aret && aret->inmem) + nint = 5; /* hidden argument */ + else + nint = 6; + nsse = 8; + for (i=i0, a=ac; i<i1; i++, a++) { + if (i->op == op) { + if (KBASE(i->cls) == 0) + pn = &nint; + else + pn = &nsse; + if (*pn > 0) { + --*pn; + a->inmem = 0; + } else + a->inmem = 2; + a->align = 3; + a->size = 8; + a->cls[0] = i->cls; + } else { + n = i->arg[0].val & AMask; + aclass(a, &typ[n]); + if (a->inmem) + continue; + ni = ns = 0; + for (n=0; n<2; n++) + if (KBASE(a->cls[n]) == 0) + ni++; + else + ns++; + if (nint >= ni && nsse >= ns) { + nint -= ni; + nsse -= ns; + } else + a->inmem = 1; + } + } + + return ((6-nint) << 4) | ((8-nsse) << 8); +} + +int rsave[] = { + RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14 +}; +int rclob[] = {RBX, R12, R13, R14, R15}; + +MAKESURE(rsave_has_correct_size, sizeof rsave == NRSave * sizeof(int)); +MAKESURE(rclob_has_correct_size, sizeof rclob == NRClob * sizeof(int)); + +bits +retregs(Ref r, int p[2]) +{ + bits b; + int ni, nf; + + assert(rtype(r) == RACall); + b = 0; + ni = r.val & 3; + nf = (r.val >> 2) & 3; + if (ni >= 1) + b |= BIT(RAX); + if (ni >= 2) + b |= BIT(RDX); + if (nf >= 1) + b |= BIT(XMM0); + if (nf >= 2) + b |= BIT(XMM1); + if (p) { + p[0] = ni; + p[1] = nf; + } + return b; +} + +bits +argregs(Ref r, int p[2]) +{ + bits b; + int j, ni, nf; + + assert(rtype(r) == RACall); + b = 0; + ni = (r.val >> 4) & 15; + nf = (r.val >> 8) & 15; + for (j=0; j<ni; j++) + b |= BIT(rsave[j]); + for (j=0; j<nf; j++) + b |= BIT(XMM0+j); + if (p) { + p[0] = ni + 1; + p[1] = nf; + } + return b | BIT(RAX); +} + +static Ref +rarg(int ty, int *ni, int *ns) +{ + if (KBASE(ty) == 0) + return TMP(rsave[(*ni)++]); + else + return TMP(XMM0 + (*ns)++); +} + +struct RAlloc { + Ins i; + RAlloc *link; +}; + +static void +selcall(Fn *fn, Ins *i0, Ins *i1, RAlloc **rap) +{ + Ins *i; + AClass *ac, *a, aret; + int ca, ni, ns; + uint stk, off; + Ref r, r1, r2, reg[2], regcp[2]; + RAlloc *ra; + + ac = alloc((i1-i0) * sizeof ac[0]); + if (!req(i1->arg[1], R)) { + assert(rtype(i1->arg[1]) == RAType); + aclass(&aret, &typ[i1->arg[1].val & AMask]); + ca = classify(i0, i1, ac, OArg, &aret); + } else + ca = classify(i0, i1, ac, OArg, 0); + + for (stk=0, a=&ac[i1-i0]; a>ac;) + if ((--a)->inmem) { + assert(a->align <= 4); + stk += a->size; + if (a->align == 4) + stk += stk & 15; + } + stk += stk & 15; + if (stk) { + r = getcon(-(int64_t)stk, fn); + emit(OSAlloc, Kl, R, r, R); + } + + if (!req(i1->arg[1], R)) { + if (aret.inmem) { + /* get the return location from eax + * it saves one callee-save reg */ + r1 = newtmp("abi", Kl, fn); + emit(OCopy, Kl, i1->to, TMP(RAX), R); + ca += 1; + } else { + if (aret.size > 8) { + r = newtmp("abi", Kl, fn); + regcp[1] = newtmp("abi", aret.cls[1], fn); + emit(OStorel, 0, R, regcp[1], r); + emit(OAdd, Kl, r, i1->to, getcon(8, fn)); + chuse(i1->to, +1, fn); + ca += 1 << (2 * KBASE(aret.cls[1])); + } + regcp[0] = newtmp("abi", aret.cls[0], fn); + emit(OStorel, 0, R, regcp[0], i1->to); + ca += 1 << (2 * KBASE(aret.cls[0])); + retr(reg, &aret); + if (aret.size > 8) + emit(OCopy, aret.cls[1], regcp[1], reg[1], R); + emit(OCopy, aret.cls[0], regcp[0], reg[0], R); + r1 = i1->to; + } + /* allocate return pad */ + ra = alloc(sizeof *ra); + assert(NAlign == 3); + aret.align -= 2; + if (aret.align < 0) + aret.align = 0; + ra->i.op = OAlloc + aret.align; + ra->i.cls = Kl; + ra->i.to = r1; + ra->i.arg[0] = getcon(aret.size, fn); + ra->link = (*rap); + *rap = ra; + } else { + ra = 0; + if (KBASE(i1->cls) == 0) { + emit(OCopy, i1->cls, i1->to, TMP(RAX), R); + ca += 1; + } else { + emit(OCopy, i1->cls, i1->to, TMP(XMM0), R); + ca += 1 << 2; + } + } + emit(OCall, i1->cls, R, i1->arg[0], CALL(ca)); + emit(OCopy, Kw, TMP(RAX), getcon((ca >> 8) & 15, fn), R); + + ni = ns = 0; + if (ra && aret.inmem) + emit(OCopy, Kl, rarg(Kl, &ni, &ns), ra->i.to, R); /* pass hidden argument */ + for (i=i0, a=ac; i<i1; i++, a++) { + if (a->inmem) + continue; + r1 = rarg(a->cls[0], &ni, &ns); + if (i->op == OArgc) { + if (a->size > 8) { + r2 = rarg(a->cls[1], &ni, &ns); + r = newtmp("abi", Kl, fn); + emit(OLoad, a->cls[1], r2, r, R); + emit(OAdd, Kl, r, i->arg[1], getcon(8, fn)); + chuse(i->arg[1], +1, fn); + } + emit(OLoad, a->cls[0], r1, i->arg[1], R); + } else + emit(OCopy, i->cls, r1, i->arg[0], R); + } + + if (!stk) + return; + + r = newtmp("abi", Kl, fn); + chuse(r, -1, fn); + for (i=i0, a=ac, off=0; i<i1; i++, a++) { + if (!a->inmem) + continue; + if (i->op == OArgc) { + if (a->align == 4) + off += off & 15; + blit(r, off, i->arg[1], a->size, fn); + } else { + r1 = newtmp("abi", Kl, fn); + emit(OStorel, 0, R, i->arg[0], r1); + emit(OAdd, Kl, r1, r, getcon(off, fn)); + chuse(r, +1, fn); + } + off += a->size; + } + emit(OSAlloc, Kl, r, getcon(stk, fn), R); +} + +static void +selpar(Fn *fn, Ins *i0, Ins *i1) +{ + AClass *ac, *a, aret; + Ins *i; + int ni, ns, s, al; + Ref r, r1; + + ac = alloc((i1-i0) * sizeof ac[0]); + curi = insb; + ni = ns = 0; + + if (fn->retty >= 0) { + aclass(&aret, &typ[fn->retty]); + if (aret.inmem) { + r = newtmp("abi", Kl, fn); + *curi++ = (Ins){OCopy, r, {rarg(Kl, &ni, &ns)}, Kl}; + fn->retr = r; + } + classify(i0, i1, ac, OPar, &aret); + } else + classify(i0, i1, ac, OPar, 0); + + assert(NAlign == 3); + + s = 4; + for (i=i0, a=ac; i<i1; i++, a++) { + switch (a->inmem) { + case 1: + assert(a->align <= 4); + if (a->align == 4) + s = (s+3) & -4; + fn->tmp[i->to.val].slot = -s; /* HACK! */ + s += a->size / 4; + continue; + case 2: + *curi++ = (Ins){OLoad, i->to, {SLOT(-s)}, i->cls}; + s += 2; + continue; + } + r1 = rarg(a->cls[0], &ni, &ns); + if (i->op == OParc) { + r = newtmp("abi", Kl, fn); + *curi++ = (Ins){OCopy, r, {r1}, Kl}; + a->cls[0] = r.val; + if (a->size > 8) { + r1 = rarg(a->cls[1], &ni, &ns); + r = newtmp("abi", Kl, fn); + *curi++ = (Ins){OCopy, r, {r1}, Kl}; + a->cls[1] = r.val; + } + } else + *curi++ = (Ins){OCopy, i->to, {r1}, i->cls}; + } + for (i=i0, a=ac; i<i1; i++, a++) { + if (i->op != OParc || a->inmem) + continue; + assert(NAlign == 3); + for (al=0; a->align >> (al+2); al++) + ; + r = TMP(a->cls[0]); + r1 = i->to; + *curi++ = (Ins){OAlloc+al, r1, {getcon(a->size, fn)}, Kl}; + *curi++ = (Ins){OStorel, R, {r, r1}, 0}; + if (a->size > 8) { + r = newtmp("abi", Kl, fn); + *curi++ = (Ins){OAdd, r, {r1, getcon(8, fn)}, Kl}; + r1 = TMP(a->cls[1]); + *curi++ = (Ins){OStorel, R, {r1, r}, 0}; + } + } +} + +static int +aref(Ref r, ANum *ai) +{ + switch (rtype(r)) { + default: + diag("isel: aref defaulted"); + case RCon: + return 2; + case RTmp: + return ai[r.val].n; + } +} + +static int +ascale(Ref r, Con *con) +{ + int64_t n; + + if (rtype(r) != RCon) + return 0; + if (con[r.val].type != CBits) + return 0; + n = con[r.val].bits.i; + return n == 1 || n == 2 || n == 4 || n == 8; +} + +static void +anumber(ANum *ai, Blk *b, Con *con) +{ + /* This should be made obsolete by a proper + * reassoc pass. + * + * Rules: + * + * RTmp(_) -> 0 tmp + * ( RTmp(_) -> 1 slot ) + * RCon(_) -> 2 con + * 0 * 2 -> 3 s * i (when constant is 1,2,4,8) + */ + static char add[10][10] = { + [2] [2] = 2, /* folding */ + [2] [5] = 5, [5] [2] = 5, + [2] [6] = 6, [6] [2] = 6, + [2] [7] = 7, [7] [2] = 7, + [0] [0] = 4, /* 4: b + s * i */ + [0] [3] = 4, [3] [0] = 4, + [2] [3] = 5, [3] [2] = 5, /* 5: o + s * i */ + [0] [2] = 6, [2] [0] = 6, /* 6: o + b */ + [2] [4] = 7, [4] [2] = 7, /* 7: o + b + s * i */ + [0] [5] = 7, [5] [0] = 7, + [6] [3] = 7, [3] [6] = 7, + + }; + int a, a1, a2, n1, n2, t1, t2; + Ins *i; + + for (i=b->ins; i-b->ins < b->nins; i++) { + if (rtype(i->to) == RTmp) + ai[i->to.val].i = i; + if (i->op != OAdd && i->op != OMul) + continue; + a1 = aref(i->arg[0], ai); + a2 = aref(i->arg[1], ai); + t1 = a1 != 1 && a1 != 2; + t2 = a2 != 1 && a2 != 2; + if (i->op == OAdd) { + a = add[n1 = a1][n2 = a2]; + if (t1 && a < add[0][a2]) + a = add[n1 = 0][n2 = a2]; + if (t2 && a < add[a1][0]) + a = add[n1 = a1][n2 = 0]; + if (t1 && t2 && a < add[0][0]) + a = add[n1 = 0][n2 = 0]; + } else { + n1 = n2 = a = 0; + if (ascale(i->arg[0], con) && t2) + a = 3, n1 = 2, n2 = 0; + if (t1 && ascale(i->arg[1], con)) + a = 3, n1 = 0, n2 = 2; + } + ai[i->to.val].n = a; + ai[i->to.val].l = n1; + ai[i->to.val].r = n2; + } +} + +static void +amatch(Addr *a, Ref r, ANum *ai, Fn *fn, int top) +{ + Ins *i; + int nl, nr, t, s; + Ref al, ar; + + if (top) + memset(a, 0, sizeof *a); + if (rtype(r) == RCon) { + addcon(&a->offset, &fn->con[r.val]); + return; + } + assert(rtype(r) == RTmp); + i = ai[r.val].i; + nl = ai[r.val].l; + nr = ai[r.val].r; + if (i) { + if (nl > nr) { + al = i->arg[1]; + ar = i->arg[0]; + t = nl, nl = nr, nr = t; + } else { + al = i->arg[0]; + ar = i->arg[1]; + } + } + switch (ai[r.val].n) { + default: + diag("isel: amatch defaulted"); + case 3: /* s * i */ + if (!top) { + a->index = al; + a->scale = fn->con[ar.val].bits.i; + } else + a->base = r; + break; + case 4: /* b + s * i */ + switch (nr) { + case 0: + if (fn->tmp[ar.val].slot != -1) { + al = i->arg[1]; + ar = i->arg[0]; + } + a->index = ar; + a->scale = 1; + break; + case 3: + amatch(a, ar, ai, fn, 0); + break; + } + r = al; + case 0: + s = fn->tmp[r.val].slot; + if (s != -1) + r = SLOT(s); + a->base = r; + break; + case 2: /* constants */ + case 5: /* o + s * i */ + case 6: /* o + b */ + case 7: /* o + b + s * i */ + amatch(a, ar, ai, fn, 0); + amatch(a, al, ai, fn, 0); + break; + } +} + +/* instruction selection + * requires use counts (as given by parsing) + */ +void +isel(Fn *fn) +{ + Blk *b, **sb; + Ins *i, *i0, *ip; + Phi *p; + uint a; + int n, al; + int64_t sz; + ANum *ainfo; + RAlloc *ral; + + for (n=0; n<fn->ntmp; n++) + fn->tmp[n].slot = -1; + fn->slot = 0; + + /* lower arguments */ + for (b=fn->start, i=b->ins; i-b->ins < b->nins; i++) + if (i->op != OPar && i->op != OParc) + break; + selpar(fn, b->ins, i); + n = b->nins - (i - b->ins) + (curi - insb); + i0 = alloc(n * sizeof(Ins)); + ip = icpy(ip = i0, insb, curi - insb); + ip = icpy(ip, i, &b->ins[b->nins] - i); + b->nins = n; + b->ins = i0; + + /* lower function calls and returns */ + ral = 0; + b = fn->start; + do { + if (!(b = b->link)) + b = fn->start; /* do it last */ + curi = &insb[NIns]; + selret(b, fn); + for (i=&b->ins[b->nins]; i!=b->ins;) { + if ((--i)->op == OCall) { + for (i0=i; i0>b->ins; i0--) + if ((i0-1)->op != OArg) + if ((i0-1)->op != OArgc) + break; + selcall(fn, i0, i, &ral); + i = i0; + continue; + } + assert(i->op != OArg && i->op != OArgc); + emiti(*i); + } + if (b == fn->start) + for (; ral; ral=ral->link) + emiti(ral->i); + b->nins = &insb[NIns] - curi; + idup(&b->ins, curi, b->nins); + } while (b != fn->start); + + if (debug['A']) { + fprintf(stderr, "\n> After call lowering:\n"); + printfn(fn, stderr); + } + + /* assign slots to fast allocs */ + b = fn->start; + assert(NAlign == 3 && "change n=4 and sz /= 4 below"); + for (al=OAlloc, n=4; al<=OAlloc1; al++, n*=2) + for (i=b->ins; i-b->ins < b->nins; i++) + if (i->op == al) { + if (rtype(i->arg[0]) != RCon) + break; + sz = fn->con[i->arg[0].val].bits.i; + if (sz < 0 || sz >= INT_MAX-3) + diag("isel: invalid alloc size"); + sz = (sz + n-1) & -n; + sz /= 4; + fn->tmp[i->to.val].slot = fn->slot; + fn->slot += sz; + *i = (Ins){.op = ONop}; + } + + /* process basic blocks */ + n = fn->ntmp; + ainfo = emalloc(n * sizeof ainfo[0]); + for (b=fn->start; b; b=b->link) { + curi = &insb[NIns]; + for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++) + for (p=(*sb)->phi; p; p=p->link) { + for (a=0; p->blk[a] != b; a++) + assert(a+1 < p->narg); + fixarg(&p->arg[a], p->cls, 1, fn); + } + memset(ainfo, 0, n * sizeof ainfo[0]); + anumber(ainfo, b, fn->con); + seljmp(b, fn); + for (i=&b->ins[b->nins]; i!=b->ins;) + sel(*--i, ainfo, fn); + b->nins = &insb[NIns] - curi; + idup(&b->ins, curi, b->nins); + } + free(ainfo); + + if (debug['I']) { + fprintf(stderr, "\n> After instruction selection:\n"); + printfn(fn, stderr); + } +} diff --git a/src/live.c b/src/live.c new file mode 100644 index 0000000..44806e1 --- /dev/null +++ b/src/live.c @@ -0,0 +1,174 @@ +#include "all.h" + +void +liveon(BSet *v, Blk *b, Blk *s) +{ + Phi *p; + uint a; + + bscopy(v, s->in); + for (p=s->phi; p; p=p->link) { + bsclr(v, p->to.val); + for (a=0; a<p->narg; a++) + if (p->blk[a] == b) + if (rtype(p->arg[a]) == RTmp) + bsset(v, p->arg[a].val); + } +} + +static int +phitmp(int t, Tmp *tmp) +{ + int tp; + + tp = tmp[t].phi; + return tp ? tp : t; +} + +static void +phifix(int t1, short *phi, Tmp *tmp) +{ + int t, t2; + + /* detect temporaries arguments + * of the same phi node that + * interfere and separate them + */ + t = phitmp(t1, tmp); + t2 = phi[t]; + if (t2 && t2 != t1) { + if (t != t1) { + tmp[t1].phi = t1; + t = t1; + } else { + tmp[t2].phi = t2; + phi[t2] = t2; + } + } + phi[t] = t1; +} + +static void +bset(Ref r, Blk *b, int *nlv, short *phi, Tmp *tmp) +{ + + if (rtype(r) != RTmp) + return; + bsset(b->gen, r.val); + phifix(r.val, phi, tmp); + if (!bshas(b->in, r.val)) { + nlv[KBASE(tmp[r.val].cls)]++; + bsset(b->in, r.val); + } +} + +/* liveness analysis + * requires rpo computation + */ +void +filllive(Fn *f) +{ + Blk *b; + Ins *i; + int k, t, m[2], n, chg, nlv[2]; + short *phi; + BSet u[1], v[1]; + Mem *ma; + + bsinit(u, f->ntmp); + bsinit(v, f->ntmp); + phi = emalloc(f->ntmp * sizeof phi[0]); + for (b=f->start; b; b=b->link) { + bsinit(b->in, f->ntmp); + bsinit(b->out, f->ntmp); + bsinit(b->gen, f->ntmp); + } + chg = 1; +Again: + for (n=f->nblk-1; n>=0; n--) { + b = f->rpo[n]; + + bscopy(u, b->out); + if (b->s1) { + liveon(v, b, b->s1); + bsunion(b->out, v); + } + if (b->s2) { + liveon(v, b, b->s2); + bsunion(b->out, v); + } + chg |= !bsequal(b->out, u); + + memset(phi, 0, f->ntmp * sizeof phi[0]); + memset(nlv, 0, sizeof nlv); + bscopy(b->in, b->out); + for (t=0; t<f->ntmp; t++) + if (bshas(b->in, t)) { + phifix(t, phi, f->tmp); + nlv[KBASE(f->tmp[t].cls)]++; + } + if (rtype(b->jmp.arg) == RACall) { + assert(bscount(b->in) == 0 && nlv[0] == 0 && nlv[1] == 0); + b->in->t[0] |= retregs(b->jmp.arg, nlv); + } else + bset(b->jmp.arg, b, nlv, phi, f->tmp); + for (k=0; k<2; k++) + b->nlive[k] = nlv[k]; + for (i=&b->ins[b->nins]; i!=b->ins;) { + if ((--i)->op == OCall && rtype(i->arg[1]) == RACall) { + b->in->t[0] &= ~retregs(i->arg[1], m); + for (k=0; k<2; k++) + nlv[k] -= m[k]; + if (nlv[0] + NISave > b->nlive[0]) + b->nlive[0] = nlv[0] + NISave; + if (nlv[1] + NFSave > b->nlive[1]) + b->nlive[1] = nlv[1] + NFSave; + b->in->t[0] |= argregs(i->arg[1], m); + for (k=0; k<2; k++) + nlv[k] += m[k]; + } + if (!req(i->to, R)) { + assert(rtype(i->to) == RTmp); + t = i->to.val; + if (bshas(b->in, i->to.val)) + nlv[KBASE(f->tmp[t].cls)]--; + bsset(b->gen, t); + bsclr(b->in, t); + phi[phitmp(t, f->tmp)] = 0; + } + for (k=0; k<2; k++) + switch (rtype(i->arg[k])) { + case RAMem: + ma = &f->mem[i->arg[k].val & AMask]; + bset(ma->base, b, nlv, phi, f->tmp); + bset(ma->index, b, nlv, phi, f->tmp); + break; + default: + bset(i->arg[k], b, nlv, phi, f->tmp); + break; + } + for (k=0; k<2; k++) + if (nlv[k] > b->nlive[k]) + b->nlive[k] = nlv[k]; + } + } + if (chg) { + chg = 0; + goto Again; + } + free(phi); + + if (debug['L']) { + fprintf(stderr, "\n> Liveness analysis:\n"); + for (b=f->start; b; b=b->link) { + fprintf(stderr, "\t%-10sin: ", b->name); + dumpts(b->in, f->tmp, stderr); + fprintf(stderr, "\t out: "); + dumpts(b->out, f->tmp, stderr); + fprintf(stderr, "\t gen: "); + dumpts(b->gen, f->tmp, stderr); + fprintf(stderr, "\t live: "); + fprintf(stderr, "%d %d\n", b->nlive[0], b->nlive[1]); + } + } +} diff --git a/src/main.c b/src/main.c new file mode 100644 index 0000000..b8cd7d6 --- /dev/null +++ b/src/main.c @@ -0,0 +1,117 @@ +#include "all.h" +#include <ctype.h> +#include <getopt.h> + +char debug['Z'+1] = { + ['P'] = 0, /* parsing */ + ['A'] = 0, /* abi lowering */ + ['I'] = 0, /* instruction selection */ + ['L'] = 0, /* liveness */ + ['M'] = 0, /* memory optimization */ + ['N'] = 0, /* ssa construction */ + ['C'] = 0, /* copy elimination */ + ['S'] = 0, /* spilling */ + ['R'] = 0, /* reg. allocation */ +}; + +static FILE *outf; +static int dbg; + +static void +data(Dat *d) +{ + if (dbg) + return; + if (d->type == DEnd) { + fputs("/* end data */\n\n", outf); + freeall(); + } + emitdat(d, outf); +} + +static void +func(Fn *fn) +{ + int n; + + if (dbg) + fprintf(stderr, "**** Function %s ****", fn->name); + if (debug['P']) { + fprintf(stderr, "\n> After parsing:\n"); + printfn(fn, stderr); + } + fillrpo(fn); + fillpreds(fn); + filluse(fn); + memopt(fn); + ssa(fn); + filluse(fn); + copy(fn); + filluse(fn); + isel(fn); + filllive(fn); + fillcost(fn); + spill(fn); + rega(fn); + fillrpo(fn); + assert(fn->rpo[0] == fn->start); + for (n=0;; n++) + if (n == fn->nblk-1) { + fn->rpo[n]->link = 0; + break; + } else + fn->rpo[n]->link = fn->rpo[n+1]; + if (!dbg) { + emitfn(fn, outf); + fprintf(outf, "/* end function %s */\n\n", fn->name); + } else + fprintf(stderr, "\n"); + freeall(); +} + +int +main(int ac, char *av[]) +{ + FILE *inf; + char *f; + int c; + + outf = stdout; + while ((c = getopt(ac, av, "d:o:")) != -1) + switch (c) { + case 'd': + for (; *optarg; optarg++) + if (isalpha(*optarg)) { + debug[toupper(*optarg)] = 1; + dbg = 1; + } + break; + case 'o': + if (strcmp(optarg, "-") != 0) + outf = fopen(optarg, "w"); + break; + default: + fprintf(stderr, "usage: %s [-d <flags>] [-o out] {file.ssa, -}\n", av[0]); + exit(1); + } + + do { + f = av[optind]; + if (!f || strcmp(f, "-") == 0) { + inf = stdin; + f = "-"; + } else { + inf = fopen(f, "r"); + if (!inf) { + fprintf(stderr, "cannot open '%s'\n", f); + exit(1); + } + } + parse(inf, f, data, func); + } while (++optind < ac); + + if (!dbg) + emitfin(outf); + + exit(0); +} diff --git a/src/mem.c b/src/mem.c new file mode 100644 index 0000000..bda43d7 --- /dev/null +++ b/src/mem.c @@ -0,0 +1,81 @@ +#include "all.h" + +/* Memory optimization: + * + * - replace alloced slots used only in + * load/store operations + * Assumption: all the accesses have the + * same size (this could be wrong...) + */ + +/* require use, maintains use counts */ +void +memopt(Fn *fn) +{ + Blk *b; + Ins *i, *l; + Tmp *t; + Use *u, *ue; + int a; + + b = fn->start; + for (i=b->ins; i-b->ins < b->nins; i++) { + if (OAlloc > i->op || i->op > OAlloc1) + continue; + assert(NAlign == 3); + assert(rtype(i->to) == RTmp); + t = &fn->tmp[i->to.val]; + for (u=t->use; u != &t->use[t->nuse]; u++) { + if (u->type != UIns) + goto NextIns; + l = u->u.ins; + if (!isload(l->op) + && (!isstore(l->op) || req(i->to, l->arg[0]))) + goto NextIns; + } + /* get rid of the alloc and replace uses */ + *i = (Ins){.op = ONop}; + t->ndef--; + ue = &t->use[t->nuse]; + for (u=t->use; u!=ue; u++) { + l = u->u.ins; + if (isstore(l->op)) { + if (l->op == OStores) + l->cls = Kd; + else if (l->op == OStored) + l->cls = Kd; + else if (l->op == OStorel) + l->cls = Kl; + else + l->cls = Kw; + l->op = OCopy; + l->to = l->arg[1]; + l->arg[1] = R; + t->nuse--; + t->ndef++; + } else + /* try to turn loads into copies so we + * can eliminate them later */ + switch(l->op) { + case OLoad: + l->op = OCopy; + break; + case OLoadsw: + case OLoaduw: + l->cls = Kw; + l->op = OCopy; + break; + default: + /* keep l->cls */ + a = l->op - OLoadsw; + l->op = OExtsw + a; + break; + } + } + NextIns:; + } + if (debug['M']) { + fprintf(stderr, "\n> After memory optimization:\n"); + printfn(fn, stderr); + } +} diff --git a/src/parse.c b/src/parse.c new file mode 100644 index 0000000..903e909 --- /dev/null +++ b/src/parse.c @@ -0,0 +1,1081 @@ +#include "all.h" +#include <ctype.h> +#include <stdarg.h> + +enum { + Kx = -1, /* Invalid operand */ + Km = Kl, /* Memory pointer (for x64) */ +}; + +OpDesc opdesc[NOp] = { +#define A(a,b,c,d) {[Kw]=K##a, [Kl]=K##b, [Ks]=K##c, [Kd]=K##d} + + /* NAME NM ARGCLS0 ARGCLS1 SF LF */ + [OAdd] = { "add", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 }, + [OSub] = { "sub", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 }, + [ODiv] = { "div", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 }, + [ORem] = { "rem", 2, {A(w,l,x,x), A(w,l,x,x)}, 0, 0 }, + [OUDiv] = { "udiv", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 }, + [OURem] = { "urem", 2, {A(w,l,x,x), A(w,l,x,x)}, 0, 0 }, + [OMul] = { "mul", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 }, + [OAnd] = { "and", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 }, + [OOr] = { "or", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 }, + [OXor] = { "xor", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 }, + [OSar] = { "sar", 1, {A(w,l,x,x), A(w,w,x,x)}, 1, 0 }, + [OShr] = { "shr", 1, {A(w,l,x,x), A(w,w,x,x)}, 1, 0 }, + [OShl] = { "shl", 1, {A(w,l,x,x), A(w,w,x,x)}, 1, 0 }, + [OStored] = { "stored", 0, {A(d,d,d,d), A(m,m,m,m)}, 0, 1 }, + [OStores] = { "stores", 0, {A(s,s,s,s), A(m,m,m,m)}, 0, 1 }, + [OStorel] = { "storel", 0, {A(l,l,l,l), A(m,m,m,m)}, 0, 1 }, + [OStorew] = { "storew", 0, {A(w,w,w,w), A(m,m,m,m)}, 0, 1 }, + [OStoreh] = { "storeh", 0, {A(w,w,w,w), A(m,m,m,m)}, 0, 1 }, + [OStoreb] = { "storeb", 0, {A(w,w,w,w), A(m,m,m,m)}, 0, 1 }, + [OLoad] = { "load", 0, {A(m,m,m,m), A(x,x,x,x)}, 0, 1 }, + [OLoadsw] = { "loadsw", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, + [OLoaduw] = { "loaduw", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, + [OLoadsh] = { "loadsh", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, + [OLoaduh] = { "loaduh", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, + [OLoadsb] = { "loadsb", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, + [OLoadub] = { "loadub", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, + [OExtsw] = { "extsw", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, + [OExtuw] = { "extuw", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, + [OExtsh] = { "extsh", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, + [OExtuh] = { "extuh", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, + [OExtsb] = { "extsb", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, + [OExtub] = { "extub", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, + [OExts] = { "exts", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, + [OTruncd] = { "truncd", 0, {A(d,d,d,d), A(x,x,x,x)}, 0, 1 }, + [OFtosi] = { "ftosi", 0, {A(s,d,x,x), A(x,x,x,x)}, 0, 1 }, + [OSitof] = { "sitof", 0, {A(x,x,w,l), A(x,x,x,x)}, 0, 1 }, + [OCast] = { "cast", 0, {A(s,d,w,l), A(x,x,x,x)}, 0, 1 }, + [OCopy] = { "copy", 1, {A(w,l,s,d), A(x,x,x,x)}, 0, 1 }, + [ONop] = { "nop", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 1 }, + [OSwap] = { "swap", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 }, + [OSign] = { "sign", 0, {A(w,l,x,x), A(x,x,x,x)}, 0, 0 }, + [OSAlloc] = { "salloc", 0, {A(x,l,x,x), A(x,x,x,x)}, 0, 0 }, + [OXDiv] = { "xdiv", 1, {A(w,l,x,x), A(x,x,x,x)}, 0, 0 }, + [OXCmp] = { "xcmp", 1, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 }, + [OXTest] = { "xtest", 1, {A(w,l,x,x), A(w,l,x,x)}, 1, 0 }, + [OAddr] = { "addr", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, + [OPar] = { "parn", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 }, + [OParc] = { "parc", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 }, + [OArg] = { "arg", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 }, + [OArgc] = { "argc", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 }, + [OCall] = { "call", 0, {A(m,m,m,m), A(x,x,x,x)}, 0, 0 }, + [OXSetnp] = { "xsetnp", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 }, + [OXSetp] = { "xsetp", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 }, + [OAlloc] = { "alloc4", 1, {A(l,l,l,l), A(x,x,x,x)}, 0, 0 }, + [OAlloc+1] = { "alloc8", 1, {A(l,l,l,l), A(x,x,x,x)}, 0, 0 }, + [OAlloc+2] = { "alloc16", 1, {A(l,l,l,l), A(x,x,x,x)}, 0, 0 }, +#define X(c) \ + [OCmpw+IC##c] = { "c" #c "w", 0, {A(w,w,x,x), A(w,w,x,x)}, 1, 0 }, \ + [OCmpl+IC##c] = { "c" #c "l", 0, {A(l,l,x,x), A(l,l,x,x)}, 1, 0 }, \ + [OXSet+IC##c] = { "xset" #c, 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 1 }, + ICMPS(X) +#undef X +#define X(c) \ + [OCmps+FC##c] = { "c" #c "s", 0, {A(s,s,x,x), A(s,s,x,x)}, 1, 0 }, \ + [OCmpd+FC##c] = { "c" #c "d", 0, {A(d,d,x,x), A(d,d,x,x)}, 1, 0 }, + FCMPS(X) +#undef X + +}; +#undef A + +typedef enum { + PXXX, + PLbl, + PPhi, + PIns, + PEnd, +} PState; + +enum { + TXXX = NPubOp, + TCall, + TPhi, + TJmp, + TJnz, + TRet, + TFunc, + TType, + TData, + TAlign, + TL, + TW, + TH, + TB, + TD, + TS, + TZ, + + TInt, + TFlts, + TFltd, + TTmp, + TLbl, + TGlo, + TTyp, + TStr, + + TPlus, + TEq, + TComma, + TLParen, + TRParen, + TLBrace, + TRBrace, + TNL, + TEOF, +}; + + +static FILE *inf; +static char *inpath; +static int thead; +static struct { + char chr; + double fltd; + float flts; + int64_t num; + char *str; +} tokval; +static int lnum; + +static Tmp *tmp; +static Con *con; +static int ntmp; +static int ncon; +static Phi **plink; +static Blk **bmap; +static Blk *curb; +static Blk **blink; +static int nblk; +static int rcls; +static int ntyp; + + +void +err(char *s, ...) +{ + char buf[100], *p, *end; + va_list ap; + + p = buf; + end = buf + sizeof(buf); + + va_start(ap, s); + p += snprintf(p, end - p, "%s:%d: ", inpath, lnum); + p += vsnprintf(p, end - p, s, ap); + va_end(ap); + + diag(buf); +} + +static int +lex() +{ + static struct { + char *str; + int tok; + } tmap[] = { + { "call", TCall }, + { "phi", TPhi }, + { "jmp", TJmp }, + { "jnz", TJnz }, + { "ret", TRet }, + { "function", TFunc }, + { "type", TType }, + { "data", TData }, + { "align", TAlign }, + { "l", TL }, + { "w", TW }, + { "h", TH }, + { "b", TB }, + { "d", TD }, + { "s", TS }, + { "z", TZ }, + { "loadw", OLoad }, /* for convenience */ + { "loadl", OLoad }, + { "loads", OLoad }, + { "loadd", OLoad }, + { "alloc1", OAlloc }, + { "alloc2", OAlloc }, + { 0, TXXX } + }; + static char tok[NString]; + int c, i; + int t; + + do + c = fgetc(inf); + while (isblank(c)); + t = TXXX; + tokval.chr = c; + switch (c) { + case EOF: + return TEOF; + case ',': + return TComma; + case '(': + return TLParen; + case ')': + return TRParen; + case '{': + return TLBrace; + case '}': + return TRBrace; + case '=': + return TEq; + case '+': + return TPlus; + case 's': + if (fscanf(inf, "_%f", &tokval.flts) != 1) + break; + return TFlts; + case 'd': + if (fscanf(inf, "_%lf", &tokval.fltd) != 1) + break; + return TFltd; + case '%': + t = TTmp; + goto Alpha; + case '@': + t = TLbl; + goto Alpha; + case '$': + t = TGlo; + goto Alpha; + case ':': + t = TTyp; + goto Alpha; + case '#': + while (fgetc(inf) != '\n') + ; + case '\n': + lnum++; + return TNL; + } + if (isdigit(c) || c == '-' || c == '+') { + ungetc(c, inf); + if (fscanf(inf, "%"SCNd64, &tokval.num) != 1) + err("invalid integer literal"); + return TInt; + } + if (c == '"') { + tokval.str = vnew(0, 1); + for (i=0;; i++) { + c = fgetc(inf); + vgrow(&tokval.str, i+1); + if (c == '"') + if (!i || tokval.str[i-1] != '\\') { + tokval.str[i] = 0; + return TStr; + } + tokval.str[i] = c; + } + } + if (0) +Alpha: c = fgetc(inf); + if (!isalpha(c) && c != '.' && c != '_') + err("lexing failure: invalid character %c (%d)", c, c); + i = 0; + do { + if (i >= NString-1) + err("identifier too long"); + tok[i++] = c; + c = fgetc(inf); + } while (isalpha(c) || c == '$' || c == '.' || c == '_' || isdigit(c)); + tok[i] = 0; + ungetc(c, inf); + tokval.str = tok; + if (t != TXXX) { + return t; + } + for (i=0; i<NPubOp; i++) + if (opdesc[i].name) + if (strcmp(tok, opdesc[i].name) == 0) + return i; + for (i=0; tmap[i].str; i++) + if (strcmp(tok, tmap[i].str) == 0) + return tmap[i].tok; + err("unknown keyword %s", tokval.str); + return TXXX; +} + +static int +peek() +{ + if (thead == TXXX) + thead = lex(); + return thead; +} + +static int +next() +{ + int t; + + t = peek(); + thead = TXXX; + return t; +} + +static int +nextnl() +{ + int t; + + while ((t = next()) == TNL) + ; + return t; +} + +static void +expect(int t) +{ + static char *ttoa[] = { + [TLbl] = "label", + [TComma] = ",", + [TEq] = "=", + [TNL] = "newline", + [TLParen] = "(", + [TRParen] = ")", + [TLBrace] = "{", + [TRBrace] = "}", + [TEOF] = 0, + }; + char buf[128], *s1, *s2; + int t1; + + t1 = next(); + if (t == t1) + return; + s1 = ttoa[t] ? ttoa[t] : "??"; + s2 = ttoa[t1] ? ttoa[t1] : "??"; + sprintf(buf, "%s expected, got %s instead", s1, s2); + err(buf); +} + +static Ref +tmpref(char *v) +{ + int t; + + for (t=Tmp0; t<ntmp; t++) + if (strcmp(v, tmp[t].name) == 0) + return TMP(t); + vgrow(&tmp, ++ntmp); + strcpy(tmp[t].name, v); + return TMP(t); +} + +static Ref +parseref() +{ + Con c; + int i; + + memset(&c, 0, sizeof c); + switch (next()) { + case TTmp: + return tmpref(tokval.str); + case TInt: + c.type = CBits; + c.bits.i = tokval.num; + goto Look; + case TFlts: + c.type = CBits; + c.bits.s = tokval.flts; + c.flt = 1; + goto Look; + case TFltd: + c.type = CBits; + c.bits.d = tokval.fltd; + c.flt = 2; + goto Look; + case TGlo: + c.type = CAddr; + strcpy(c.label, tokval.str); + Look: + for (i=0; i<ncon; i++) + if (con[i].type == c.type + && con[i].bits.i == c.bits.i + && strcmp(con[i].label, c.label) == 0) + return CON(i); + vgrow(&con, ++ncon); + con[i] = c; + return CON(i); + default: + return R; + } +} + +static int +parsecls(int *tyn) +{ + int i; + + switch (next()) { + default: + err("invalid class specifier"); + case TTyp: + for (i=0; i<ntyp; i++) + if (strcmp(tokval.str, typ[i].name) == 0) { + *tyn = i; + return 4; + } + err("undefined type"); + case TW: + return Kw; + case TL: + return Kl; + case TS: + return Ks; + case TD: + return Kd; + } +} + +static void +parserefl(int arg) +{ + int k, t, ty; + Ref r; + + expect(TLParen); + if (peek() == TRParen) { + next(); + return; + } + for (;;) { + if (curi - insb >= NIns) + err("too many instructions (1)"); + k = parsecls(&ty); + r = parseref(); + if (req(r, R)) + err("invalid reference argument"); + if (!arg && rtype(r) != RTmp) + err("invalid function parameter"); + if (k == 4) + if (arg) + *curi = (Ins){OArgc, R, {TYPE(ty), r}, Kl}; + else + *curi = (Ins){OParc, r, {TYPE(ty)}, Kl}; + else + if (arg) + *curi = (Ins){OArg, R, {r}, k}; + else + *curi = (Ins){OPar, r, {R}, k}; + curi++; + t = next(); + if (t == TRParen) + break; + if (t != TComma) + err(", or ) expected"); + } +} + +static Blk * +findblk(char *name) +{ + int i; + + for (i=0; i<nblk; i++) + if (strcmp(bmap[i]->name, name) == 0) + return bmap[i]; + vgrow(&bmap, ++nblk); + bmap[i] = blknew(); + strcpy(bmap[i]->name, name); + return bmap[i]; +} + +static void +closeblk() +{ + curb->nins = curi - insb; + idup(&curb->ins, insb, curb->nins); + blink = &curb->link; + curi = insb; +} + +static PState +parseline(PState ps) +{ + Ref arg[NPred] = {R}; + Blk *blk[NPred]; + Phi *phi; + Ref r; + Blk *b; + int t, op, i, k, ty; + + t = nextnl(); + if (ps == PLbl && t != TLbl && t != TRBrace) + err("label or } expected"); + switch (t) { + default: + if (isstore(t)) { + /* operations without result */ + r = R; + k = 0; + op = t; + goto DoOp; + } + err("label, instruction or jump expected"); + case TRBrace: + return PEnd; + case TTmp: + break; + case TLbl: + b = findblk(tokval.str); + if (b->jmp.type != JXXX) + err("multiple definitions of block"); + if (curb && curb->jmp.type == JXXX) { + closeblk(); + curb->jmp.type = JJmp; + curb->s1 = b; + } + *blink = b; + curb = b; + plink = &curb->phi; + expect(TNL); + return PPhi; + case TRet: + curb->jmp.type = (int[]){ + JRetw, JRetl, + JRets, JRetd, + JRetc, JRet0 + }[rcls]; + if (rcls < 5) { + r = parseref(); + if (req(r, R)) + err("return value expected"); + curb->jmp.arg = r; + } + goto Close; + case TJmp: + curb->jmp.type = JJmp; + goto Jump; + case TJnz: + curb->jmp.type = JJnz; + r = parseref(); + if (req(r, R)) + err("invalid argument for jnz jump"); + curb->jmp.arg = r; + expect(TComma); + Jump: + expect(TLbl); + curb->s1 = findblk(tokval.str); + if (curb->jmp.type != JJmp) { + expect(TComma); + expect(TLbl); + curb->s2 = findblk(tokval.str); + } + Close: + expect(TNL); + closeblk(); + return PLbl; + } + r = tmpref(tokval.str); + expect(TEq); + k = parsecls(&ty); + op = next(); +DoOp: + if (op == TPhi) { + if (ps != PPhi) + err("unexpected phi instruction"); + op = -1; + } + if (op == TCall) { + arg[0] = parseref(); + parserefl(1); + expect(TNL); + op = OCall; + if (k == 4) { + k = Kl; + arg[1] = TYPE(ty); + } else + arg[1] = R; + goto Ins; + } + if (k == 4) + err("size class must be w, l, s, or d"); + if (op >= NPubOp) + err("invalid instruction"); + i = 0; + if (peek() != TNL) + for (;;) { + if (i == NPred) + err("too many arguments"); + if (op == -1) { + expect(TLbl); + blk[i] = findblk(tokval.str); + } + arg[i] = parseref(); + if (req(arg[i], R)) + err("invalid instruction argument"); + i++; + t = peek(); + if (t == TNL) + break; + if (t != TComma) + err(", or end of line expected"); + next(); + } + next(); + if (op != -1) { + Ins: + if (curi - insb >= NIns) + err("too many instructions (2)"); + curi->op = op; + curi->cls = k; + curi->to = r; + curi->arg[0] = arg[0]; + curi->arg[1] = arg[1]; + curi++; + return PIns; + } else { + phi = alloc(sizeof *phi); + phi->to = r; + phi->cls = k; + memcpy(phi->arg, arg, i * sizeof arg[0]); + memcpy(phi->blk, blk, i * sizeof blk[0]); + phi->narg = i; + *plink = phi; + plink = &phi->link; + return PPhi; + } +} + +static Fn * +parsefn() +{ + PState ps; + Fn *fn; + + ntmp = Tmp0; + ncon = 1; /* first constant must be 0 */ + curb = 0; + nblk = 0; + curi = insb; + tmp = vnew(ntmp, sizeof tmp[0]); + con = vnew(ncon, sizeof con[0]); + bmap = vnew(nblk, sizeof bmap[0]); + con[0].type = CBits; + fn = alloc(sizeof *fn); + blink = &fn->start; + fn->retty = -1; + if (peek() != TGlo) + rcls = parsecls(&fn->retty); + else + rcls = 5; + if (next() != TGlo) + err("function name expected"); + strcpy(fn->name, tokval.str); + parserefl(0); + if (nextnl() != TLBrace) + err("function body must start with {"); + ps = PLbl; + do + ps = parseline(ps); + while (ps != PEnd); + if (!curb) + err("empty file"); + if (curb->jmp.type == JXXX) + err("last block misses jump"); + fn->tmp = tmp; + fn->con = con; + fn->mem = vnew(0, sizeof fn->mem[0]); + fn->ntmp = ntmp; + fn->ncon = ncon; + fn->nmem = 0; + fn->nblk = nblk; + fn->rpo = 0; + return fn; +} + +static void +parsetyp() +{ + Typ *ty; + int t, n, sz, al, s, a, c, flt; + + if (ntyp >= NTyp) + err("too many type definitions"); + ty = &typ[ntyp++]; + ty->align = -1; + if (nextnl() != TTyp || nextnl() != TEq) + err("type name, then = expected"); + strcpy(ty->name, tokval.str); + t = nextnl(); + if (t == TAlign) { + if (nextnl() != TInt) + err("alignment expected"); + for (al=0; tokval.num /= 2; al++) + ; + ty->align = al; + t = nextnl(); + } + if (t != TLBrace) + err("type body must start with {"); + t = nextnl(); + if (t == TInt) { + ty->dark = 1; + ty->size = tokval.num; + if (ty->align == -1) + err("dark types need alignment"); + t = nextnl(); + } else { + ty->dark = 0; + n = -1; + sz = 0; + al = 0; + for (;;) { + flt = 0; + switch (t) { + default: err("invalid size specifier %c", tokval.chr); + case TD: flt = 1; + case TL: s = 8; a = 3; break; + case TS: flt = 1; + case TW: s = 4; a = 2; break; + case TH: s = 2; a = 1; break; + case TB: s = 1; a = 0; break; + } + if (a > al) + al = a; + if ((a = sz & (s-1))) { + a = s - a; + if (++n < NSeg) { + /* padding segment */ + ty->seg[n].ispad = 1; + ty->seg[n].len = a; + } + } + t = nextnl(); + if (t == TInt) { + c = tokval.num; + t = nextnl(); + } else + c = 1; + while (c-- > 0) { + if (++n < NSeg) { + ty->seg[n].isflt = flt; + ty->seg[n].ispad = 0; + ty->seg[n].len = s; + } + sz += a + s; + } + if (t != TComma) + break; + t = nextnl(); + } + if (++n >= NSeg) + ty->dark = 1; + else + ty->seg[n].len = 0; + if (ty->align == -1) + ty->align = al; + else + al = ty->align; + a = (1 << al) - 1; + ty->size = (sz + a) & ~a; + } + if (t != TRBrace) + err("expected closing }"); +} + +static void +parsedatref(Dat *d) +{ + int t; + + d->isref = 1; + d->u.ref.nam = tokval.str; + d->u.ref.off = 0; + t = peek(); + if (t == TPlus) { + next(); + if (next() != TInt) + err("invalid token after offset in ref"); + d->u.ref.off = tokval.num; + } +} + +static void +parsedatstr(Dat *d) +{ + d->isstr = 1; + d->u.str = tokval.str; +} + +static void +parsedat(void cb(Dat *)) +{ + char s[NString]; + int t; + Dat d; + + d.type = DStart; + d.isstr = 0; + d.isref = 0; + cb(&d); + if (nextnl() != TGlo || nextnl() != TEq) + err("data name, then = expected"); + strcpy(s, tokval.str); + t = nextnl(); + if (t == TAlign) { + if (nextnl() != TInt) + err("alignment expected"); + d.type = DAlign; + d.u.num = tokval.num; + cb(&d); + t = nextnl(); + } + d.type = DName; + d.u.str = s; + cb(&d); + + if (t != TLBrace) + err("expected data contents in { .. }"); + for (;;) { + switch (nextnl()) { + default: err("invalid size specifier %c in data", tokval.chr); + case TRBrace: goto Done; + case TL: d.type = DL; break; + case TW: d.type = DW; break; + case TH: d.type = DH; break; + case TB: d.type = DB; break; + case TS: d.type = DW; break; + case TD: d.type = DL; break; + case TZ: d.type = DZ; break; + } + t = nextnl(); + do { + d.isref = 0; + d.isstr = 0; + memset(&d.u, 0, sizeof d.u); + if (t == TFlts) + d.u.flts = tokval.flts; + else if (t == TFltd) + d.u.fltd = tokval.fltd; + else if (t == TInt) + d.u.num = tokval.num; + else if (t == TGlo) + parsedatref(&d); + else if (t == TStr) + parsedatstr(&d); + else + err("constant literal expected"); + cb(&d); + t = nextnl(); + } while (t == TInt || t == TFlts || t == TFltd); + if (t == TRBrace) + break; + if (t != TComma) + err(", or } expected"); + } +Done: + d.type = DEnd; + cb(&d); +} + +void +parse(FILE *f, char *path, void data(Dat *), void func(Fn *)) +{ + inf = f; + inpath = path; + lnum = 1; + thead = TXXX; + ntyp = 0; + for (;;) + switch (nextnl()) { + case TFunc: + func(parsefn()); + break; + case TType: + parsetyp(); + break; + case TData: + parsedat(data); + break; + case TEOF: + return; + default: + err("top-level definition expected"); + break; + } +} + +static void +printcon(Con *c, FILE *f) +{ + switch (c->type) { + case CUndef: + break; + case CAddr: + fprintf(f, "$%s", c->label); + if (c->bits.i) + fprintf(f, "%+"PRIi64, c->bits.i); + break; + case CBits: + if (c->flt == 1) + fprintf(f, "s_%f", c->bits.s); + else if (c->flt == 2) + fprintf(f, "d_%lf", c->bits.d); + else + fprintf(f, "%"PRIi64, c->bits.i); + break; + } +} + +void +printref(Ref r, Fn *fn, FILE *f) +{ + int i; + Mem *m; + + switch (rtype(r)) { + case RTmp: + if (r.val < Tmp0) + fprintf(f, "R%d", r.val); + else + fprintf(f, "%%%s", fn->tmp[r.val].name); + break; + case RCon: + printcon(&fn->con[r.val], f); + break; + case RSlot: + fprintf(f, "S%d", r.val); + break; + case RACall: + fprintf(f, "%03x", r.val & AMask); + break; + case RAType: + fprintf(f, ":%s", typ[r.val & AMask].name); + break; + case RAMem: + i = 0; + m = &fn->mem[r.val & AMask]; + fputc('[', f); + if (m->offset.type != CUndef) { + printcon(&m->offset, f); + i = 1; + } + if (!req(m->base, R)) { + if (i) + fprintf(f, " + "); + printref(m->base, fn, f); + i = 1; + } + if (!req(m->index, R)) { + if (i) + fprintf(f, " + "); + fprintf(f, "%d * ", m->scale); + printref(m->index, fn, f); + } + fputc(']', f); + break; + } +} + +void +printfn(Fn *fn, FILE *f) +{ + static char *jtoa[NJmp] = { + [JRet0] = "ret", + [JRetw] = "retw", + [JRetl] = "retl", + [JRetc] = "retc", + [JRets] = "rets", + [JRetd] = "retd", + [JJnz] = "jnz", + [JXJnp] = "xjnp", + [JXJp] = "xjp", + #define X(c) [JXJc+IC##c] = "xj" #c, + ICMPS(X) + #undef X + }; + static char prcls[NOp] = { + [OArg] = 1, + [OSwap] = 1, + [OXCmp] = 1, + [OXTest] = 1, + [OXDiv] = 1, + [OXIDiv] = 1, + }; + static char ktoc[] = "wlsd"; + Blk *b; + Phi *p; + Ins *i; + uint n; + + fprintf(f, "function $%s() {\n", fn->name); + for (b=fn->start; b; b=b->link) { + fprintf(f, "@%s\n", b->name); + for (p=b->phi; p; p=p->link) { + fprintf(f, "\t"); + printref(p->to, fn, f); + fprintf(f, " =%c phi ", ktoc[p->cls]); + assert(p->narg); + for (n=0;; n++) { + fprintf(f, "@%s ", p->blk[n]->name); + printref(p->arg[n], fn, f); + if (n == p->narg-1) { + fprintf(f, "\n"); + break; + } else + fprintf(f, ", "); + } + } + for (i=b->ins; i-b->ins < b->nins; i++) { + fprintf(f, "\t"); + if (!req(i->to, R)) { + printref(i->to, fn, f); + fprintf(f, " =%c ", ktoc[i->cls]); + } + assert(opdesc[i->op].name); + fprintf(f, "%s", opdesc[i->op].name); + if (req(i->to, R) && prcls[i->op]) + fputc(ktoc[i->cls], f); + if (!req(i->arg[0], R)) { + fprintf(f, " "); + printref(i->arg[0], fn, f); + } + if (!req(i->arg[1], R)) { + fprintf(f, ", "); + printref(i->arg[1], fn, f); + } + fprintf(f, "\n"); + } + switch (b->jmp.type) { + case JRet0: + case JRetw: + case JRetl: + case JRets: + case JRetd: + case JRetc: + fprintf(f, "\t%s", jtoa[b->jmp.type]); + if (b->jmp.type != JRet0 || !req(b->jmp.arg, R)) { + fprintf(f, " "); + printref(b->jmp.arg, fn, f); + } + if (b->jmp.type == JRetc) + fprintf(f, ", :%s", typ[fn->retty].name); + fprintf(f, "\n"); + break; + case JJmp: + if (b->s1 != b->link) + fprintf(f, "\tjmp @%s\n", b->s1->name); + break; + default: + fprintf(f, "\t%s ", jtoa[b->jmp.type]); + if (b->jmp.type == JJnz) { + printref(b->jmp.arg, fn, f); + fprintf(f, ", "); + } + fprintf(f, "@%s, @%s\n", b->s1->name, b->s2->name); + break; + } + } + fprintf(f, "}\n"); +} diff --git a/src/rega.c b/src/rega.c new file mode 100644 index 0000000..7f8edcf --- /dev/null +++ b/src/rega.c @@ -0,0 +1,598 @@ +#include "all.h" + +#ifdef TEST_PMOV + #undef assert + #define assert(x) assert_test(#x, x) +#endif + +typedef struct RMap RMap; + +struct RMap { + int t[NIReg+NFReg]; + int r[NIReg+NFReg]; + BSet b[1]; + int n; +}; + +static bits regu; /* registers used */ +static Tmp *tmp; /* function temporaries */ +static Mem *mem; /* function mem references */ +static struct { + Ref src, dst; + int cls; +} *pm; /* parallel move constructed */ +static int cpm, npm; /* capacity and size of pm */ + +static int * +hint(int t) +{ + return &tmp[phicls(t, tmp)].hint.r; +} + +static void +sethint(int t, int r) +{ + bits m; + + m = tmp[phicls(t, tmp)].hint.m; + if (*hint(t) == -1) + if (!(BIT(r) & m)) + *hint(t) = r; +} + +static void +rcopy(RMap *ma, RMap *mb) +{ + memcpy(ma->t, mb->t, sizeof ma->t); + memcpy(ma->r, mb->r, sizeof ma->r); + bscopy(ma->b, mb->b); + ma->n = mb->n; +} + +static int +rfind(RMap *m, int t) +{ + int i; + + for (i=0; i<m->n; i++) + if (m->t[i] == t) + return m->r[i]; + return -1; +} + +static Ref +rref(RMap *m, int t) +{ + int r, s; + + r = rfind(m, t); + if (r == -1) { + s = tmp[t].slot; + assert(s != -1 && "should have spilled"); + return SLOT(s); + } else + return TMP(r); +} + +static void +radd(RMap *m, int t, int r) +{ + assert((t >= Tmp0 || t == r) && "invalid temporary"); + assert(((RAX <= r && r < RAX + NIReg) || (XMM0 <= r && r < XMM0 + NFReg)) && "invalid register"); + assert(!bshas(m->b, t) && "temporary has mapping"); + assert(!bshas(m->b, r) && "register already allocated"); + assert(m->n <= NIReg+NFReg && "too many mappings"); + bsset(m->b, t); + bsset(m->b, r); + m->t[m->n] = t; + m->r[m->n] = r; + m->n++; + regu |= BIT(r); +} + +static Ref +ralloc(RMap *m, int t) +{ + bits regs; + int r, r0, r1; + + if (t < Tmp0) { + assert(bshas(m->b, t)); + return TMP(t); + } + if (bshas(m->b, t)) { + r = rfind(m, t); + assert(r != -1); + return TMP(r); + } + r = *hint(t); + if (r == -1 || bshas(m->b, r)) { + regs = tmp[phicls(t, tmp)].hint.m; + regs |= m->b->t[0]; + switch (KBASE(tmp[t].cls)) { + case 0: + r0 = RAX; + r1 = RAX + NIReg; + break; + case 1: + r0 = XMM0; + r1 = XMM0 + NFReg; + break; + } + for (r=r0; r<r1; r++) + if (!(regs & BIT(r))) + goto Found; + for (r=r0; r<r1; r++) + if (!bshas(m->b, r)) + goto Found; + diag("rega: no more regs"); + } +Found: + radd(m, t, r); + sethint(t, r); + return TMP(r); +} + +static int +rfree(RMap *m, int t) +{ + int i, r; + + if (!bshas(m->b, t)) + return -1; + for (i=0; m->t[i] != t; i++) + assert(i+1 < m->n); + r = m->r[i]; + bsclr(m->b, t); + bsclr(m->b, r); + m->n--; + memmove(&m->t[i], &m->t[i+1], (m->n-i) * sizeof m->t[0]); + memmove(&m->r[i], &m->r[i+1], (m->n-i) * sizeof m->r[0]); + return r; +} + +static void +mdump(RMap *m) +{ + int i; + + for (i=0; i<m->n; i++) + fprintf(stderr, " (%s, R%d)", + tmp[m->t[i]].name, + m->r[i]); + fprintf(stderr, "\n"); +} + +static void +pmadd(Ref src, Ref dst, int k) +{ + if (npm == cpm) { + cpm = cpm * 2 + 16; + pm = realloc(pm, cpm * sizeof pm[0]); + if (!pm) + diag("pmadd: out of memory"); + } + pm[npm].src = src; + pm[npm].dst = dst; + pm[npm].cls = k; + npm++; +} + +enum PMStat { ToMove, Moving, Moved }; + +static Ref +pmrec(enum PMStat *status, int i, int *k) +{ + Ref swp, swp1; + int j, k1; + + /* note, this routine might emit + * too many large instructions: + * + * , x -- x + * x -- x -- x | + * ` x -- x + * + * if only the first move is wide + * the whole cycle will be wide, + * this is safe but not necessary + */ + + if (req(pm[i].src, pm[i].dst)) + return R; + status[i] = Moving; + assert(KBASE(*k) == KBASE(pm[i].cls)); + assert((Kw|1) == Kl && (Ks|1) == Kd); + *k |= KWIDE(pm[i].cls); /* see above */ + swp = R; + for (j=0; j<npm; j++) { + if (req(pm[j].src, pm[i].dst)) + switch (status[j]) { + case ToMove: + k1 = *k; + swp1 = pmrec(status, j, &k1); + if (!req(swp1, R)) { + assert(req(swp, R)); + swp = swp1; + *k = k1; + } + break; + case Moving: + assert(req(swp, R)); + swp = pm[i].dst; + break; + case Moved: + break; + } + } + status[i] = Moved; + if (req(swp, R)) { + *curi++ = (Ins){OCopy, pm[i].dst, {pm[i].src}, pm[i].cls}; + return R; + } else if (!req(swp, pm[i].src)) { + *curi++ = (Ins){OSwap, R, {pm[i].src, pm[i].dst}, *k}; + return swp; + } else + return R; + +} + +static void +pmgen() +{ + int i, k; + enum PMStat *status; + + status = alloc(npm * sizeof status[0]); + assert(!npm || status[npm-1] == ToMove); + curi = insb; + for (i=0; i<npm; i++) + if (status[i] == ToMove) { + k = pm[i].cls; + pmrec(status, i, &k); + } +} + +static void +move(int r, Ref to, RMap *m) +{ + int n, t, r1; + + r1 = req(to, R) ? -1 : rfree(m, to.val); + if (bshas(m->b, r) && r1 != r) { + /* r is used and not by to */ + for (n=0; m->r[n] != r; n++) + assert(n+1 < m->n); + t = m->t[n]; + rfree(m, t); + bsset(m->b, r); + ralloc(m, t); + bsclr(m->b, r); + } + t = req(to, R) ? r : to.val; + radd(m, t, r); +} + +static int +regcpy(Ins *i) +{ + return i->op == OCopy && isreg(i->arg[0]); +} + +static Ins * +dopm(Blk *b, Ins *i, RMap *m) +{ + RMap m0; + int n, r, r1, t, s; + Ins *i0, *i1, *ip, *ir; + bits def; + + m0 = *m; + i1 = ++i; + do { + i--; + move(i->arg[0].val, i->to, m); + } while (i != b->ins && regcpy(i-1)); + assert(m0.n <= m->n); + if (i != b->ins && (i-1)->op == OCall) { + def = retregs((i-1)->arg[1], 0); + for (r=0; r<NRSave; r++) + if (!(BIT(rsave[r]) & def)) + move(rsave[r], R, m); + } + for (npm=0, n=0; n<m->n; n++) { + t = m->t[n]; + s = tmp[t].slot; + r1 = m->r[n]; + r = rfind(&m0, t); + if (r != -1) + pmadd(TMP(r1), TMP(r), tmp[t].cls); + else if (s != -1) + pmadd(TMP(r1), SLOT(s), tmp[t].cls); + } + for (ip=i; ip<i1; ip++) { + if (!req(ip->to, R)) + rfree(m, ip->to.val); + r = ip->arg[0].val; + if (rfind(m, r) == -1) + radd(m, r, r); + } + pmgen(); +#ifdef TEST_PMOV + return 0; +#endif + n = b->nins - (i1 - i) + (curi - insb); + i0 = alloc(n * sizeof(Ins)); + ip = icpy(ip = i0, b->ins, i - b->ins); + ip = icpy(ir = ip, insb, curi - insb); + ip = icpy(ip, i1, &b->ins[b->nins] - i1); + b->nins = n; + b->ins = i0; + return ir; +} + +static int +prio(Ref r1, Ref r2) +{ + /* trivial heuristic to begin with, + * later we can use the distance to + * the definition instruction + */ + (void) r2; + return *hint(r1.val) != -1; +} + +static void +insert(Ref *r, Ref **rs, int p) +{ + int i; + + rs[i = p] = r; + while (i-- > 0 && prio(*r, *rs[i])) { + rs[i+1] = rs[i]; + rs[i] = r; + } +} + +static void +doblk(Blk *b, RMap *cur) +{ + int x, r, nr; + bits rs; + Ins *i; + Mem *m; + Ref *ra[4]; + + if (rtype(b->jmp.arg) == RTmp) + b->jmp.arg = ralloc(cur, b->jmp.arg.val); + else if (rtype(b->jmp.arg) == RACall) { + /* add return registers */ + rs = retregs(b->jmp.arg, 0); + for (r=0; rs; rs/=2, r++) + if (rs & 1) + radd(cur, r, r); + } + for (i=&b->ins[b->nins]; i!=b->ins;) { + switch ((--i)->op) { + case OCall: + rs = argregs(i->arg[1], 0); + for (r=0; r<NRSave; r++) + if (!(BIT(rsave[r]) & rs)) + rfree(cur, rsave[r]); + break; + case OCopy: + if (isreg(i->arg[0])) { + i = dopm(b, i, cur); + continue; + } + if (isreg(i->to)) + if (rtype(i->arg[0]) == RTmp) + sethint(i->arg[0].val, i->to.val); + /* fall through */ + default: + if (!req(i->to, R)) { + assert(rtype(i->to) == RTmp); + r = rfree(cur, i->to.val); + if (r == -1 && !isreg(i->to)) { + *i = (Ins){.op = ONop}; + continue; + } + if (i->to.val >= Tmp0) + i->to = TMP(r); + } + break; + } + for (x=0, nr=0; x<2; x++) + switch (rtype(i->arg[x])) { + case RAMem: + m = &mem[i->arg[x].val & AMask]; + if (rtype(m->base) == RTmp) + insert(&m->base, ra, nr++); + if (rtype(m->index) == RTmp) + insert(&m->index, ra, nr++); + break; + case RTmp: + insert(&i->arg[x], ra, nr++); + break; + } + for (r=0; r<nr; r++) + *ra[r] = ralloc(cur, ra[r]->val); + } +} + +/* register allocation + * depends on rpo, phi, cost, (and obviously spill) + */ +void +rega(Fn *fn) +{ + int j, n, t, r, r1, x, rl[Tmp0]; + Blk *b, *b1, *s, ***ps, *blist; + RMap *end, *beg, cur, old; + Ins *i; + Phi *p; + uint u; + Ref src, dst; + + /* 1. setup */ + regu = 0; + tmp = fn->tmp; + mem = fn->mem; + end = alloc(fn->nblk * sizeof end[0]); + beg = alloc(fn->nblk * sizeof beg[0]); + for (n=0; n<fn->nblk; n++) { + bsinit(end[n].b, fn->ntmp); + bsinit(beg[n].b, fn->ntmp); + } + bsinit(cur.b, fn->ntmp); + bsinit(old.b, fn->ntmp); + + for (t=Tmp0; t<fn->ntmp; t++) + *hint(t) = -1; + for (b=fn->start, i=b->ins; i-b->ins < b->nins; i++) + if (i->op != OCopy || !isreg(i->arg[0])) + break; + else { + assert(rtype(i->to) == RTmp); + sethint(i->to.val, i->arg[0].val); + } + + /* 2. assign registers following post-order */ + for (n=fn->nblk-1; n>=0; n--) { + b = fn->rpo[n]; + cur.n = 0; + bszero(cur.b); + for (x=0; x<2; x++) + for (t=Tmp0; t<fn->ntmp; t++) { + assert(bshas(b->out, t) || + !bshas(cur.b, t)); + if (bshas(b->out, t)) + if (!bshas(cur.b, t)) + if (x || (r=*hint(t)) != -1) + if (x || !bshas(cur.b, r)) + ralloc(&cur, t); + } + rcopy(&end[n], &cur); + doblk(b, &cur); + bscopy(b->in, cur.b); + for (p=b->phi; p; p=p->link) + if (rtype(p->to) == RTmp) { + bsclr(b->in, p->to.val); + /* heuristic 0: + * if the phi destination has an + * argument from a frequent block + * that was already allocated to + * 'r', use 'r' as the new hint + */ + memset(rl, 0, sizeof rl); + for (u=0; u<p->narg; u++) { + t = p->arg[u].val; + b1 = p->blk[u]; + if (rtype(p->arg[u]) == RTmp) + if ((r=rfind(&end[b1->id], t)) != -1) + rl[r] += b1->loop; + } + for (x=0, j=0; j<Tmp0; j++) + if (rl[j] > rl[x]) + x = j; + if (rl[x] >= b->loop) + *hint(p->to.val) = x; + } + if (b->npred > 1) { + /* heuristic 1: + * attempt to satisfy hints + * when it's simple and we have + * multiple predecessors + */ + rcopy(&old, &cur); + curi = &insb[NIns]; + for (j=0; j<old.n; j++) { + t = old.t[j]; + r = *hint(t); + r1 = rfind(&cur, t); + if (r != -1 && r != r1) + if (!bshas(cur.b, r)) { + rfree(&cur, t); + radd(&cur, t, r); + x = tmp[t].cls; + emit(OCopy, x, TMP(r1), TMP(r), R); + } + } + if ((j = &insb[NIns] - curi)) { + b->nins += j; + i = alloc(b->nins * sizeof(Ins)); + icpy(icpy(i, curi, j), b->ins, b->nins-j); + b->ins = i; + } + } + rcopy(&beg[n], &cur); + } + if (debug['R']) { + fprintf(stderr, "\n> Register mappings:\n"); + for (n=0; n<fn->nblk; n++) { + b = fn->rpo[n]; + fprintf(stderr, "\t%-10s beg", b->name); + mdump(&beg[n]); + fprintf(stderr, "\t end"); + mdump(&end[n]); + } + fprintf(stderr, "\n"); + } + + /* 3. compose glue code */ + blist = 0; + for (b=fn->start;; b=b->link) { + ps = (Blk**[3]){&b->s1, &b->s2, (Blk*[1]){0}}; + for (; (s=**ps); ps++) { + npm = 0; + for (p=s->phi; p; p=p->link) { + dst = p->to; + assert(rtype(dst)==RSlot || rtype(dst)==RTmp); + if (rtype(dst) == RTmp) { + r = rfind(&beg[s->id], dst.val); + if (r == -1) + continue; + dst = TMP(r); + } + for (u=0; p->blk[u]!=b; u++) + assert(u+1 < p->narg); + src = p->arg[u]; + if (rtype(src) == RTmp) + src = rref(&end[b->id], src.val); + pmadd(src, dst, p->cls); + } + for (t=Tmp0; t<fn->ntmp; t++) + if (bshas(s->in, t)) { + src = rref(&end[b->id], t); + dst = rref(&beg[s->id], t); + pmadd(src, dst, tmp[t].cls); + } + pmgen(); + if (curi == insb) + continue; + b1 = blknew(); + b1->loop = (b->loop+s->loop) / 2; + b1->link = blist; + blist = b1; + fn->nblk++; + sprintf(b1->name, "%s_%s", b->name, s->name); + b1->nins = curi - insb; + idup(&b1->ins, insb, b1->nins); + b1->jmp.type = JJmp; + b1->s1 = s; + **ps = b1; + } + if (!b->link) { + b->link = blist; + break; + } + } + for (b=fn->start; b; b=b->link) + b->phi = 0; + fn->reg = regu; + + if (debug['R']) { + fprintf(stderr, "\n> After register allocation:\n"); + printfn(fn, stderr); + } +} diff --git a/src/spill.c b/src/spill.c new file mode 100644 index 0000000..72f8106 --- /dev/null +++ b/src/spill.c @@ -0,0 +1,507 @@ +#include "all.h" + +static void +loopmark(Blk *hd, Blk *b, Phi *p) +{ + int k, head; + uint n, a; + + head = hd->id; + if (b->id < head) + return; + for (; p; p=p->link) + for (a=0; a<p->narg; a++) + if (p->blk[a] == b) + if (rtype(p->arg[a]) == RTmp) + bsset(hd->gen, p->arg[a].val); + if (b->visit == head) + return; + b->visit = head; + b->loop *= 10; + /* aggregate looping information at + * loop headers */ + bsunion(hd->gen, b->gen); + for (k=0; k<2; k++) + if (b->nlive[k] > hd->nlive[k]) + hd->nlive[k] = b->nlive[k]; + for (n=0; n<b->npred; n++) + loopmark(hd, b->pred[n], b->phi); +} + +static void +tmpuse(Ref r, int use, int loop, Fn *fn) +{ + Mem *m; + Tmp *t; + + if (rtype(r) == RAMem) { + m = &fn->mem[r.val & AMask]; + tmpuse(m->base, 1, loop, fn); + tmpuse(m->index, 1, loop, fn); + } + else if (rtype(r) == RTmp && r.val >= Tmp0) { + t = &fn->tmp[r.val]; + t->nuse += use; + t->ndef += !use; + t->cost += loop; + } +} + +/* evaluate spill costs of temporaries, + * this also fills usage information + * requires rpo, preds + */ +void +fillcost(Fn *fn) +{ + int n, hd; + uint a; + Blk *b; + Ins *i; + Tmp *t; + Phi *p; + + for (b=fn->start; b; b=b->link) { + b->loop = 1; + b->visit = -1; + } + if (debug['S']) + fprintf(stderr, "\n> Loop information:\n"); + for (n=0; n<fn->nblk; n++) { + b = fn->rpo[n]; + hd = 0; + for (a=0; a<b->npred; a++) + if (b->pred[a]->id >= n) { + loopmark(b, b->pred[a], b->phi); + hd = 1; + } + if (hd && debug['S']) { + fprintf(stderr, "\t%-10s", b->name); + fprintf(stderr, " (% 3d ", b->nlive[0]); + fprintf(stderr, "% 3d) ", b->nlive[1]); + dumpts(b->gen, fn->tmp, stderr); + } + } + for (t=fn->tmp; t-fn->tmp < fn->ntmp; t++) { + t->cost = t-fn->tmp < Tmp0 ? 1e6 : 0; + t->nuse = 0; + t->ndef = 0; + } + for (b=fn->start; b; b=b->link) { + for (p=b->phi; p; p=p->link) { + /* todo, the cost computation + * for p->to is not great... */ + tmpuse(p->to, 0, 0, fn); + for (a=0; a<p->narg; a++) { + n = p->blk[a]->loop; + assert(b->npred==p->narg && + "wrong cfg"); + n /= b->npred; + tmpuse(p->arg[a], 1, n, fn); + } + } + n = b->loop; + for (i=b->ins; i-b->ins < b->nins; i++) { + tmpuse(i->to, 0, n, fn); + tmpuse(i->arg[0], 1, n, fn); + tmpuse(i->arg[1], 1, n, fn); + } + tmpuse(b->jmp.arg, 1, n, fn); + } + if (debug['S']) { + fprintf(stderr, "\n> Spill costs:\n"); + for (n=Tmp0; n<fn->ntmp; n++) + fprintf(stderr, "\t%-10s %d\n", + fn->tmp[n].name, + fn->tmp[n].cost); + fprintf(stderr, "\n"); + } +} + +static BSet *fst; /* temps to prioritize in registers (for tcmp1) */ +static Tmp *tmp; /* current temporaries (for tcmpX) */ +static int ntmp; /* current # of temps (for limit) */ +static int locs; /* stack size used by locals */ +static int slot4; /* next slot of 4 bytes */ +static int slot8; /* ditto, 8 bytes */ +static BSet mask[2][1]; /* class masks */ + +static int +tcmp0(const void *pa, const void *pb) +{ + return tmp[*(int *)pb].cost - tmp[*(int *)pa].cost; +} + +static int +tcmp1(const void *pa, const void *pb) +{ + int c; + + c = bshas(fst, *(int *)pb) - bshas(fst, *(int *)pa); + return c ? c : tcmp0(pa, pb); +} + +static Ref +slot(int t) +{ + int s; + + if (t < Tmp0) + diag("spill: cannot spill register"); + s = tmp[t].slot; + if (s == -1) { + assert(NAlign == 3); + /* nice logic to pack stack slots + * on demand, there can be only + * one hole and slot4 points to it + * + * invariant: slot4 <= slot8 + */ + if (KWIDE(tmp[t].cls)) { + s = slot8; + if (slot4 == slot8) + slot4 += 2; + slot8 += 2; + } else { + s = slot4; + if (slot4 == slot8) { + slot8 += 2; + slot4 += 1; + } else + slot4 = slot8; + } + s += locs; + tmp[t].slot = s; + } + return SLOT(s); +} + +static void +limit(BSet *b, int k, BSet *f) +{ + static int *tarr, maxt; + int i, nt; + uint t; + + nt = bscount(b); + if (nt <= k) + return; + if (nt > maxt) { + free(tarr); + tarr = emalloc(nt * sizeof tarr[0]); + maxt = nt; + } + for (i=0, t=0; bsiter(b, &t); t++) { + bsclr(b, t); + tarr[i++] = t; + } + if (!f) + qsort(tarr, nt, sizeof tarr[0], tcmp0); + else { + fst = f; + qsort(tarr, nt, sizeof tarr[0], tcmp1); + } + for (i=0; i<k && i<nt; i++) + bsset(b, tarr[i]); + for (; i<nt; i++) + slot(tarr[i]); +} + +static void +limit2(BSet *b1, int k1, int k2, BSet *fst) +{ + BSet b2[1]; + + bsinit(b2, ntmp); /* todo, free those */ + bscopy(b2, b1); + bsinter(b1, mask[0]); + bsinter(b2, mask[1]); + limit(b1, NIReg - k1, fst); + limit(b2, NFReg - k2, fst); + bsunion(b1, b2); +} + +static void +sethint(BSet *u, bits r) +{ + uint t; + + for (t=Tmp0; bsiter(u, &t); t++) + tmp[phicls(t, tmp)].hint.m |= r; +} + +static void +reloads(BSet *u, BSet *v) +{ + uint t; + + for (t=Tmp0; bsiter(u, &t); t++) + if (!bshas(v, t)) + emit(OLoad, tmp[t].cls, TMP(t), slot(t), R); +} + +static void +store(Ref r, int s) +{ + static int kstore[] = { + [Kw] = OStorew, [Kl] = OStorel, + [Ks] = OStores, [Kd] = OStored, + }; + + if (s != -1) + emit(kstore[tmp[r.val].cls], 0, R, r, SLOT(s)); +} + +static int +regcpy(Ins *i) +{ + return i->op == OCopy && isreg(i->arg[0]); +} + +static Ins * +dopm(Blk *b, Ins *i, BSet *v) +{ + int n, t; + BSet u[1]; + Ins *i1; + bits r; + + bsinit(u, ntmp); /* todo, free those */ + /* consecutive copies from + * registers need to be handled + * as one large instruction + * + * fixme: there is an assumption + * that calls are always followed + * by copy instructions here, this + * might not be true if previous + * passes change + */ + i1 = ++i; + do { + i--; + t = i->to.val; + if (!req(i->to, R)) + if (bshas(v, t)) { + bsclr(v, t); + store(i->to, tmp[t].slot); + } + bsset(v, i->arg[0].val); + } while (i != b->ins && regcpy(i-1)); + bscopy(u, v); + if (i != b->ins && (i-1)->op == OCall) { + v->t[0] &= ~retregs((i-1)->arg[1], 0); + limit2(v, NISave, NFSave, 0); + for (r=0, n=0; n<NRSave; n++) + r |= BIT(rsave[n]); + v->t[0] |= argregs((i-1)->arg[1], 0); + } else { + limit2(v, 0, 0, 0); + r = v->t[0]; + } + sethint(v, r); + reloads(u, v); + do + emiti(*--i1); + while (i1 != i); + return i; +} + +/* spill code insertion + * requires spill costs, rpo, liveness + * + * Note: this will replace liveness + * information (in, out) with temporaries + * that must be in registers at block + * borders + * + * Be careful with: + * - OCopy instructions to ensure register + * constraints + */ +void +spill(Fn *fn) +{ + Blk *b, *s1, *s2, *hd, **bp; + int j, n, l, t, k, lvarg[2]; + BSet u[1], v[1], w[1]; + Ins *i; + Phi *p; + Mem *m; + bits r; + + tmp = fn->tmp; + ntmp = fn->ntmp; + bsinit(u, ntmp); + bsinit(v, ntmp); + bsinit(w, ntmp); + bsinit(mask[0], ntmp); + bsinit(mask[1], ntmp); + locs = fn->slot; + slot4 = 0; + slot8 = 0; + for (t=0; t<ntmp; t++) { + k = 0; + if (t >= XMM0 && t < XMM0 + NFReg) + k = 1; + else if (t >= Tmp0) + k = KBASE(tmp[t].cls); + bsset(mask[k], t); + } + + for (bp=&fn->rpo[fn->nblk]; bp!=fn->rpo;) { + b = *--bp; + /* invariant: all bocks with bigger rpo got + * their in,out updated. */ + + /* 1. find temporaries in registers at + * the end of the block (put them in v) */ + curi = 0; + s1 = b->s1; + s2 = b->s2; + hd = 0; + if (s1 && s1->id <= n) + hd = s1; + if (s2 && s2->id <= n) + if (!hd || s2->id >= hd->id) + hd = s2; + r = 0; + bszero(v); + if (hd) { + /* back-edge */ + for (k=0; k<2; k++) { + n = k == 0 ? NIReg : NFReg; + bscopy(u, b->out); + bsinter(u, mask[k]); + bscopy(w, u); + bsinter(u, hd->gen); + bsdiff(w, hd->gen); + if ((int)bscount(u) < n) { /* fixme */ + j = bscount(w); /* live through */ + l = hd->nlive[k]; + limit(w, n - (l - j), 0); + bsunion(u, w); + } else + limit(u, n, 0); + bsunion(v, u); + } + } else if (s1) { + liveon(v, b, s1); + if (s2) { + liveon(u, b, s2); + bscopy(w, u); + bsinter(w, v); + bsunion(v, u); + } + limit2(v, 0, 0, w); + } else if (rtype(b->jmp.arg) == RACall) { + /* return */ + r = retregs(b->jmp.arg, 0); + v->t[0] |= r; + } + bscopy(b->out, v); + + /* 2. process the block instructions */ + curi = &insb[NIns]; + for (i=&b->ins[b->nins]; i!=b->ins;) { + i--; + if (regcpy(i)) { + i = dopm(b, i, v); + continue; + } + bszero(w); + if (!req(i->to, R)) { + assert(rtype(i->to) == RTmp); + t = i->to.val; + if (bshas(v, t)) + bsclr(v, t); + else { + /* make sure we have a reg + * for the result */ + bsset(v, t); + bsset(w, t); + } + } + j = opdesc[i->op].nmem; + for (n=0; n<2; n++) + if (rtype(i->arg[n]) == RAMem) + j--; + for (n=0; n<2; n++) + switch (rtype(i->arg[n])) { + case RAMem: + t = i->arg[n].val; + m = &fn->mem[t & AMask]; + if (rtype(m->base) == RTmp) { + bsset(v, m->base.val); + bsset(w, m->base.val); + } + if (rtype(m->index) == RTmp) { + bsset(v, m->index.val); + bsset(w, m->index.val); + } + break; + case RTmp: + t = i->arg[n].val; + lvarg[n] = bshas(v, t); + bsset(v, t); + if (j-- <= 0) + bsset(w, t); + break; + } + bscopy(u, v); + limit2(v, 0, 0, w); + for (n=0; n<2; n++) + if (rtype(i->arg[n]) == RTmp) { + t = i->arg[n].val; + if (!bshas(v, t)) { + /* do not reload if the + * the temporary was dead + */ + if (!lvarg[n]) + bsclr(u, t); + i->arg[n] = slot(t); + } + } + reloads(u, v); + if (!req(i->to, R)) { + t = i->to.val; + store(i->to, tmp[t].slot); + bsclr(v, t); + } + emiti(*i); + r = v->t[0] & (BIT(Tmp0)-1); + if (r) + sethint(v, r); + } + assert(!r || b==fn->start); + + for (p=b->phi; p; p=p->link) { + assert(rtype(p->to) == RTmp); + t = p->to.val; + if (bshas(v, t)) { + bsclr(v, t); + store(p->to, tmp[t].slot); + } else if (bshas(b->in, t)) + /* only if the phi is live */ + p->to = slot(p->to.val); + } + bscopy(b->in, v); + b->nins = &insb[NIns] - curi; + idup(&b->ins, curi, b->nins); + } + + /* align the locals to a 16 byte boundary */ + assert(NAlign == 3); + slot8 += slot8 & 3; + fn->slot += slot8; + + if (debug['S']) { + fprintf(stderr, "\n> Block information:\n"); + for (b=fn->start; b; b=b->link) { + printf("\t%-10s (% 5d) ", b->name, b->loop); + dumpts(b->out, fn->tmp, stdout); + } + fprintf(stderr, "\n> After spilling:\n"); + printfn(fn, stderr); + } +} diff --git a/src/ssa.c b/src/ssa.c new file mode 100644 index 0000000..0c163aa --- /dev/null +++ b/src/ssa.c @@ -0,0 +1,516 @@ +#include "all.h" +#include <stdarg.h> + +static void +adduse(Tmp *tmp, int ty, Blk *b, ...) +{ + Use *u; + int n; + va_list ap; + + va_start(ap, b); + n = tmp->nuse; + vgrow(&tmp->use, ++tmp->nuse); + u = &tmp->use[n]; + u->type = ty; + u->bid = b->id; + switch (ty) { + default: + diag("ssa: adduse defaulted"); + case UPhi: + u->u.phi = va_arg(ap, Phi *); + break; + case UIns: + u->u.ins = va_arg(ap, Ins *); + break; + case UJmp: + break; + } + va_end(ap); +} + +/* fill usage, phi, and class information + */ +void +filluse(Fn *fn) +{ + Blk *b; + Phi *p; + Ins *i; + int m, t; + uint a; + Tmp *tmp; + + /* todo, is this the correct file? */ + tmp = fn->tmp; + for (t=0; t<fn->ntmp; t++) { + tmp[t].ndef = 0; + tmp[t].nuse = 0; + tmp[t].phi = 0; + tmp[t].cls = 0; + if (tmp[t].use == 0) + tmp[t].use = vnew(0, sizeof(Use)); + } + for (b=fn->start; b; b=b->link) { + for (p=b->phi; p; p=p->link) { + assert(rtype(p->to) == RTmp); + t = p->to.val; + tmp[t].ndef++; + tmp[t].cls = p->cls; + tmp[t].phi = p->to.val; + for (a=0; a<p->narg; a++) + if (rtype(p->arg[a]) == RTmp) { + t = p->arg[a].val; + adduse(&tmp[t], UPhi, b, p); + if (!tmp[t].phi) + tmp[t].phi = p->to.val; + } + } + for (i=b->ins; i-b->ins < b->nins; i++) { + if (!req(i->to, R)) { + assert(rtype(i->to) == RTmp); + t = i->to.val; + tmp[t].ndef++; + tmp[t].cls = i->cls; + } + for (m=0; m<2; m++) + if (rtype(i->arg[m]) == RTmp) { + t = i->arg[m].val; + adduse(&tmp[t], UIns, b, i); + } + } + if (rtype(b->jmp.arg) == RTmp) + adduse(&tmp[b->jmp.arg.val], UJmp, b); + } +} + +static void +addpred(Blk *bp, Blk *bc) +{ + uint i; + + if (!bc->pred) { + bc->pred = alloc(bc->npred * sizeof bc->pred[0]); + for (i=0; i<bc->npred; i++) + bc->pred[i] = 0; + } + for (i=0; bc->pred[i]; i++) + ; + bc->pred[i] = bp; +} + +/* fill predecessors information in blocks + */ +void +fillpreds(Fn *f) +{ + Blk *b; + + for (b=f->start; b; b=b->link) { + b->npred = 0; + b->pred = 0; + } + for (b=f->start; b; b=b->link) { + if (b->s1) + b->s1->npred++; + if (b->s2) + b->s2->npred++; + } + for (b=f->start; b; b=b->link) { + if (b->s1) + addpred(b, b->s1); + if (b->s2) + addpred(b, b->s2); + } +} + +static int +rporec(Blk *b, int x) +{ + Blk *s1, *s2; + + if (!b || b->id >= 0) + return x; + b->id = 1; + s1 = b->s1; + s2 = b->s2; + if (s1 && s2 && s1->loop > s2->loop) { + s1 = b->s2; + s2 = b->s1; + } + x = rporec(s1, x); + x = rporec(s2, x); + b->id = x; + assert(x >= 0); + return x - 1; +} + +/* fill the rpo information in blocks + */ +void +fillrpo(Fn *f) +{ + int n; + Blk *b, **p; + + for (b=f->start; b; b=b->link) + b->id = -1; + n = 1 + rporec(f->start, f->nblk-1); + f->nblk -= n; + f->rpo = alloc(f->nblk * sizeof f->rpo[0]); + for (p=&f->start; *p;) { + b = *p; + if (b->id == -1) { + *p = b->link; + /* todo, free block */ + } else { + b->id -= n; + f->rpo[b->id] = b; + p=&(*p)->link; + } + } +} + +/* for dominators computation, read + * "A Simple, Fast Dominance Algorithm" + * by K. Cooper, T. Harvey, and K. Kennedy. + */ + +static Blk * +inter(Blk *b1, Blk *b2) +{ + Blk *bt; + + if (b1 == 0) + return b2; + while (b1 != b2) { + if (b1->id < b2->id) { + bt = b1; + b1 = b2; + b2 = bt; + } + while (b1->id > b2->id) { + b1 = b1->idom; + assert(b1); + } + } + return b1; +} + +static void +filldom(Fn *fn) +{ + Blk *b, *d; + int ch, n; + uint p; + + for (b=fn->start; b; b=b->link) { + b->idom = 0; + b->dom = 0; + b->dlink = 0; + } + do { + ch = 0; + for (n=1; n<fn->nblk; n++) { + b = fn->rpo[n]; + d = 0; + for (p=0; p<b->npred; p++) + if (b->pred[p]->idom + || b->pred[p] == fn->start) + d = inter(d, b->pred[p]); + if (d != b->idom) { + ch++; + b->idom = d; + } + } + } while (ch); + for (b=fn->start; b; b=b->link) + if ((d=b->idom)) { + assert(d != b); + b->dlink = d->dom; + d->dom = b; + } +} + +static int +sdom(Blk *b1, Blk *b2) +{ + assert(b1 && b2); + if (b1 == b2) + return 0; + while (b2->id > b1->id) + b2 = b2->idom; + return b1 == b2; +} + +static int +dom(Blk *b1, Blk *b2) +{ + return b1 == b2 || sdom(b1, b2); +} + +static void +addfron(Blk *a, Blk *b) +{ + int n; + + for (n=0; n<a->nfron; n++) + if (a->fron[n] == b) + return; + if (!a->nfron) + a->fron = vnew(++a->nfron, sizeof a->fron[0]); + else + vgrow(&a->fron, ++a->nfron); + a->fron[a->nfron-1] = b; +} + +static void +fillfron(Fn *fn) +{ + Blk *a, *b; + + for (b=fn->start; b; b=b->link) { + if (b->s1) + for (a=b; !sdom(a, b->s1); a=a->idom) + addfron(a, b->s1); + if (b->s2) + for (a=b; !sdom(a, b->s2); a=a->idom) + addfron(a, b->s2); + } +} + +static Ref +refindex(int t, Fn *fn) +{ + return newtmp(fn->tmp[t].name, fn->tmp[t].cls, fn); +} + +static void +phiins(Fn *fn) +{ + BSet u[1], defs[1]; + Blk *a, *b, **blist, **be, **bp; + Ins *i; + Phi *p; + Ref r; + int t, n, k, nt; + + bsinit(u, fn->nblk); + bsinit(defs, fn->nblk); + blist = emalloc(fn->nblk * sizeof blist[0]); + be = &blist[fn->nblk]; + nt = fn->ntmp; + for (t=Tmp0; t<nt; t++) { + fn->tmp[t].visit = 0; + if (fn->tmp[t].phi != 0) + continue; + bszero(u); + k = -1; + bp = be; + for (b=fn->start; b; b=b->link) { + b->visit = 0; + r = R; + for (i=b->ins; i-b->ins < b->nins; i++) { + if (!req(r, R)) { + if (req(i->arg[0], TMP(t))) + i->arg[0] = r; + if (req(i->arg[1], TMP(t))) + i->arg[1] = r; + } + if (req(i->to, TMP(t))) { + if (!bshas(b->out, t)) { + if (fn->tmp[t].ndef == 1) + r = TMP(t); + else + r = refindex(t, fn); + i->to = r; + } else { + if (!bshas(u, b->id)) { + bsset(u, b->id); + *--bp = b; + } + if (k == -1) + k = i->cls; + assert(k == i->cls); + } + } + } + if (!req(r, R) && req(b->jmp.arg, TMP(t))) + b->jmp.arg = r; + } + bscopy(defs, u); + while (bp != be) { + fn->tmp[t].visit = t; + b = *bp++; + bsclr(u, b->id); + for (n=0; n<b->nfron; n++) { + a = b->fron[n]; + if (a->visit++ == 0) + if (bshas(a->in, t)) { + p = alloc(sizeof *p); + p->cls = k; + p->to = TMP(t); + p->link = a->phi; + a->phi = p; + if (!bshas(defs, a->id)) + if (!bshas(u, a->id)) { + bsset(u, a->id); + *--bp = a; + } + } + } + } + } + free(blist); +} + +typedef struct Name Name; +struct Name { + Ref r; + Blk *b; + Name *up; +}; + +static Name *namel; + +static Name * +nnew(Ref r, Blk *b, Name *up) +{ + Name *n; + + if (namel) { + n = namel; + namel = n->up; + } else + /* could use alloc, here + * but namel should be reset + */ + n = emalloc(sizeof *n); + n->r = r; + n->b = b; + n->up = up; + return n; +} + +static void +nfree(Name *n) +{ + n->up = namel; + namel = n; +} + +static void +rendef(Ref *r, Blk *b, Name **stk, Fn *fn) +{ + Ref r1; + int t; + + t = r->val; + if (req(*r, R) || !fn->tmp[t].visit) + return; + r1 = refindex(t, fn); + fn->tmp[r1.val].visit = t; + stk[t] = nnew(r1, b, stk[t]); + *r = r1; +} + +static Ref +getstk(int t, Blk *b, Name **stk) +{ + Name *n, *n1; + + n = stk[t]; + while (n && !dom(n->b, b)) { + n1 = n; + n = n->up; + nfree(n1); + } + stk[t] = n; + if (!n) { + /* uh, oh, warn */ + return CON_Z; + } else + return n->r; +} + +static void +renblk(Blk *b, Name **stk, Fn *fn) +{ + Phi *p; + Ins *i; + Blk *s, **ps, *succ[3]; + int t, m; + + for (p=b->phi; p; p=p->link) + rendef(&p->to, b, stk, fn); + for (i=b->ins; i-b->ins < b->nins; i++) { + for (m=0; m<2; m++) { + t = i->arg[m].val; + if (rtype(i->arg[m]) == RTmp) + if (fn->tmp[t].visit) + i->arg[m] = getstk(t, b, stk); + } + rendef(&i->to, b, stk, fn); + } + t = b->jmp.arg.val; + if (rtype(b->jmp.arg) == RTmp) + if (fn->tmp[t].visit) + b->jmp.arg = getstk(t, b, stk); + succ[0] = b->s1; + succ[1] = b->s2; + succ[2] = 0; + for (ps=succ; (s=*ps); ps++) + for (p=s->phi; p; p=p->link) { + t = p->to.val; + if ((t=fn->tmp[t].visit)) { + m = p->narg++; + if (m == NPred) + diag("ssa: too many phi arguments"); + p->arg[m] = getstk(t, b, stk); + p->blk[m] = b; + } + } + for (s=b->dom; s; s=s->dlink) + renblk(s, stk, fn); +} + +/* require ndef */ +void +ssa(Fn *fn) +{ + Name **stk, *n; + int d, nt; + Blk *b, *b1; + + nt = fn->ntmp; + stk = emalloc(nt * sizeof stk[0]); + d = debug['L']; + debug['L'] = 0; + filldom(fn); + if (debug['N']) { + fprintf(stderr, "\n> Dominators:\n"); + for (b1=fn->start; b1; b1=b1->link) { + if (!b1->dom) + continue; + fprintf(stderr, "%10s:", b1->name); + for (b=b1->dom; b; b=b->dlink) + fprintf(stderr, " %s", b->name); + fprintf(stderr, "\n"); + } + } + fillfron(fn); + filllive(fn); + phiins(fn); + renblk(fn->start, stk, fn); + while (nt--) + while ((n=stk[nt])) { + stk[nt] = n->up; + nfree(n); + } + debug['L'] = d; + free(stk); + if (debug['N']) { + fprintf(stderr, "\n> After SSA construction:\n"); + printfn(fn, stderr); + } +} diff --git a/src/test/_alt.ssa b/src/test/_alt.ssa new file mode 100644 index 0000000..3f89e5e --- /dev/null +++ b/src/test/_alt.ssa @@ -0,0 +1,25 @@ +# an example with reducible control +# flow graph that exposes poor +# handling of looping constructs + +function $test() { +@start + %ten =w copy 10 + %dum =w copy 0 # dummy live-through temporary +@loop + %alt =w phi @start 0, @left %alt1, @right %alt1 + %cnt =w phi @start 100, @left %cnt, @right %cnt1 + %alt1 =w sub 1, %alt + jnz %alt1, @right, @left +@left + %x =w phi @loop 10, @left %x1 + %x1 =w sub %x, 1 + %z =w copy %x + jnz %z, @left, @loop +@right + %cnt1 =w sub %cnt, %ten + jnz %cnt1, @loop, @end +@end + %ret =w add %cnt, %dum + ret +} diff --git a/src/test/_dragon.ssa b/src/test/_dragon.ssa new file mode 100644 index 0000000..b169e1b --- /dev/null +++ b/src/test/_dragon.ssa @@ -0,0 +1,33 @@ +# a moderately complex test for +# dominators computation from +# the dragon book +# because branching is limited to +# two, I had to split some blocks + +function $dragon() { +@start +@b1 + jnz 0, @b2, @b3 +@b2 + jmp @b3 +@b3 + jmp @b4.1 +@b4.1 + jnz 0, @b3, @b4.2 +@b4.2 + jnz 0, @b5, @b6 +@b5 + jmp @b7 +@b6 + jmp @b7 +@b7 + jnz 0, @b8.1, @b4.1 +@b8.1 + jnz 0, @b3, @b8.2 +@b8.2 + jnz 0, @b9, @b10 +@b9 + jmp @b1 +@b10 + jmp @b7 +} diff --git a/src/test/_fix1.ssa b/src/test/_fix1.ssa new file mode 100644 index 0000000..e89307f --- /dev/null +++ b/src/test/_fix1.ssa @@ -0,0 +1,15 @@ +function $test() { +@start + %x =w copy 1 +@loop + jnz %x, @noz, @isz +@noz + %x =w copy 0 + jmp @end +@isz + %x =w copy 1 + jmp @loop +@end + %z =w add 10, %x + ret +} diff --git a/src/test/_fix2.ssa b/src/test/_fix2.ssa new file mode 100644 index 0000000..89f236d --- /dev/null +++ b/src/test/_fix2.ssa @@ -0,0 +1,15 @@ +function $test() { +@start + %x =w copy 1 +@loop + jnz %x, @noz, @isz +@noz + %x =w copy 0 + jnz %x, @loop, @end +@isz + %x =w copy 1 + jmp @loop +@end + %z =w add 10, %x + ret +} diff --git a/src/test/_fix3.ssa b/src/test/_fix3.ssa new file mode 100644 index 0000000..283e5a1 --- /dev/null +++ b/src/test/_fix3.ssa @@ -0,0 +1,20 @@ +function w $test() { +@start + %x =w copy 100 + %s =w copy 0 +@l + %c =w cslew %x, 10 + jnz %c, @a, @b +@a + %s =w add %s, %x + %x =w sub %x, 1 + jmp @c +@b + %s =w sub %s, %x + jmp @c +@c + %x =w sub %x, 1 + jnz %x, @l, @end +@end + ret %s +} diff --git a/src/test/_fix4.ssa b/src/test/_fix4.ssa new file mode 100644 index 0000000..181768d --- /dev/null +++ b/src/test/_fix4.ssa @@ -0,0 +1,27 @@ +function $test() { +@start + %x =w copy 3 + %n =w copy 2 +@loop + %c =w ceqw %n, 10000 + jnz %c, @end, @next +@next + %t =w copy 3 + %x =w add %x, 2 +@tloop + %s =w mul %t, %t + %c =w csgtw %s, %x + jnz %c, @prime, @test +@test + %r =w rem %x, %t + jnz %r, @tnext, @loop +@tnext + %t =w add %t, 2 + jmp @tloop +@prime + %n =w add %n, 1 + jmp @loop +@end + storew %x, $a + ret +} diff --git a/src/test/_live.ssa b/src/test/_live.ssa new file mode 100644 index 0000000..fce4cb9 --- /dev/null +++ b/src/test/_live.ssa @@ -0,0 +1,21 @@ +# this control flow graph is irreducible +# yet, we expecet the liveness analysis +# to work properly and make %x live in +# the block @left +# +# nothing should ever be live at the entry + +function $test() { +@start + %b =w copy 0 + %x =w copy 10 + jnz 0, @loop, @left +@left + jmp @inloop +@loop + %x1 =w add %x, 1 +@inloop + %b1 =w add %b, 1 +@endloop + jmp @loop +} diff --git a/src/test/_rpo.ssa b/src/test/_rpo.ssa new file mode 100644 index 0000000..a10c6b1 --- /dev/null +++ b/src/test/_rpo.ssa @@ -0,0 +1,12 @@ +function $test() { +@start + jmp @foo +@baz + jnz 1, @end, @foo +@bar + jmp @end +@foo + jnz 0, @bar, @baz +@end + ret +} diff --git a/src/test/_spill1.ssa b/src/test/_spill1.ssa new file mode 100644 index 0000000..df5e4c2 --- /dev/null +++ b/src/test/_spill1.ssa @@ -0,0 +1,22 @@ +# test with NReg == 3 +# there must be a spill +# happening on %c +# +# if you replace the sub +# by an add or comment +# the two marked lines +# there should be no +# spill +# + +function $test() { +@start + %f =w copy 0 # here + %b =w copy 1 + %c =w copy 2 + %a =w sub %b, %c + %d =w copy %b + %e =w copy %f # and there + %g =w copy %a + ret +} diff --git a/src/test/_spill2.ssa b/src/test/_spill2.ssa new file mode 100644 index 0000000..d462d0b --- /dev/null +++ b/src/test/_spill2.ssa @@ -0,0 +1,22 @@ +# stupid spilling test + +function $test() { +@start + %x1 =w copy 10 + %x2 =w add %x1, %x1 + %x3 =w sub %x2, %x1 + %x4 =w add %x3, %x1 + %x5 =w sub %x4, %x1 + %x6 =w add %x5, %x1 + %x7 =w sub %x6, %x1 + %x8 =w add %x7, %x1 + %x9 =w sub %x8, %x8 + %x10 =w add %x9, %x7 + %x11 =w sub %x10, %x6 + %x12 =w add %x11, %x5 + %x13 =w sub %x12, %x4 + %x14 =w add %x13, %x3 + %x15 =w sub %x14, %x2 + %x16 =w add %x15, %x1 + ret +} diff --git a/src/test/_spill3.ssa b/src/test/_spill3.ssa new file mode 100644 index 0000000..cdfda2d --- /dev/null +++ b/src/test/_spill3.ssa @@ -0,0 +1,24 @@ +# make sure comparisons +# never get their two +# operands in memory +# run with NReg == 3, or +# adapt it! + +function $test() { +@start + %a =w loadw $a + %b =w loadw $a + +@loop + %c =w phi @start 0, @loop %f + %d =w phi @start 0, @loop %g + %e =w phi @start 0, @loop %h + %f =w add %c, %d + %g =w add %c, %e + %h =w add %e, %d + %x =w cslew %a, %b + jnz %x, @loop, @end + +@end + ret +} diff --git a/src/test/abi1.ssa b/src/test/abi1.ssa new file mode 100644 index 0000000..69cce44 --- /dev/null +++ b/src/test/abi1.ssa @@ -0,0 +1,59 @@ +# test calling into C with two +# large struct arguments (passed +# on the stack) + +type :mem = { b 17 } + +function $alpha(l %p, w %l, l %n) { +@ini + %pe =l add %p, %n +@lop + %p1 =l phi @ini %p, @lop %p2 + %l1 =w phi @ini %l, @lop %l2 + storeb %l1, %p1 + %p2 =l add %p1, 1 + %l2 =w add %l1, 1 + %c1 =w ceql %p1, %pe + jnz %c1, @end, @lop +@end + storeb 0, %pe + ret +} + +function $test() { +@start + %p =l alloc4 17 + %q =l alloc4 17 + %r0 =w call $alpha(l %p, w 65, l 16) + %r1 =w call $alpha(l %q, w 97, l 16) + %r2 =w call $fcb(:mem %p, w 1, w 2, w 3, w 4, w 5, w 6, w 7, w 8, w 9, :mem %q) + ret +} + + +# >>> driver +# #include <stdio.h> +# typedef struct { char t[17]; } mem; +# extern void test(); +# void fcb(mem m, int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, mem n) { +# printf("fcb: m = (mem){ t = \"%s\" }\n", m.t); +# printf(" n = (mem){ t = \"%s\" }\n", n.t); +# #define T(n) printf(" i%d = %d\n", n, i##n); +# T(1) T(2) T(3) T(4) T(5) T(6) T(7) T(8) T(9) +# } +# int main() { test(); return 0; } +# <<< + +# >>> output +# fcb: m = (mem){ t = "ABCDEFGHIJKLMNOP" } +# n = (mem){ t = "abcdefghijklmnop" } +# i1 = 1 +# i2 = 2 +# i3 = 3 +# i4 = 4 +# i5 = 5 +# i6 = 6 +# i7 = 7 +# i8 = 8 +# i9 = 9 +# <<< diff --git a/src/test/abi2.ssa b/src/test/abi2.ssa new file mode 100644 index 0000000..b82c80c --- /dev/null +++ b/src/test/abi2.ssa @@ -0,0 +1,18 @@ +type :fps = { s, b, s } + +function s $sum(:fps %p) { +@start + %f1 =s load %p + %p8 =l add 8, %p + %f2 =s load %p8 + %s =s add %f1, %f2 + ret %s +} + +# >>> driver +# typedef struct { float f1; char b; float f2; } fps; +# extern float sum(fps); +# int main() { fps x = { 1.23, -1, 2.34 }; return !(sum(x) == 1.23f+2.34f); } +# /* Note the f suffixes above are important +# * otherwise C does double operations. */ +# <<< diff --git a/src/test/abi3.ssa b/src/test/abi3.ssa new file mode 100644 index 0000000..608d1db --- /dev/null +++ b/src/test/abi3.ssa @@ -0,0 +1,43 @@ +type :four = {l, b, w} + +data $z = { w 0 } + +function $test() { + @start + %a =w loadw $z + %y =w add %a, %a + + %s =l alloc8 16 # allocate a :four struct + %s1 =l add %s, 12 # get address of the w + storel 4, %s # set the l + storew 5, %s1 # set the w + + # only the last argument should be on the stack + %f =l add $F, %y + %x =w call %f(w %y, w 1, w 2, w 3, :four %s, w 6) + + # store the result in the + # global variable a + + %x1 =w add %y, %x + storew %x1, $a + ret +} + +# >>> driver +# #include <stdio.h> +# struct four { long l; char c; int i; }; +# extern void test(void); +# int F(int a0, int a1, int a2, int a3, struct four s, int a6) { +# printf("%d %d %d %d %d %d %d\n", +# a0, a1, a2, a3, (int)s.l, s.i, a6); +# return 42; +# } +# int a; +# int main() { test(); printf("%d\n", a); return 0; } +# <<< + +# >>> output +# 0 1 2 3 4 5 6 +# 42 +# <<< diff --git a/src/test/abi4.ssa b/src/test/abi4.ssa new file mode 100644 index 0000000..4c3d89b --- /dev/null +++ b/src/test/abi4.ssa @@ -0,0 +1,38 @@ +# return a large struct to C + +type :mem = { b 17 } + +function $alpha(l %p, w %l, l %n) { +@ini + %pe =l add %p, %n +@lop + %p1 =l phi @ini %p, @lop %p2 + %l1 =w phi @ini %l, @lop %l2 + storeb %l1, %p1 + %p2 =l add %p1, 1 + %l2 =w add %l1, 1 + %c1 =w ceql %p1, %pe + jnz %c1, @end, @lop +@end + storeb 0, %pe + ret +} + +function :mem $test() { +@start + %p =l alloc4 17 + %r0 =w call $alpha(l %p, w 65, l 16) + ret %p +} + + +# >>> driver +# #include <stdio.h> +# typedef struct { char t[17]; } mem; +# extern mem test(void); +# int main() { mem m = test(); printf("%s\n", m.t); return 0; } +# <<< + +# >>> output +# ABCDEFGHIJKLMNOP +# <<< diff --git a/src/test/abi5.ssa b/src/test/abi5.ssa new file mode 100644 index 0000000..4c5eaea --- /dev/null +++ b/src/test/abi5.ssa @@ -0,0 +1,105 @@ +# returning structs from C + +type :st1 = { b 17 } +type :st2 = { w } +type :st3 = { s, w } +type :st4 = { w, d } +type :st5 = { s, l } +type :st6 = { b 16 } +type :st7 = { s, d } +type :st8 = { w 4 } + +data $fmt1 = { b "t1: %s\n", b 0 } +data $fmt2 = { b "t2: %d\n", b 0 } +data $fmt3 = { b "t3: %f %d\n", b 0 } +data $fmt4 = { b "t4: %d %f\n", b 0 } +data $fmt5 = { b "t5: %f %lld\n", b 0 } +data $fmt6 = { b "t6: %s\n", b 0 } +data $fmt7 = { b "t7: %f %f\n", b 0 } +data $fmt8 = { b "t8: %d %d %d %d\n", b 0 } + +function $test() { +@start + %r1 =:st1 call $t1() + %i1 =w call $printf(l $fmt1, l %r1) + + %r2 =:st2 call $t2() + %w2 =w loadw %r2 + %i2 =w call $printf(l $fmt2, w %w2) + + %r3 =:st3 call $t3() + %s3 =s loads %r3 + %r34 =l add %r3, 4 + %w3 =w loadw %r34 + %p3 =d exts %s3 + %i3 =w call $printf(l $fmt3, d %p3, w %w3) + + %r4 =:st4 call $t4() + %w4 =w loadw %r4 + %r48 =l add 8, %r4 + %d4 =d loadd %r48 + %i4 =w call $printf(l $fmt4, w %w4, d %d4) + + %r5 =:st5 call $t5() + %s5 =s loads %r5 + %d5 =d exts %s5 + %r58 =l add %r5, 8 + %l5 =l loadl %r58 + %i5 =w call $printf(l $fmt5, d %d5, l %l5) + + %r6 =:st6 call $t6() + %i6 =w call $printf(l $fmt6, l %r6) + + %r7 =:st7 call $t7() + %s7 =s loads %r7 + %d71 =d exts %s7 + %r78 =l add %r7, 8 + %d72 =d loadd %r78 + %i7 =w call $printf(l $fmt7, d %d71, d %d72) + + %r8 =:st8 call $t8() + %r84 =l add 4, %r8 + %r88 =l add 4, %r84 + %r812 =l add 4, %r88 + %w81 =w loadw %r8 + %w82 =w loadw %r84 + %w83 =w loadw %r88 + %w84 =w loadw %r812 + %i8 =w call $printf(l $fmt8, w %w81, w %w82, w %w83, w %w84) + + ret +} + + +# >>> driver +# #include <stdio.h> +# typedef struct { char t[17]; } st1; +# typedef struct { int i; } st2; +# typedef struct { float f; int i; } st3; +# typedef struct { int i; double d; } st4; +# typedef struct { float f; long l; } st5; +# typedef struct { char t[16]; } st6; +# typedef struct { float f; double d; } st7; +# typedef struct { int i[4]; } st8; +# extern void test(void); +# st1 t1() { return (st1){"abcdefghijklmnop"}; } +# st2 t2() { return (st2){2}; } +# st3 t3() { return (st3){3.0,30}; } +# st4 t4() { return (st4){4,-40}; } +# st5 t5() { return (st5){5.5,-55}; } +# st6 t6() { return (st6){"abcdefghijklmno"}; } +# st7 t7() { return (st7){7.77,77.7}; } +# st8 t8() { return (st8){-8,88,-888,8888}; } +# int main() { test(); return 0; } +# <<< + +# >>> output +# t1: abcdefghijklmnop +# t2: 2 +# t3: 3.000000 30 +# t4: 4 -40.000000 +# t5: 5.500000 -55 +# t6: abcdefghijklmno +# t7: 7.770000 77.700000 +# t8: -8 88 -888 8888 +# <<< diff --git a/src/test/align.ssa b/src/test/align.ssa new file mode 100644 index 0000000..84d1fb9 --- /dev/null +++ b/src/test/align.ssa @@ -0,0 +1,16 @@ +function $test() { +@start + %x =l alloc16 16 + %y =l add %x, 8 + %m =w rem %y, 16 + storew %m, %y + %n =w loadw %y + storew %n, $a + ret +} + +# >>> driver +# extern void test(void); +# int a; +# int main() { test(); return !(a == 8 || a == -8); } +# <<< diff --git a/src/test/collatz.ssa b/src/test/collatz.ssa new file mode 100644 index 0000000..373ecac --- /dev/null +++ b/src/test/collatz.ssa @@ -0,0 +1,61 @@ +# a solution for N=1000 to +# https://projecteuler.net/problem=14 +# we use a fast local array to +# memoize small collatz numbers + +function $test() { +@start + %mem =l alloc4 4000 +@loop + %n =w phi @start 1, @newm %n9, @oldm %n9 + %cmax =w phi @start 0, @newm %c, @oldm %cmax + %fin =w csltw %n, 1000 + jnz %fin, @cloop, @end +@cloop + %n0 =w phi @loop %n, @odd %n2, @even %n3 + %c0 =w phi @loop 0, @odd %c1, @even %c1 + %no1 =w cnew %n0, 1 + jnz %no1, @iter0, @endcl +@iter0 + %ism =w csltw %n0, %n + jnz %ism, @getmemo, @iter1 +@iter1 + %c1 =w add %c0, 1 + %p =w and %n0, 1 + jnz %p, @odd, @even +@odd + %n1 =w mul 3, %n0 + %n2 =w add %n1, 1 + jmp @cloop +@even + %n3 =w div %n0, 2 + jmp @cloop +@getmemo # get the count for n0 in mem + %n0l =l extsw %n0 + %idx0 =l mul %n0l, 4 + %loc0 =l add %idx0, %mem + %cn0 =w loadw %loc0 + %c2 =w add %c0, %cn0 +@endcl # store the count for n in mem + %c =w phi @getmemo %c2, @cloop %c0 + %nl =l extsw %n + %idx1 =l mul %nl, 4 + %loc1 =l add %idx1, %mem + storew %c, %loc1 + %n9 =w add 1, %n + %big =w cslew %cmax, %c + jnz %big, @newm, @oldm +@newm + jmp @loop +@oldm + jmp @loop +@end + storew %cmax, $a + ret +} + +# >>> driver +# extern void test(void); +# int a; +# int main() { test(); return !(a == 178); } +# <<< diff --git a/src/test/cprime.ssa b/src/test/cprime.ssa new file mode 100644 index 0000000..1ca60e1 --- /dev/null +++ b/src/test/cprime.ssa @@ -0,0 +1,103 @@ +# generated by Andrew Chambers' +# compiler from the C program +# following in comments + +function w $main() { +@start + %v0 =l alloc8 4 + %v1 =l alloc8 4 + %v2 =l alloc8 4 + %v3 =l alloc8 4 + %v4 =l alloc8 4 + storew 5, %v1 + storew 11, %v2 + storew 12, %v3 +@L0 + %v5 =w loadw %v1 + %v6 =w cnew %v5, 201 + jnz %v6, @L8, @L1 +@L8 + storew 1, %v4 + %v7 =w loadw %v3 + %v8 =w rem %v7, 2 + %v9 =w ceqw %v8, 0 + jnz %v9, @L9, @L5 +@L9 + storew 0, %v4 +@L5 + storew 3, %v0 +@L2 + %v10 =w loadw %v0 + %v11 =w loadw %v3 + %v12 =w csltw %v10, %v11 + jnz %v12, @L10, @L3 +@L10 + %v13 =w loadw %v3 + %v14 =w loadw %v0 + %v15 =w rem %v13, %v14 + %v16 =w ceqw %v15, 0 + jnz %v16, @L11, @L4 +@L11 + storew 0, %v4 + jmp @L3 +@L4 + %v17 =w loadw %v0 + %v18 =w add %v17, 2 + storew %v18, %v0 + jmp @L2 +@L3 + %v19 =w loadw %v4 + jnz %v19, @L12, @L6 +@L12 + %v20 =w loadw %v3 + storew %v20, %v2 + %v21 =w loadw %v1 + %v22 =w add %v21, 1 + storew %v22, %v1 +@L6 + %v23 =w loadw %v3 + %v24 =w add %v23, 1 + storew %v24, %v3 + jmp @L0 +@L1 + %v25 =w loadw %v2 + %v26 =w cnew %v25, 1229 + jnz %v26, @L13, @L7 +@L13 + ret 1 +@L7 + ret 0 +@end + ret 0 +} + +# int +# main() +# { +# int i, n, p, next, isprime; +# +# n = 5; +# p = 11; +# next = 12; +# while(n != 201) { +# isprime = 1; +# if(next % 2 == 0) { +# isprime = 0; +# } else { +# for(i = 3; i < next; i = i + 2) { +# if(next % i == 0) { +# isprime = 0; +# break; +# } +# } +# } +# if(isprime) { +# p = next; +# n = n + 1; +# } +# next = next + 1; +# } +# if(p != 1229) +# return 1; +# return 0; +# } diff --git a/src/test/cup.ssa b/src/test/cup.ssa new file mode 100644 index 0000000..013394f --- /dev/null +++ b/src/test/cup.ssa @@ -0,0 +1,17 @@ +# counts up from -1988 to 1991 + +function $test() { +@start +@loop + %n0 =l phi @start -1988, @loop %n1 + %n1 =l add 1, %n0 + %cmp =w cslel 1991, %n1 + jnz %cmp, @end, @loop +@end + ret +} + +# >>> driver +# extern void test(void); +# int main() { test(); return 0; } +# <<< diff --git a/src/test/dark.ssa b/src/test/dark.ssa new file mode 100644 index 0000000..5046af3 --- /dev/null +++ b/src/test/dark.ssa @@ -0,0 +1,30 @@ +# a hack example, +# we use a dark type to get +# a pointer to the stack. + +type :magic = align 1 { 0 } + +data $ret = { l 0 } + +function $test(:magic %p) { +@start + %av =w loadw $a + %a1 =w add 1, %av + storew %a1, $a # increment $a + %r1 =l loadl $ret # fetch from $ret + %p1 =l add %p, -8 + %r2 =l loadl %p1 # get the return address + storel %r2, $ret # store it in $ret + %c =w ceql %r1, %r2 + jnz %c, @fin, @cal +@cal + %i =w call $test() # no argument given, intentionally! +@fin + ret +} + +# >>> driver +# extern void test(void); +# int a = 2; +# int main() { test(); return !(a == 5); } +# <<< diff --git a/src/test/double.ssa b/src/test/double.ssa new file mode 100644 index 0000000..d885d28 --- /dev/null +++ b/src/test/double.ssa @@ -0,0 +1,24 @@ +function $test() { +@start + %x1 =d copy d_0.1 + %x2 =d add d_0.2, %x1 + %x3 =d sub %x2, d_0.3 + +@loop + %x4 =d phi @start %x3, @loop %x5 + %i1 =w phi @start 0, @loop %i2 + %x5 =d add %x4, %x4 + %i2 =w add %i1, 1 + %c0 =w cled %x5, 4607182418800017408 # d_1.0 + jnz %c0, @loop, @end + +@end + storew %i2, $a + ret +} + +# >>> driver +# extern void test(void); +# int a; +# int main() { test(); return !(a == 55); } +# <<< diff --git a/src/test/echo.ssa b/src/test/echo.ssa new file mode 100644 index 0000000..d3c8a25 --- /dev/null +++ b/src/test/echo.ssa @@ -0,0 +1,32 @@ +function w $main(w %argc, l %argv) { +@start + %fmt =l alloc8 8 + storel 1663398693, %fmt # "%s%c" + %av0 =l add %argv, 8 + %ac0 =w sub %argc, 1 +@loop + %av =l phi @start %av0, @loop2 %av1 + %ac =w phi @start %ac0, @loop2 %ac1 + %c0 =w ceqw %ac, 0 + jnz %c0, @end, @loop1 +@loop1 + %c1 =w ceqw %ac, 1 + jnz %c1, @last, @nolast +@last + jmp @loop2 +@nolast + jmp @loop2 +@loop2 + %sep =w phi @last 10, @nolast 32 + %arg =l loadl %av + %r =w call $printf(l %fmt, l %arg, w %sep) + %av1 =l add %av, 8 + %ac1 =w sub %ac, 1 + jmp @loop +@end + ret 0 +} + +# >>> output +# a b c +# <<< diff --git a/src/test/eucl.ssa b/src/test/eucl.ssa new file mode 100644 index 0000000..f50fd2c --- /dev/null +++ b/src/test/eucl.ssa @@ -0,0 +1,24 @@ +# euclide's algorithm in ssa +# it is a fairly interesting +# ssa program because of the +# swap of b and a + +function $test() { +@start + +@loop + %a =w phi @start 380, @loop %r + %b =w phi @start 747, @loop %a + %r =w rem %b, %a + jnz %r, @loop, @end + +@end + storew %a, $a + ret +} + +# >>> driver +# extern void test(void); +# int a; +# int main() { test(); return !(a == 1); } +# <<< diff --git a/src/test/euclc.ssa b/src/test/euclc.ssa new file mode 100644 index 0000000..c76db2f --- /dev/null +++ b/src/test/euclc.ssa @@ -0,0 +1,29 @@ +function w $test() { +@l0 + %a =l alloc4 4 + %b =l alloc4 4 + %r =l alloc4 4 + storew 747, %a + storew 380, %b +@l1 + %t4 =w loadw %b + jnz %t4, @l2, @l3 +@l2 + %t7 =w loadw %a + %t8 =w loadw %b + %t6 =w rem %t7, %t8 + storew %t6, %r + %t10 =w loadw %b + storew %t10, %a + %t12 =w loadw %r + storew %t12, %b + jmp @l1 +@l3 + %t13 =w loadw %a + ret %t13 +} + +# >>> driver +# extern int test(void); +# int main() { return !(test() == 1); } +# <<< diff --git a/src/test/fpcnv.ssa b/src/test/fpcnv.ssa new file mode 100644 index 0000000..5fd3be9 --- /dev/null +++ b/src/test/fpcnv.ssa @@ -0,0 +1,27 @@ +# floating point casts and conversions + +function s $fneg(s %f) { +@fneg + %b0 =w cast %f + %b1 =w xor 2147483648, %b0 + %rs =s cast %b1 + ret %rs +} + +function d $ftrunc(d %f) { +@ftrunc + %l0 =l ftosi %f + %rt =d sitof %l0 + ret %rt +} + +# >>> driver +# extern float fneg(float); +# extern double ftrunc(double); +# int main() { +# if (fneg(1.23f) != -1.23f) return 1; +# if (ftrunc(3.1415) != 3.0) return 2; +# if (ftrunc(-1.234) != -1.0) return 3; +# return 0; +# } +# <<< diff --git a/src/test/go.sh b/src/test/go.sh new file mode 100755 index 0000000..35bf525 --- /dev/null +++ b/src/test/go.sh @@ -0,0 +1,116 @@ +#!/bin/sh + +TMP=/tmp/qbe.zzzz + +DRV=$TMP.c +ASM=$TMP.s +BIN=$TMP.bin +OUT=$TMP.out + +cleanup() { + rm -f $DRV $ASM $BIN $OUT +} + +extract() { + WHAT="$1" + FILE="$2" + + awk " + /^# >>> $WHAT/ { + p = 1 + next + } + /^# <<</ { + if (p) + p = 0 + } + p + " $FILE \ + | sed -e 's/# //' \ + | sed -e 's/#$//' +} + +once() { + T="$1" + + if ! test -f $T + then + echo "invalid test file $T" >&2 + exit 1 + fi + + echo "$T... " + + if ! ./qbe $T -o $ASM + then + echo "[qbe fail]" + return 1 + fi + + extract driver $T > $DRV + extract output $T > $OUT + + if test -s $DRV + then + LNK="$DRV $ASM" + else + LNK="$ASM" + fi + + if ! cc -g -o $BIN $LNK + then + echo "[cc fail]" + return 1 + fi + + if test -s $OUT + then + $BIN a b c | diff - $OUT + RET=$? + REASON="output" + else + $BIN a b c + RET=$? + REASON="returned $RET" + fi + + if test $RET -ne 0 + then + echo "[$REASON fail]" + return 1 + fi + + printf "\033[1A\033[45C[ok]\n" +} + + +#trap cleanup TERM QUIT + +if test -z "$1" +then + echo "usage: test/go.sh {all, SSAFILE}" 2>&1 + exit 1 +fi + +case $1 in + "all") + F=0 + for T in test/[!_]*.ssa + do + once $T + F=`expr $F + $?` + done + if test $F -ge 1 + then + echo + echo "$F test(s) failed!" + else + echo + echo "All is fine!" + fi + ;; + *) + once $1 + exit $? + ;; +esac diff --git a/src/test/loop.ssa b/src/test/loop.ssa new file mode 100644 index 0000000..c8c4ee0 --- /dev/null +++ b/src/test/loop.ssa @@ -0,0 +1,23 @@ +# simple looping program +# sums all integers from 100 to 0 + +function $test() { +@start + +@loop + %s =w phi @start 0, @loop %s1 + %n =w phi @start 100, @loop %n1 + %s1 =w add %s, %n + %n1 =w sub %n, 1 + jnz %n1, @loop, @end + +@end + storew %s1, $a + ret +} + +# >>> driver +# extern void test(void); +# int a; +# int main() { test(); return !(a == 5050); } +# <<< diff --git a/src/test/mandel.ssa b/src/test/mandel.ssa new file mode 100644 index 0000000..efefeb3 --- /dev/null +++ b/src/test/mandel.ssa @@ -0,0 +1,123 @@ +# Print the Mandelbrot set on the +# terminal line output. + +function w $mandel(d %x, d %y) { +@mandel + %cr =d sub %y, d_0.5 + %ci =d copy %x +@loop + %i =w phi @mandel 0, @loop1 %i1 + %zr =d phi @mandel d_0, @loop1 %zr1 + %zi =d phi @mandel d_0, @loop1 %zi1 + %i1 =w add 1, %i + %tmp =d mul %zr, %zi + %zr2 =d mul %zr, %zr + %zi2 =d mul %zi, %zi + %zrx =d sub %zr2, %zi2 + %zr1 =d add %zrx, %cr + %zix =d add %tmp, %tmp + %zi1 =d add %zix, %ci + %sum =d add %zi2, %zr2 + %cmp1 =w cgtd %sum, d_16 + jnz %cmp1, @reti, @loop1 +@loop1 + %cmp2 =w csgtw %i1, 1000 + jnz %cmp2, @ret0, @loop +@reti + ret %i1 +@ret0 + ret 0 +} + +function w $main() { +@main +@loopy + %y =d phi @main d_-1, @loopy1 %y1 +@loopx + %x =d phi @loopy d_-1, @loopx1 %x1 + %i =w call $mandel(d %x, d %y) + jnz %i, @out, @in +@in + %r0 =w call $putchar(w 42) # '*' + jmp @loopx1 +@out + %r1 =w call $putchar(w 32) # ' ' + jmp @loopx1 +@loopx1 + %x1 =d add %x, d_0.032 + %cmp1 =w cgtd %x1, d_1 + jnz %cmp1, @loopy1, @loopx +@loopy1 + %r2 =w call $putchar(w 10) # '\n' + %y1 =d add %y, d_0.032 + %cmp2 =w cgtd %y1, d_1 + jnz %cmp2, @ret, @loopy +@ret + ret 0 +} + +# >>> output +# # +# # +# # +# # +# * # +# **** # +# **** # +# *** # +# ***** # +# ********* # +# ************ # +# ***************** # +# **************** # +# *************** # +# **************** # +# **************** # +# ***************** # +# **************** # +# **************** # +# ************** # +# ************* # +# ************ # +# ********* # +# ***** # +# *********** # +# ***************** # +# ********************** # +# * *********************** ** # +# *************************** # +# ***************************** # +# * ******************************* ** # +# ** *********************************** # +# *********************************** * # +# *********************************** # +# ************************************* # +# ************************************* # +# *************************************** # +# *************************************** # +# *************************************** # +# **************************************** # +# * **************************************** # +# ********************************************** **** # +# **************************************************** # +# * ***************************************************** # +# * ***************************************************** # +# ***** **************************************** **** # +# * **************************************** * # +# **************************************** # +# *************************************** # +# **************************************** # +# *************************************** # +# **************************************** # +# ************************************ # +# *********************************** # +# ********************************* # +# ************************************ # +# *** ************* ************** *** # +# *********** ************ ** # +# ******** ******** # +# ** * * # +# # +# # +# # +# <<< diff --git a/src/test/max.ssa b/src/test/max.ssa new file mode 100644 index 0000000..547e9d4 --- /dev/null +++ b/src/test/max.ssa @@ -0,0 +1,33 @@ +# find the maximum value +# in a nul-terminated array +# of unsigned bytes +# +# the output is stored in $a + +data $arr = { b 10, b -60, b 10, b 100, b 200, b 0 } + +function $test() { +@start +@loop + %max =w phi @start -1, @new %byt, @old %max + %loc =l phi @start $arr, @new %loc1, @old %loc1 + %byt =w loadub %loc + %loc1 =l add 1, %loc + jnz %byt, @iter, @end +@iter + %cmp =w cslew %max, %byt + jnz %cmp, @new, @old +@new + jmp @loop +@old + jmp @loop +@end + storew %max, $a + ret +} + +# >>> driver +# extern void test(void); +# int a; +# int main() { test(); return !(a == 200); } +# <<< diff --git a/src/test/prime.ssa b/src/test/prime.ssa new file mode 100644 index 0000000..12d0273 --- /dev/null +++ b/src/test/prime.ssa @@ -0,0 +1,32 @@ +# find the 10,001st prime +# store it in a + +function $test() { +@start +@loop + %n =w phi @start 5, @tloop %n, @yes %n1 + %p =w phi @start 13, @tloop %p1, @yes %p1 + %p1 =w add %p, 2 +@tloop + %t =w phi @loop 3, @next %t1 + %r =w rem %p, %t + jnz %r, @next, @loop +@next + %t1 =w add 2, %t + %tsq =w mul %t1, %t1 + %c0 =w csgtw %tsq, %p + jnz %c0, @yes, @tloop +@yes + %n1 =w add 1, %n + %c1 =w ceqw 10001, %n1 + jnz %c1, @end, @loop +@end + storew %p, $a + ret +} + +# >>> driver +# extern void test(void); +# int a; +# int main() { test(); return !(a == 104743); } +# <<< diff --git a/src/test/puts10.ssa b/src/test/puts10.ssa new file mode 100644 index 0000000..1dcf227 --- /dev/null +++ b/src/test/puts10.ssa @@ -0,0 +1,29 @@ +function $main() { +@start + %y =l alloc4 4 + %y1 =l add %y, 1 + storeb 0, %y1 +@loop + %n =w phi @start 0, @loop %n1 + %c =w add %n, 48 + storeb %c, %y + %r =w call $puts(l %y) + %n1 =w add %n, 1 + %cmp =w cslew %n1, 9 + jnz %cmp, @loop, @end +@end + ret +} + +# >>> output +# 0 +# 1 +# 2 +# 3 +# 4 +# 5 +# 6 +# 7 +# 8 +# 9 +# <<< diff --git a/src/test/sum.ssa b/src/test/sum.ssa new file mode 100644 index 0000000..266054e --- /dev/null +++ b/src/test/sum.ssa @@ -0,0 +1,31 @@ +# Simple test for addressing modes. + +function w $sum(l %arr, w %num) { +@start +@loop + %n1 =w phi @start %num, @loop1 %n2 + %s0 =w phi @start 0, @loop1 %s1 + %n2 =w sub %n1, 1 + %c =w cslew %n1, 0 + jnz %c, @end, @loop1 +@loop1 + %idx0 =l extsw %n2 + %idx1 =l mul 4, %idx0 + %idx2 =l add %idx1, %arr + %w =w loadw %idx2 + %s1 =w add %w, %s0 + jmp @loop +@end + ret %s0 +} + +# >>> driver +# extern int sum(int *, int); +# int arr[] = { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21 }; +# #define N sizeof arr / sizeof arr[0] +# int main() { +# int i, s; +# for (s=i=0; i<N; i++) s+=arr[i]; +# return !(sum(arr, N) == s); +# } +# <<< diff --git a/src/tools/abi.ml b/src/tools/abi.ml new file mode 100644 index 0000000..d845c74 --- /dev/null +++ b/src/tools/abi.ml @@ -0,0 +1,532 @@ +(* fuzzer *) + +type _ bty = + | Char: int bty + | Short: int bty + | Int: int bty + | Long: int bty + | Float: float bty + | Double: float bty + +type _ sty = + | Field: 'a bty * 'b sty -> ('a * 'b) sty + | Empty: unit sty + +type _ aty = + | Base: 'a bty -> 'a aty + | Struct: 'a sty -> 'a aty + +type anyb = AB: _ bty -> anyb (* kinda boring... *) +type anys = AS: _ sty -> anys +type anya = AA: _ aty -> anya +type testb = TB: 'a bty * 'a -> testb +type testa = TA: 'a aty * 'a -> testa + + +let align a x = + let m = x mod a in + if m <> 0 then x + (a-m) else x + +let btysize: type a. a bty -> int = function + | Char -> 1 + | Short -> 2 + | Int -> 4 + | Long -> 8 + | Float -> 4 + | Double -> 8 + +let btyalign = btysize + +let styempty: type a. a sty -> bool = function + | Field _ -> false + | Empty -> true + +let stysize s = + let rec f: type a. int -> a sty -> int = + fun sz -> function + | Field (b, s) -> + let a = btyalign b in + f (align a sz + btysize b) s + | Empty -> sz in + f 0 s + +let rec styalign: type a. a sty -> int = function + | Field (b, s) -> max (btyalign b) (styalign s) + | Empty -> 1 + + +(* Generate types and test vectors. *) +module Gen = struct + module R = Random + + let init = function + | None -> + let f = open_in "/dev/urandom" in + let seed = + Char.code (input_char f) lsl 8 + + Char.code (input_char f) in + close_in f; + R.init seed; + seed + | Some seed -> + R.init seed; + seed + + let int sz = + let bound = 1 lsl (8 * min sz 3 - 1) in + let i = R.int bound in + if R.bool () then - i else i + + let float () = + let f = R.float 1000. in + if R.bool () then -. f else f + + let testv: type a. a aty -> a = + let tb: type a. a bty -> a = function (* eh, dry... *) + | Float -> float () + | Double -> float () + | Char -> int (btysize Char) + | Short -> int (btysize Short) + | Int -> int (btysize Int) + | Long -> int (btysize Long) in + let rec ts: type a. a sty -> a = function + | Field (b, s) -> (tb b, ts s) + | Empty -> () in + function + | Base b -> tb b + | Struct s -> ts s + + let b () = (* uniform *) + match R.int 6 with + | 0 -> AB Char + | 1 -> AB Short + | 2 -> AB Int + | 3 -> AB Long + | 4 -> AB Float + | _ -> AB Double + + let smax = 5 (* max elements in structs *) + let structp = 0.3 (* odds of having a struct type *) + let amax = 8 (* max function arguments *) + + let s () = + let rec f n = + if n = 0 then AS Empty else + let AB bt = b () in + let AS st = f (n-1) in + AS (Field (bt, st)) in + f (1 + R.int (smax-1)) + + let a () = + if R.float 1.0 > structp then + let AB bt = b () in + AA (Base bt) + else + let AB bt = b () in + let AS st = s () in + AA (Struct (Field (bt, st))) + + let test () = + let AA ty = a () in + let t = testv ty in + TA (ty, t) + + let tests () = + let rec f n = + if n = 0 then [] else + test () :: f (n-1) in + f (R.int amax) + +end + + +(* Code generation for C *) +module OutC = struct + open Printf + + let ctypelong oc name = + let cb: type a. a bty -> unit = function + | Char -> fprintf oc "char" + | Short -> fprintf oc "short" + | Int -> fprintf oc "int" + | Long -> fprintf oc "long" + | Float -> fprintf oc "float" + | Double -> fprintf oc "double" in + let rec cs: type a. int -> a sty -> unit = + fun i -> function + | Field (b, s) -> + cb b; + fprintf oc " f%d; " i; + cs (i+1) s; + | Empty -> () in + function + | Base b -> + cb b; + | Struct s -> + fprintf oc "struct %s { " name; + cs 1 s; + fprintf oc "}"; + () + + let ctype: type a. out_channel -> string -> a aty -> unit = + fun oc name -> function + | Struct _ -> fprintf oc "struct %s" name + | t -> ctypelong oc "" t + + let base: type a. out_channel -> a bty * a -> unit = + fun oc -> function + | Char, i -> fprintf oc "%d" i + | Short, i -> fprintf oc "%d" i + | Int, i -> fprintf oc "%d" i + | Long, i -> fprintf oc "%d" i + | Float, f -> fprintf oc "%ff" f + | Double, f -> fprintf oc "%f" f + + let init oc name (TA (ty, t)) = + let inits s = + let rec f: type a. a sty * a -> unit = function + | Field (b, s), (tb, ts) -> + base oc (b, tb); + fprintf oc ", "; + f (s, ts) + | Empty, () -> () in + fprintf oc "{ "; + f s; + fprintf oc "}"; in + ctype oc name ty; + fprintf oc " %s = " name; + begin match (ty, t) with + | Base b, tb -> base oc (b, tb) + | Struct s, ts -> inits (s, ts) + end; + fprintf oc ";\n"; + () + + let extension = ".c" + + let comment oc s = + fprintf oc "/* %s */\n" s + + let prelude oc = List.iter (fprintf oc "%s\n") + [ "#include <stdio.h>" + ; "#include <stdlib.h>" + ; "" + ; "static void fail(char *chk)" + ; "{" + ; "\tfprintf(stderr, \"fail: checking %s\\n\", chk);" + ; "\tabort();" + ; "}" + ; "" + ] + + let typedef oc name = function + | TA (Struct ts, _) -> + ctypelong oc name (Struct ts); + fprintf oc ";\n"; + | _ -> () + + let check oc name = + let chkbase: type a. string -> a bty * a -> unit = + fun name t -> + fprintf oc "\tif (%s != " name; + base oc t; + fprintf oc ")\n\t\tfail(%S);\n" name; in + function + | TA (Base b, tb) -> chkbase name (b, tb) + | TA (Struct s, ts) -> + let rec f: type a. int -> a sty * a -> unit = + fun i -> function + | Field (b, s), (tb, ts) -> + chkbase (Printf.sprintf "%s.f%d" name i) (b, tb); + f (i+1) (s, ts); + | Empty, () -> () in + f 1 (s, ts) + + let argname i = "arg" ^ string_of_int (i+1) + + let proto oc (TA (tret, _)) args = + ctype oc "ret" tret; + fprintf oc " f("; + let narg = List.length args in + List.iteri (fun i (TA (targ, _)) -> + ctype oc (argname i) targ; + fprintf oc " %s" (argname i); + if i <> narg-1 then + fprintf oc ", "; + ) args; + fprintf oc ")"; + () + + let caller oc ret args = + let narg = List.length args in + prelude oc; + typedef oc "ret" ret; + List.iteri (fun i arg -> + typedef oc (argname i) arg; + ) args; + proto oc ret args; + fprintf oc ";\n\nint main()\n{\n"; + List.iteri (fun i arg -> + fprintf oc "\t"; + init oc (argname i) arg; + ) args; + fprintf oc "\t"; + let TA (tret, _) = ret in + ctype oc "ret" tret; + fprintf oc " ret;\n\n"; + fprintf oc "\tret = f("; + List.iteri (fun i _ -> + fprintf oc "%s" (argname i); + if i <> narg-1 then + fprintf oc ", "; + ) args; + fprintf oc ");\n"; + check oc "ret" ret; + fprintf oc "\n\treturn 0;\n}\n"; + () + + let callee oc ret args = + prelude oc; + typedef oc "ret" ret; + List.iteri (fun i arg -> + typedef oc (argname i) arg; + ) args; + fprintf oc "\n"; + proto oc ret args; + fprintf oc "\n{\n\t"; + init oc "ret" ret; + fprintf oc "\n"; + List.iteri (fun i arg -> + check oc (argname i) arg; + ) args; + fprintf oc "\n\treturn ret;\n}\n"; + () + +end + +(* Code generation for QBE *) +module OutIL = struct + open Printf + + let comment oc s = + fprintf oc "# %s\n" s + + let tmp, lbl = + let next = ref 0 in + (fun () -> incr next; "%t" ^ (string_of_int !next)), + (fun () -> incr next; "@l" ^ (string_of_int !next)) + + let bvalue: type a. a bty * a -> string = function + | Char, i -> sprintf "%d" i + | Short, i -> sprintf "%d" i + | Int, i -> sprintf "%d" i + | Long, i -> sprintf "%d" i + | Float, f -> sprintf "s_%f" f + | Double, f -> sprintf "d_%f" f + + let btype: type a. a bty -> string = function + | Char -> "w" + | Short -> "w" + | Int -> "w" + | Long -> "l" + | Float -> "s" + | Double -> "d" + + let extension = ".ssa" + + let argname i = "arg" ^ string_of_int (i+1) + + let siter oc base s g = + let rec f: type a. int -> int -> a sty * a -> unit = + fun id off -> function + | Field (b, s), (tb, ts) -> + let off = align (btyalign b) off in + let addr = tmp () in + fprintf oc "\t%s =l add %d, %s\n" addr off base; + g id addr (TB (b, tb)); + f (id + 1) (off + btysize b) (s, ts); + | Empty, () -> () in + f 0 0 s + + let bmemtype b = + if AB b = AB Char then "b" else + if AB b = AB Short then "h" else + btype b + + let init oc = function + | TA (Base b, tb) -> bvalue (b, tb) + | TA (Struct s, ts) -> + let base = tmp () in + fprintf oc "\t%s =l alloc%d %d\n" + base (styalign s) (stysize s); + siter oc base (s, ts) + begin fun _ addr (TB (b, tb)) -> + fprintf oc "\tstore%s %s, %s\n" + (bmemtype b) (bvalue (b, tb)) addr; + end; + base + + let check oc id name = + let bcheck = fun id name (b, tb) -> + let tcmp = tmp () in + let nxtl = lbl () in + fprintf oc "\t%s =w ceq%s %s, %s\n" + tcmp (btype b) name (bvalue (b, tb)); + fprintf oc "\tstorew %d, %%failcode\n" id; + fprintf oc "\tjnz %s, %s, @fail\n" tcmp nxtl; + fprintf oc "%s\n" nxtl; in + function + | TA (Base Char, i) -> + let tval = tmp () in + fprintf oc "\t%s =w extsb %s\n" tval name; + bcheck id tval (Int, i) + | TA (Base Short, i) -> + let tval = tmp () in + fprintf oc "\t%s =w extsh %s\n" tval name; + bcheck id tval (Int, i) + | TA (Base b, tb) -> + bcheck id name (b, tb) + | TA (Struct s, ts) -> + siter oc name (s, ts) + begin fun id' addr (TB (b, tb)) -> + let tval = tmp () in + let lsuffix = + if AB b = AB Char then "sb" else + if AB b = AB Short then "sh" else + "" in + fprintf oc "\t%s =%s load%s %s\n" + tval (btype b) lsuffix addr; + bcheck (100*id + id'+1) tval (b, tb); + end; + () + + let ttype name = function + | TA (Base b, _) -> btype b + | TA (Struct _, _) -> ":" ^ name + + let typedef oc name = + let rec f: type a. a sty -> unit = function + | Field (b, s) -> + fprintf oc "%s" (bmemtype b); + if not (styempty s) then + fprintf oc ", "; + f s; + | Empty -> () in + function + | TA (Struct ts, _) -> + fprintf oc "type :%s = { " name; + f ts; + fprintf oc " }\n"; + | _ -> () + + let postlude oc = List.iter (fprintf oc "%s\n") + [ "@fail" + ; "# failure code" + ; "\t%fcode =w loadw %failcode" + ; "\t%f0 =w call $printf(l $failstr, w %fcode)" + ; "\t%f1 =w call $abort()" + ; "\tret 0" + ; "}" + ; "" + ; "data $failstr = { b \"fail on check %d\\n\", b 0 }" + ] + + let caller oc ret args = + let narg = List.length args in + List.iteri (fun i arg -> + typedef oc (argname i) arg; + ) args; + typedef oc "ret" ret; + fprintf oc "\nfunction w $main() {\n"; + fprintf oc "@start\n"; + fprintf oc "\t%%failcode =l alloc4 4\n"; + let targs = List.mapi (fun i arg -> + comment oc ("define argument " ^ (string_of_int (i+1))); + (ttype (argname i) arg, init oc arg) + ) args in + comment oc "call test function"; + fprintf oc "\t%%ret =%s call $f(" (ttype "ret" ret); + List.iteri (fun i (ty, tmp) -> + fprintf oc "%s %s" ty tmp; + if i <> narg-1 then + fprintf oc ", "; + ) targs; + fprintf oc ")\n"; + comment oc "check the return value"; + check oc 0 "%ret" ret; + fprintf oc "\tret 0\n"; + postlude oc; + () + + let callee oc ret args = + let narg = List.length args in + List.iteri (fun i arg -> + typedef oc (argname i) arg; + ) args; + typedef oc "ret" ret; + fprintf oc "\nfunction %s $f(" (ttype "ret" ret); + List.iteri (fun i arg -> + let a = argname i in + fprintf oc "%s %%%s" (ttype a arg) a; + if i <> narg-1 then + fprintf oc ", "; + ) args; + fprintf oc ") {\n"; + fprintf oc "@start\n"; + fprintf oc "\t%%failcode =l alloc4 4\n"; + List.iteri (fun i arg -> + comment oc ("checking argument " ^ (string_of_int (i+1))); + check oc (i+1) ("%" ^ argname i) arg; + ) args; + comment oc "define the return value"; + let rettmp = init oc ret in + fprintf oc "\tret %s\n" rettmp; + postlude oc; + () + +end + + +module type OUT = sig + val extension: string + val comment: out_channel -> string -> unit + val caller: out_channel -> testa -> testa list -> unit + val callee: out_channel -> testa -> testa list -> unit +end + +let _ = + let usage code = + Printf.eprintf "usage: abi.ml [-s SEED] DIR {c,ssa} {c,ssa}\n"; + exit code in + + let outmod = function + | "c" -> (module OutC : OUT) + | "ssa" -> (module OutIL: OUT) + | _ -> usage 1 in + + let seed, dir, mcaller, mcallee = + match Sys.argv with + | [| _; "-s"; seed; dir; caller; callee |] -> + let seed = + try Some (int_of_string seed) with + Failure _ -> usage 1 in + seed, dir, outmod caller, outmod callee + | [| _; dir; caller; callee |] -> + None, dir, outmod caller, outmod callee + | [| _; "-h" |] -> + usage 0 + | _ -> + usage 1 in + + let seed = Gen.init seed in + let tret = Gen.test () in + let targs = Gen.tests () in + let module OCaller = (val mcaller : OUT) in + let module OCallee = (val mcallee : OUT) in + let ocaller = open_out (dir ^ "/caller" ^ OCaller.extension) in + let ocallee = open_out (dir ^ "/callee" ^ OCallee.extension) in + OCaller.comment ocaller (Printf.sprintf "seed %d" seed); + OCallee.comment ocallee (Printf.sprintf "seed %d" seed); + OCaller.caller ocaller tret targs; + OCallee.callee ocallee tret targs; + () diff --git a/src/tools/abitest.sh b/src/tools/abitest.sh new file mode 100755 index 0000000..d5b16e5 --- /dev/null +++ b/src/tools/abitest.sh @@ -0,0 +1,104 @@ +#!/bin/sh + +OCAMLC=/usr/bin/ocamlc +QBE=`pwd`/qbe + +failure() { + echo "Failure at stage:" $1 >&2 + exit 1 +} + +cleanup() { + rm -fr $TMP +} + +init() { + cp tools/abi.ml $TMP + pushd $TMP > /dev/null + + cat > Makefile << EOM + +.PHONY: test +test: caller.o callee.o + c99 -o \$@ caller.o callee.o +%.o: %.c + c99 -c -o \$@ \$< +%.o: %.ssa + $QBE -o \$*.s \$< + c99 -c -o \$@ \$*.s + +EOM + + if ! $OCAMLC abi.ml -o gentest + then + popd > /dev/null + cleanup + failure "abifuzz compilation" + fi + popd > /dev/null +} + +once() { + if test -z "$3" + then + $TMP/gentest $TMP $1 $2 + else + $TMP/gentest -s $3 $TMP $1 $2 + fi + make -C $TMP test > /dev/null || failure "building" + $TMP/test || failure "runtime" +} + +usage() { + echo "usage: abitest.sh [-callssa] [-callc] [-s SEED] [-n ITERATIONS]" >&2 + exit 1 +} + +N=1 +CALLER=c +CALLEE=ssa + +while test -n "$1" +do + case "$1" in + "-callssa") + ;; + "-callc") + CALLER=ssa + CALLEE=c + ;; + "-s") + test -n "$2" || usage + shift + SEED="$1" + ;; + "-n") + test -n "$2" || usage + shift + N="$1" + ;; + *) + usage + ;; + esac + shift +done + +TMP=`mktemp -d abifuzz.XXXXXX` + +init + +if test -n "$S" +then + once $CALLER $CALLEE $SEED +else + for n in `seq $N` + do + once $CALLER $CALLEE + echo "$n" | grep "00$" + done +fi + +echo "All done." + +cleanup diff --git a/src/tools/fptox.c b/src/tools/fptox.c new file mode 100644 index 0000000..a2bc155 --- /dev/null +++ b/src/tools/fptox.c @@ -0,0 +1,18 @@ +#include <stdio.h> +#include <stdlib.h> + +int +main(int ac, char *av[]) +{ + double d; + float f; + + if (ac < 2) { + usage: + fputs("usage: fptox NUMBER\n", stderr); + return 1; + } + f = d = strtod(av[1], 0); + printf("0x%08x 0x%016llx\n", *(unsigned *)&f, *(unsigned long long*)&d); + return 0; +} diff --git a/src/tools/pmov.c b/src/tools/pmov.c new file mode 100644 index 0000000..efbecd7 --- /dev/null +++ b/src/tools/pmov.c @@ -0,0 +1,252 @@ +/*% rm -f rega.o main.o && cc -g -std=c99 -Wall -DTEST_PMOV -o # % *.o + * + * This is a test framwork for the dopm() function + * in rega.c, use it when you want to modify it or + * all the parallel move functions. + * + * You might need to decrease NIReg to see it + * terminate, I used NIReg == 7 at most. + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +static void assert_test(char *, int), fail(void), iexec(int *); + +#include "../rega.c" + +static RMap mbeg; +static Ins ins[NIReg], *ip; +static Blk dummyb = { .ins = ins }; + +int +main() +{ + Ins *i1; + unsigned long long tm, rm, cnt; + RMap mend; + int reg[NIReg], val[NIReg+1]; + int t, i, r, nr; + + tmp = (Tmp[Tmp0+NIReg]){{{0}}}; + for (t=0; t<Tmp0+NIReg; t++) + if (t >= Tmp0) { + tmp[t].cls = Kw; + tmp[t].hint.r = -1; + tmp[t].hint.m = 0; + tmp[t].slot = -1; + sprintf(tmp[t].name, "tmp%d", t-Tmp0+1); + } + + bsinit(mbeg.b, Tmp0+NIReg); + bsinit(mend.b, Tmp0+NIReg); + cnt = 0; + for (tm = 0; tm < 1ull << (2*NIReg); tm++) { + mbeg.n = 0; + bszero(mbeg.b); + ip = ins; + + /* find what temporaries are in copy and + * wether or not they are in register + */ + for (t=0; t<NIReg; t++) + switch ((tm >> (2*t)) & 3) { + case 0: + /* not in copy, not in reg */ + break; + case 1: + /* not in copy, in reg */ + radd(&mbeg, Tmp0+t, t+1); + break; + case 2: + /* in copy, not in reg */ + *ip++ = (Ins){OCopy, TMP(Tmp0+t), {R, R}, Kw}; + break; + case 3: + /* in copy, in reg */ + *ip++ = (Ins){OCopy, TMP(Tmp0+t), {R, R}, Kw}; + radd(&mbeg, Tmp0+t, t+1); + break; + } + + if (ip == ins) + /* cancel if the parallel move + * is empty + */ + goto Nxt; + + /* find registers for temporaries + * in mbeg + */ + nr = ip - ins; + rm = (1ull << (nr+1)) - 1; + for (i=0; i<nr; i++) + reg[i] = i+1; + + for (;;) { + /* set registers on copies + */ + for (i=0, i1=ins; i1<ip; i1++, i++) + i1->arg[0] = TMP(reg[i]); + + /* compile the parallel move + */ + rcopy(&mend, &mbeg); + dopm(&dummyb, ip-1, &mend); + cnt++; + + /* check that mend contain mappings for + * source registers and does not map any + * assigned temporary, then check that + * all temporaries in mend are mapped in + * mbeg and not used in the copy + */ + for (i1=ins; i1<ip; i1++) { + r = i1->arg[0].val; + assert(rfree(&mend, r) == r); + t = i1->to.val; + assert(!bshas(mend.b, t)); + } + for (i=0; i<mend.n; i++) { + t = mend.t[i]; + assert(bshas(mbeg.b, t)); + t -= Tmp0; + assert(((tm >> (2*t)) & 3) == 1); + } + + /* execute the code generated and check + * that all assigned temporaries got their + * value, and that all live variables's + * content got preserved + */ + for (i=1; i<=NIReg; i++) + val[i] = i; + iexec(val); + for (i1=ins; i1<ip; i1++) { + t = i1->to.val; + r = rfind(&mbeg, t); + if (r != -1) + assert(val[r] == i1->arg[0].val); + } + for (i=0; i<mend.n; i++) { + t = mend.t[i]; + r = mend.r[i]; + assert(val[t-Tmp0+1] == r); + } + + /* find the next register assignment */ + i = nr - 1; + for (;;) { + r = reg[i]; + rm &= ~(1ull<<r); + do + r++; + while (r <= NIReg && (rm & (1ull<<r))); + if (r == NIReg+1) { + if (i == 0) + goto Nxt; + i--; + } else { + rm |= (1ull<<r); + reg[i++] = r; + break; + } + } + for (; i<nr; i++) + for (r=1; r<=NIReg; r++) + if (!(rm & (1ull<<r))) { + rm |= (1ull<<r); + reg[i] = r; + break; + } + } + Nxt: ; + } + printf("%llu tests successful!\n", cnt); + exit(0); +} + + +/* execute what pmgen() wrote (swap, copy) */ + +#define validr(r) \ + rtype(r) == RTmp && \ + r.val > 0 && \ + r.val <= NIReg + +static void +iexec(int val[]) +{ + Ins *i; + int t; + + for (i=insb; i<curi; i++) + switch (i->op) { + default: + assert(!"iexec: missing case\n"); + exit(1); + case OSwap: + assert(validr(i->arg[0])); + assert(validr(i->arg[1])); + t = val[i->arg[0].val]; + val[i->arg[0].val] = val[i->arg[1].val]; + val[i->arg[1].val] = t; + break; + case OCopy: + assert(validr(i->to)); + assert(validr(i->arg[0])); + val[i->to.val] = val[i->arg[0].val]; + break; + } +} + + +/* failure diagnostics */ + +static int re; + +static void +replay() +{ + RMap mend; + + re = 1; + bsinit(mend.b, Tmp0+NIReg); + rcopy(&mend, &mbeg); + dopm(&dummyb, ip-1, &mend); +} + +static void +fail() +{ + Ins *i1; + int i; + + printf("\nIn registers: "); + for (i=0; i<mbeg.n; i++) + printf("%s(r%d) ", + tmp[mbeg.t[i]].name, + mbeg.r[i]); + printf("\n"); + printf("Parallel move:\n"); + for (i1=ins; i1<ip; i1++) + printf("\t %s <- r%d\n", + tmp[i1->to.val].name, + i1->arg[0].val); + replay(); + abort(); +} + +static void +assert_test(char *s, int x) +{ + if (x) + return; + if (re) + abort(); + printf("!assertion failure: %s\n", s); + fail(); +} + +/* symbols required by the linker */ +char debug['Z'+1]; diff --git a/src/tools/regress.sh b/src/tools/regress.sh new file mode 100755 index 0000000..4106b00 --- /dev/null +++ b/src/tools/regress.sh @@ -0,0 +1,17 @@ +#!/bin/sh + +for t in test/* +do + printf "Test $t ... " + + ./qbe $t >/tmp/out.0 2>&1 + ./qbe.1 $t >/tmp/out.1 2>&1 + + if diff /tmp/out.0 /tmp/out.1 > /dev/null + then + echo "OK" + else + echo "KO" + break + fi +done diff --git a/src/util.c b/src/util.c new file mode 100644 index 0000000..65b3ff8 --- /dev/null +++ b/src/util.c @@ -0,0 +1,329 @@ +#include "all.h" + +typedef struct Bitset Bitset; +typedef struct Vec Vec; + +struct Vec { + ulong mag; + size_t esz; + ulong cap; + union { + long long ll; + long double ld; + void *ptr; + } align[]; +}; + +enum { + VMin = 2, + VMag = 0xcabba9e, + NPtr = 256, +}; + +Typ typ[NTyp]; +Ins insb[NIns], *curi; + +static void *ptr[NPtr]; +static void **pool = ptr; +static int nptr = 1; + +void +diag(char *s) +{ + fputs(s, stderr); + fputc('\n', stderr); + abort(); +} + +void * +emalloc(size_t n) +{ + void *p; + + p = calloc(1, n); + if (!p) + diag("emalloc: out of memory"); + return p; +} + +void * +alloc(size_t n) +{ + void **pp; + + if (n == 0) + return 0; + if (nptr >= NPtr) { + pp = emalloc(NPtr * sizeof(void *)); + pp[0] = pool; + pool = pp; + nptr = 1; + } + return pool[nptr++] = emalloc(n); +} + +void +freeall() +{ + void **pp; + + for (;;) { + for (pp = &pool[1]; pp < &pool[nptr]; pp++) + free(*pp); + pp = pool[0]; + if (!pp) + break; + free(pool); + pool = pp; + nptr = NPtr; + } + nptr = 1; +} + +Blk * +blknew() +{ + static Blk z; + Blk *b; + + b = alloc(sizeof *b); + *b = z; + return b; +} + +void +emit(int op, int k, Ref to, Ref arg0, Ref arg1) +{ + if (curi == insb) + diag("emit: too many instructions"); + *--curi = (Ins){ + .op = op, .cls = k, + .to = to, .arg = {arg0, arg1} + }; +} + +void +emiti(Ins i) +{ + emit(i.op, i.cls, i.to, i.arg[0], i.arg[1]); +} + +void +idup(Ins **pd, Ins *s, ulong n) +{ + *pd = alloc(n * sizeof(Ins)); + memcpy(*pd, s, n * sizeof(Ins)); +} + +Ins * +icpy(Ins *d, Ins *s, ulong n) +{ + memcpy(d, s, n * sizeof(Ins)); + return d + n; +} + +void * +vnew(ulong len, size_t esz) +{ + ulong cap; + Vec *v; + + for (cap=VMin; cap<len; cap*=2) + ; + v = alloc(cap * esz + sizeof(Vec)); + v->mag = VMag; + v->cap = cap; + v->esz = esz; + return v + 1; +} + +void +vgrow(void *vp, ulong len) +{ + Vec *v; + void *v1; + + v = *(Vec **)vp - 1; + assert(v+1 && v->mag == VMag); + if (v->cap >= len) + return; + v1 = vnew(len, v->esz); + memcpy(v1, v+1, v->cap * v->esz); + *(Vec **)vp = v1; +} + +int +phicls(int t, Tmp *tmp /*, int c*/) +{ + if (tmp[t].phi) + return tmp[t].phi; + return t; +#if 0 + int t1; + + t1 = tmp[t].phi; + if (!t1) + t1 = t; + if (t != t1) { + t1 = phitmp(t1, tmp, c); + if (c) + tmp[t].phi = t1; + } + return t1; +#endif +} + +Ref +newtmp(char *prfx, int k, Fn *fn) +{ + static int n; + int t; + + t = fn->ntmp++; + vgrow(&fn->tmp, fn->ntmp); + sprintf(fn->tmp[t].name, "%s%d", prfx, ++n); + fn->tmp[t].cls = k; + fn->tmp[t].slot = -1; + fn->tmp[t].nuse = +1; + fn->tmp[t].ndef = +1; + return TMP(t); +} + +Ref +getcon(int64_t val, Fn *fn) +{ + int c; + + for (c=0; c<fn->ncon; c++) + if (fn->con[c].type == CBits && fn->con[c].bits.i == val) + return CON(c); + fn->ncon++; + vgrow(&fn->con, fn->ncon); + fn->con[c] = (Con){.type = CBits, .bits.i = val}; + return CON(c); +} + +void +addcon(Con *c0, Con *c1) +{ + if (c0->type == CUndef) + *c0 = *c1; + else { + if (c1->type == CAddr) { + if (c0->type == CAddr) + diag("addcon: adding two addresses"); + c0->type = CAddr; + strcpy(c0->label, c1->label); + } + c0->bits.i += c1->bits.i; + } +} + +void +bsinit(BSet *bs, uint n) +{ + n = (n + NBit-1) / NBit; + bs->nt = n; + bs->t = alloc(n * sizeof bs->t[0]); +} + +uint +bscount(BSet *bs) +{ + uint i, j, n; + + n = 0; + for (i=0; i<bs->nt; i++) + for (j=0; j<NBit; j++) + if (bs->t[i] & BIT(j)) + n++; + return n; +} + +static inline uint +bsmax(BSet *bs) +{ + return bs->nt * NBit; +} + +void +bsset(BSet *bs, uint elt) +{ + assert(elt < bsmax(bs)); + bs->t[elt/NBit] |= BIT(elt%NBit); +} + +void +bsclr(BSet *bs, uint elt) +{ + assert(elt < bsmax(bs)); + bs->t[elt/NBit] &= ~BIT(elt%NBit); +} + +#define BSOP(f, op) \ + void \ + f(BSet *a, BSet *b) \ + { \ + uint i; \ + \ + assert(a->nt == b->nt); \ + for (i=0; i<a->nt; i++) \ + a->t[i] op b->t[i]; \ + } + +BSOP(bscopy, =) +BSOP(bsunion, |=) +BSOP(bsinter, &=) +BSOP(bsdiff, &= ~) + +int +bsequal(BSet *a, BSet *b) +{ + uint i; + + assert(a->nt == b->nt); + for (i=0; i<a->nt; i++) + if (a->t[i] != b->t[i]) + return 0; + return 1; +} + +void +bszero(BSet *bs) +{ + memset(bs->t, 0, bs->nt * sizeof bs->t[0]); +} + +/* iterates on a bitset, use as follows + * + * for (i=0; bsiter(set, &i); i++) + * use(i); + * + */ +int +bsiter(BSet *bs, uint *elt) +{ + uint i; + + for (i=*elt;; i++) { + while (i < bsmax(bs) && !bs->t[i/NBit]) + i = (i + NBit) & -NBit; + if (i >= bsmax(bs)) + return 0; + if (bshas(bs, i)) { + *elt = i; + return 1; + } + } +} + +void +dumpts(BSet *bs, Tmp *tmp, FILE *f) +{ + uint t; + + fprintf(f, "["); + for (t=Tmp0; bsiter(bs, &t); t++) + fprintf(f, " %s", tmp[t].name); + fprintf(f, " ]\n"); +} |