From b75cb8388fb9b5f2393443d008bb46c522c5ec9b Mon Sep 17 00:00:00 2001 From: Quentin Carbonneaux Date: Mon, 28 Mar 2016 12:53:53 -0400 Subject: new layout, put LICENSE in root --- .gitignore | 5 +- .tag | 11 + LICENSE | 19 + Makefile | 45 ++- README | 8 + all.h | 563 +++++++++++++++++++++++++++ copy.c | 159 ++++++++ emit.c | 669 ++++++++++++++++++++++++++++++++ isel.c | 1136 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ live.c | 174 +++++++++ main.c | 140 +++++++ mem.c | 81 ++++ minic/LICENSE | 21 - minic/Makefile | 4 +- minic/mcc | 2 +- parse.c | 1099 ++++++++++++++++++++++++++++++++++++++++++++++++++++ rega.c | 598 ++++++++++++++++++++++++++++ spill.c | 507 ++++++++++++++++++++++++ src/.gitignore | 5 - src/.tag | 11 - src/Makefile | 24 -- src/all.h | 563 --------------------------- src/copy.c | 159 -------- src/emit.c | 669 -------------------------------- src/isel.c | 1136 ------------------------------------------------------ src/live.c | 174 --------- src/main.c | 140 ------- src/mem.c | 81 ---- src/parse.c | 1099 ---------------------------------------------------- src/rega.c | 598 ---------------------------- src/spill.c | 507 ------------------------ src/ssa.c | 516 ------------------------- src/util.c | 329 ---------------- ssa.c | 516 +++++++++++++++++++++++++ test/go.sh | 119 ------ tools/abifuzz.sh | 2 +- tools/fptox.c | 18 - tools/pmov.c | 2 +- tools/regress.sh | 17 - tools/unit.sh | 119 ++++++ util.c | 329 ++++++++++++++++ 41 files changed, 6175 insertions(+), 6199 deletions(-) create mode 100644 .tag create mode 100644 LICENSE create mode 100644 README create mode 100644 all.h create mode 100644 copy.c create mode 100644 emit.c create mode 100644 isel.c create mode 100644 live.c create mode 100644 main.c create mode 100644 mem.c delete mode 100644 minic/LICENSE create mode 100644 parse.c create mode 100644 rega.c create mode 100644 spill.c delete mode 100644 src/.gitignore delete mode 100644 src/.tag delete mode 100644 src/Makefile delete 
mode 100644 src/all.h delete mode 100644 src/copy.c delete mode 100644 src/emit.c delete mode 100644 src/isel.c delete mode 100644 src/live.c delete mode 100644 src/main.c delete mode 100644 src/mem.c delete mode 100644 src/parse.c delete mode 100644 src/rega.c delete mode 100644 src/spill.c delete mode 100644 src/ssa.c delete mode 100644 src/util.c create mode 100644 ssa.c delete mode 100755 test/go.sh delete mode 100644 tools/fptox.c delete mode 100755 tools/regress.sh create mode 100755 tools/unit.sh create mode 100644 util.c diff --git a/.gitignore b/.gitignore index 2b1c6c0..c466bca 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ -papers +obj +config.h +.comfile +*.out diff --git a/.tag b/.tag new file mode 100644 index 0000000..5b8c210 --- /dev/null +++ b/.tag @@ -0,0 +1,11 @@ +Look slot( + +Get lisc.h +Get parse.c +Get isel.c +Get spill.c +Get rega.c +Get emit.c + +New +|fmt diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..0b358b4 --- /dev/null +++ b/LICENSE @@ -0,0 +1,19 @@ +© 2015-2016 Quentin Carbonneaux + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/Makefile b/Makefile index 9aefa3e..7c2bec8 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,41 @@ -all clean check: - @make -C src $@ - @make -C minic $@ +BIN = qbe -sync-papers: - unison -auto papers ssh://qcar@h/data/d/ssa-doc +V = @ +OBJDIR = obj -.PHONY: all clean check sync-papers +SRC = main.c util.c parse.c mem.c ssa.c copy.c live.c isel.c spill.c rega.c emit.c +OBJ = $(SRC:%.c=$(OBJDIR)/%.o) + +CFLAGS += -Wall -Wextra -std=c99 -g -pedantic + +$(OBJDIR)/$(BIN): $(OBJ) $(OBJDIR)/timestamp + @echo "ld $@" + $(V)$(CC) $(LDFLAGS) $(OBJ) -o $@ + +$(OBJDIR)/%.o: %.c $(OBJDIR)/timestamp + @echo "cc $<" + $(V)$(CC) $(CFLAGS) -c $< -o $@ + +$(OBJDIR)/timestamp: + @mkdir -p $(OBJDIR) + @touch $@ + +$(OBJ): all.h +obj/main.o: config.h + +config.h: + @case `uname` in \ + *Darwin*) echo "#define Defaultasm Gasmacho" ;; \ + *Linux*) echo "#define Defaultasm Gaself" ;; \ + esac > $@ + +clean: + rm -fr $(OBJDIR) + +clean-gen: clean + rm -f config.h + +check: $(OBJDIR)/$(BIN) + tools/unit.sh all + +.PHONY: clean clean-gen check syndoc diff --git a/README b/README new file mode 100644 index 0000000..2f3d90e --- /dev/null +++ b/README @@ -0,0 +1,8 @@ +QBE - Backend Compiler http://c9x.me/compile/ + +doc/ Documentation. +minic/ An example C frontend for QBE. +tools/ Miscellaneous tools (testing). +test/ Unit tests. + +The LICENSE file applies to all files distributed. 
diff --git a/all.h b/all.h new file mode 100644 index 0000000..40c80f6 --- /dev/null +++ b/all.h @@ -0,0 +1,563 @@ +#include +#include +#include +#include +#include +#include + +#define MAKESURE(what, x) typedef char make_sure_##what[(x)?1:-1] + +typedef unsigned int uint; +typedef unsigned short ushort; +typedef unsigned long ulong; +typedef unsigned long bits; + +typedef struct BSet BSet; +typedef struct Ref Ref; +typedef struct OpDesc OpDesc; +typedef struct Ins Ins; +typedef struct Phi Phi; +typedef struct Blk Blk; +typedef struct Use Use; +typedef struct Tmp Tmp; +typedef struct Con Con; +typedef struct Addr Mem; +typedef struct Fn Fn; +typedef struct Typ Typ; +typedef struct Dat Dat; + +enum Reg { + RXX, + + RAX, /* caller-save */ + RCX, + RDX, + RSI, + RDI, + R8, + R9, + R10, + R11, + + RBX, /* callee-save */ + R12, + R13, + R14, + R15, + + RBP, /* reserved */ + RSP, + + XMM0, /* sse */ + XMM1, + XMM2, + XMM3, + XMM4, + XMM5, + XMM6, + XMM7, + XMM8, + XMM9, + XMM10, + XMM11, + XMM12, + XMM13, + XMM14, + XMM15, + + Tmp0, /* first non-reg temporary */ + + NIReg = R12 - RAX + 1, + NFReg = XMM14 - XMM0 + 1, + NISave = 9, + NFSave = NFReg, + NRSave = NISave + NFSave, + NRClob = 5, +}; + +enum { + NString = 32, + NPred = 63, + NIns = 8192, + NAlign = 3, + NSeg = 32, + NTyp = 128, + NBit = CHAR_BIT * sizeof(bits), +}; + +MAKESURE(NBit_is_enough, NBit >= (int)Tmp0); + +#define BIT(n) ((bits)1 << (n)) + +struct BSet { + uint nt; + bits *t; +}; + +struct Ref { + uint16_t type:2; + uint16_t val:14; +}; + +enum Alt { + AType, + ACall, + AMem, + + AShift = 12, + AMask = (1<> AShift); + return r.type; +} + +static inline int isreg(Ref r) +{ + return rtype(r) == RTmp && r.val < Tmp0; +} + +enum ICmp { +#define ICMPS(X) \ + X(ule) \ + X(ult) \ + X(sle) \ + X(slt) \ + X(sgt) \ + X(sge) \ + X(ugt) \ + X(uge) \ + X(eq) \ + X(ne) /* make sure icmpop() below works! 
*/ + +#define X(c) IC##c, + ICMPS(X) +#undef X + NICmp, + + ICXnp = NICmp, /* x64 specific */ + ICXp, + NXICmp +}; + +static inline int icmpop(int c) +{ + return c >= ICeq ? c : ICuge - c; +} + +enum FCmp { +#define FCMPS(X) \ + X(le) \ + X(lt) \ + X(gt) \ + X(ge) \ + X(ne) \ + X(eq) \ + X(o) \ + X(uo) + +#define X(c) FC##c, + FCMPS(X) +#undef X + NFCmp +}; + +enum Class { + Kw, + Kl, + Ks, + Kd +}; + +#define KWIDE(k) ((k)&1) +#define KBASE(k) ((k)>>1) + +enum Op { + OXXX, + + /* public instructions */ + OAdd, + OSub, + ODiv, + ORem, + OUDiv, + OURem, + OMul, + OAnd, + OOr, + OXor, + OSar, + OShr, + OShl, + OCmpw, + OCmpw1 = OCmpw + NICmp-1, + OCmpl, + OCmpl1 = OCmpl + NICmp-1, + OCmps, + OCmps1 = OCmps + NFCmp-1, + OCmpd, + OCmpd1 = OCmpd + NFCmp-1, + + OStored, + OStores, + OStorel, + OStorew, + OStoreh, + OStoreb, +#define isstore(o) (OStored <= o && o <= OStoreb) + OLoadsw, /* needs to match OExt (mem.c) */ + OLoaduw, + OLoadsh, + OLoaduh, + OLoadsb, + OLoadub, + OLoad, +#define isload(o) (OLoadsw <= o && o <= OLoad) + OExtsw, + OExtuw, + OExtsh, + OExtuh, + OExtsb, + OExtub, +#define isext(o) (OExtsw <= o && o <= OExtub) + + OExts, + OTruncd, + OFtosi, + OSitof, + OCast, + + OAlloc, + OAlloc1 = OAlloc + NAlign-1, + + OCopy, + NPubOp, + + /* function instructions */ + OPar = NPubOp, + OParc, + OArg, + OArgc, + OCall, + + /* reserved instructions */ + ONop, + OAddr, + OSwap, + OSign, + OSAlloc, + OXIDiv, + OXDiv, + OXCmp, + OXSet, + OXSetnp = OXSet + ICXnp, + OXSetp = OXSet + ICXp, + OXTest, + NOp +}; + +enum Jmp { + JXXX, + JRet0, + JRetw, + JRetl, + JRets, + JRetd, + JRetc, +#define isret(j) (JRet0 <= j && j <= JRetc) + JJmp, + JJnz, + JXJc, + JXJnp = JXJc + ICXnp, + JXJp = JXJc + ICXp, + NJmp +}; + +struct OpDesc { + char *name; + int nmem; + char argcls[2][4]; + uint sflag:1; /* sets the zero flag */ + uint lflag:1; /* leaves flags */ +}; + +struct Ins { + ushort op:14; + Ref to; + Ref arg[2]; + ushort cls:2; +}; + +struct Phi { + Ref to; + Ref arg[NPred]; 
+ Blk *blk[NPred]; + uint narg; + int cls; + Phi *link; +}; + +struct Blk { + Phi *phi; + Ins *ins; + uint nins; + struct { + short type; + Ref arg; + } jmp; + Blk *s1; + Blk *s2; + Blk *link; + + int id; + int visit; + + Blk *idom; + Blk *dom, *dlink; + Blk **fron; + int nfron; + + Blk **pred; + uint npred; + BSet in[1], out[1], gen[1]; + int nlive[2]; + int loop; + char name[NString]; +}; + +struct Use { + enum { + UXXX, + UPhi, + UIns, + UJmp, + } type; + int bid; + union { + Ins *ins; + Phi *phi; + } u; +}; + +struct Tmp { + char name[NString]; + Use *use; + uint ndef, nuse; + uint cost; + short slot; + short cls; + struct { + int r; + bits m; + } hint; + int phi; + int visit; +}; + +struct Con { + enum { + CUndef, + CBits, + CAddr, + } type; + char label[NString]; + union { + int64_t i; + double d; + float s; + } bits; + char flt; /* for printing, see parse.c */ + char local; +}; + +typedef struct Addr Addr; + +struct Addr { /* x64 addressing */ + Con offset; + Ref base; + Ref index; + int scale; +}; + +struct Fn { + Blk *start; + Tmp *tmp; + Con *con; + Mem *mem; + int ntmp; + int ncon; + int nmem; + int nblk; + int retty; /* index in typ[], -1 if no aggregate return */ + Ref retr; + Blk **rpo; + bits reg; + int slot; + char export; + char name[NString]; +}; + +struct Typ { + char name[NString]; + int dark; + uint size; + int align; + + struct { + uint isflt:1; + uint ispad:1; + uint len:30; + } seg[NSeg+1]; +}; + +struct Dat { + enum { + DStart, + DEnd, + DName, + DAlign, + DB, + DH, + DW, + DL, + DZ + } type; + union { + int64_t num; + double fltd; + float flts; + char *str; + struct { + char *nam; + int64_t off; + } ref; + } u; + char isref; + char isstr; + char export; +}; + + +/* main.c */ +enum Asm { + Gasmacho, + Gaself, +}; +extern char debug['Z'+1]; + +/* util.c */ +extern Typ typ[NTyp]; +extern Ins insb[NIns], *curi; +void diag(char *) __attribute__((noreturn)); +void *emalloc(size_t); +void *alloc(size_t); +void freeall(void); +Blk *blknew(void); 
+void emit(int, int, Ref, Ref, Ref); +void emiti(Ins); +void idup(Ins **, Ins *, ulong); +Ins *icpy(Ins *, Ins *, ulong); +void *vnew(ulong, size_t); +void vgrow(void *, ulong); +int phicls(int, Tmp *); +Ref newtmp(char *, int, Fn *); +Ref getcon(int64_t, Fn *); +void addcon(Con *, Con *); +void dumpts(BSet *, Tmp *, FILE *); + +void bsinit(BSet *, uint); +void bszero(BSet *); +uint bscount(BSet *); +void bsset(BSet *, uint); +void bsclr(BSet *, uint); +void bscopy(BSet *, BSet *); +void bsunion(BSet *, BSet *); +void bsinter(BSet *, BSet *); +void bsdiff(BSet *, BSet *); +int bsequal(BSet *, BSet *); +int bsiter(BSet *, uint *); + +static inline int +bshas(BSet *bs, uint elt) +{ + assert(elt < bs->nt * NBit); + return (bs->t[elt/NBit] & BIT(elt%NBit)) != 0; +} + +/* parse.c */ +extern OpDesc opdesc[NOp]; +void parse(FILE *, char *, void (Dat *), void (Fn *)); +void printfn(Fn *, FILE *); +void printref(Ref, Fn *, FILE *); +void err(char *, ...); + +/* mem.c */ +void memopt(Fn *); + +/* ssa.c */ +void filluse(Fn *); +void fillpreds(Fn *); +void fillrpo(Fn *); +void ssa(Fn *); + +/* copy.c */ +void copy(Fn *); + +/* live.c */ +void liveon(BSet *, Blk *, Blk *); +void filllive(Fn *); + +/* isel.c */ +extern int rsave[/* NRSave */]; +extern int rclob[/* NRClob */]; +bits retregs(Ref, int[2]); +bits argregs(Ref, int[2]); +void isel(Fn *); + +/* spill.c */ +void fillcost(Fn *); +void spill(Fn *); + +/* rega.c */ +void rega(Fn *); + +/* emit.c */ +extern char *locprefix; +extern char *symprefix; +void emitfn(Fn *, FILE *); +void emitdat(Dat *, FILE *); +int stashfp(int64_t, int); +void emitfin(FILE *); diff --git a/copy.c b/copy.c new file mode 100644 index 0000000..ef2d01d --- /dev/null +++ b/copy.c @@ -0,0 +1,159 @@ +#include "all.h" + +typedef struct RList RList; +struct RList { + int t; + RList *l; +}; + +static Ref +copyof(Ref r, Ref *cp) +{ + if (rtype(r) == RTmp) + return cp[r.val]; + else + return r; +} + +static void +update(Ref r, Ref rcp, Ref *cp, RList **w) 
+{ + RList *l; + + if (!req(cp[r.val], rcp)) { + cp[r.val] = rcp; + l = emalloc(sizeof *l); + l->t = r.val; + l->l = *w; + *w = l; + } +} + +static void +visitphi(Phi *p, Ref *cp, RList **w) +{ + uint a; + Ref r, r1; + + r = R; + for (a=0; anarg; a++) { + r1 = copyof(p->arg[a], cp); + if (req(r1, R)) + continue; + if (req(r, R) || req(r, r1)) + r = r1; + else { + r = p->to; + break; + } + } + assert(!req(r, R)); + update(p->to, r, cp, w); +} + +static void +visitins(Ins *i, Ref *cp, RList **w) +{ + Ref r; + + if (i->op == OCopy) { + r = copyof(i->arg[0], cp); + update(i->to, r, cp, w); + } else if (!req(i->to, R)) { + assert(rtype(i->to) == RTmp); + update(i->to, i->to, cp, w); + } +} + +void +copy(Fn *fn) +{ + Blk *b; + Ref *cp, r; + RList *w, *w1; + Use *u, *u1; + Ins *i; + Phi *p, **pp; + uint a; + int t; + + w = 0; + cp = emalloc(fn->ntmp * sizeof cp[0]); + for (b=fn->start; b; b=b->link) { + for (p=b->phi; p; p=p->link) + visitphi(p, cp, &w); + for (i=b->ins; i-b->ins < b->nins; i++) + visitins(i, cp, &w); + } + while ((w1=w)) { + t = w->t; + w = w->l; + free(w1); + u = fn->tmp[t].use; + u1 = u + fn->tmp[t].nuse; + for (; utype) { + default: + diag("copy: invalid use"); + case UPhi: + visitphi(u->u.phi, cp, &w); + break; + case UIns: + visitins(u->u.ins, cp, &w); + break; + case UJmp: + break; + } + } + for (b=fn->start; b; b=b->link) { + for (pp=&b->phi; (p=*pp);) { + r = cp[p->to.val]; + if (!req(r, p->to)) { + *pp = p->link; + continue; + } + for (a=0; anarg; a++) + if (rtype(p->arg[a]) == RTmp) { + r = cp[p->arg[a].val]; + assert(!req(r, R)); + p->arg[a] = r; + } + pp=&p->link; + } + for (i=b->ins; i-b->ins < b->nins; i++) { + r = cp[i->to.val]; + if (!req(r, i->to)) { + *i = (Ins){.op = ONop}; + continue; + } + for (a=0; a<2; a++) + if (rtype(i->arg[a]) == RTmp) { + r = cp[i->arg[a].val]; + assert(!req(r, R)); + i->arg[a] = r; + } + } + if (rtype(b->jmp.arg) == RTmp) { + r = cp[b->jmp.arg.val]; + assert(!req(r, R)); + b->jmp.arg = r; + } + } + if 
(debug['C']) { + fprintf(stderr, "\n> Copy information:"); + for (t=Tmp0; tntmp; t++) { + if (req(cp[t], R)) { + fprintf(stderr, "\n%10s not seen!", + fn->tmp[t].name); + } + else if (!req(cp[t], TMP(t))) { + fprintf(stderr, "\n%10s copy of ", + fn->tmp[t].name); + printref(cp[t], fn, stderr); + } + } + fprintf(stderr, "\n\n> After copy elimination:\n"); + printfn(fn, stderr); + } + free(cp); +} diff --git a/emit.c b/emit.c new file mode 100644 index 0000000..9b2975d --- /dev/null +++ b/emit.c @@ -0,0 +1,669 @@ +#include "all.h" + +char *locprefix, *symprefix; + +enum { + SLong = 0, + SWord = 1, + SShort = 2, + SByte = 3, + + Ki = -1, /* matches Kw and Kl */ + Ka = -2, /* matches all classes */ +}; + +/* Instruction format strings: + * + * if the format string starts with -, the instruction + * is assumed to be 3-address and is put in 2-address + * mode using an extra mov if necessary + * + * if the format string starts with +, the same as the + * above applies, but commutativity is also assumed + * + * %k is used to set the class of the instruction, + * it'll expand to "l", "q", "ss", "sd", depending + * on the instruction class + * %0 designates the first argument + * %1 designates the second argument + * %= designates the result + * + * if %k is not used, a prefix to 0, 1, or = must be + * added, it can be: + * M - memory reference + * L - long (64 bits) + * W - word (32 bits) + * H - short (16 bits) + * B - byte (8 bits) + * S - single precision float + * D - double precision float + */ +static struct { + short op; + short cls; + char *asm; +} omap[] = { + { OAdd, Ka, "+add%k %1, %=" }, + { OSub, Ka, "-sub%k %1, %=" }, + { OAnd, Ki, "+and%k %1, %=" }, + { OOr, Ki, "+or%k %1, %=" }, + { OXor, Ki, "+xor%k %1, %=" }, + { OSar, Ki, "-sar%k %B1, %=" }, + { OShr, Ki, "-shr%k %B1, %=" }, + { OShl, Ki, "-shl%k %B1, %=" }, + { OMul, Ki, "+imul%k %1, %=" }, + { OMul, Ks, "+mulss %1, %=" }, /* fixme */ + { OMul, Kd, "+mulsd %1, %=" }, + { ODiv, Ka, "-div%k %1, %=" }, + { 
OStorel, Ka, "movq %L0, %M1" }, + { OStorew, Ka, "movl %W0, %M1" }, + { OStoreh, Ka, "movw %H0, %M1" }, + { OStoreb, Ka, "movb %B0, %M1" }, + { OStores, Ka, "movss %S0, %M1" }, + { OStored, Ka, "movsd %D0, %M1" }, + { OLoad, Ka, "mov%k %M0, %=" }, + { OLoadsw, Kl, "movslq %M0, %L=" }, + { OLoadsw, Kw, "movl %M0, %W=" }, + { OLoaduw, Ki, "movl %M0, %W=" }, + { OLoadsh, Ki, "movsw%k %M0, %=" }, + { OLoaduh, Ki, "movzw%k %M0, %=" }, + { OLoadsb, Ki, "movsb%k %M0, %=" }, + { OLoadub, Ki, "movzb%k %M0, %=" }, + { OExtsw, Kl, "movslq %W0, %L=" }, + { OExtuw, Kl, "movl %W0, %W=" }, + { OExtsh, Ki, "movsw%k %H0, %=" }, + { OExtuh, Ki, "movzw%k %H0, %=" }, + { OExtsb, Ki, "movsb%k %B0, %=" }, + { OExtub, Ki, "movzb%k %B0, %=" }, + + { OExts, Kd, "cvtss2sd %0, %=" }, /* see if factorization is possible */ + { OTruncd, Ks, "cvttsd2ss %0, %=" }, + { OFtosi, Kw, "cvttss2si %0, %=" }, + { OFtosi, Kl, "cvttsd2si %0, %=" }, + { OSitof, Ks, "cvtsi2ss %W0, %=" }, + { OSitof, Kd, "cvtsi2sd %L0, %=" }, + { OCast, Ki, "movq %D0, %L=" }, + { OCast, Ka, "movq %L0, %D=" }, + + { OAddr, Ki, "lea%k %M0, %=" }, + { OSwap, Ki, "xchg%k %0, %1" }, + { OSign, Kl, "cqto" }, + { OSign, Kw, "cltd" }, + { OXDiv, Ki, "div%k %0" }, + { OXIDiv, Ki, "idiv%k %0" }, + { OXCmp, Ks, "comiss %S0, %S1" }, /* fixme, Kf */ + { OXCmp, Kd, "comisd %D0, %D1" }, + { OXCmp, Ki, "cmp%k %0, %1" }, + { OXTest, Ki, "test%k %0, %1" }, + { OXSet+ICeq, Ki, "setz %B=\n\tmovzb%k %B=, %=" }, + { OXSet+ICsle, Ki, "setle %B=\n\tmovzb%k %B=, %=" }, + { OXSet+ICslt, Ki, "setl %B=\n\tmovzb%k %B=, %=" }, + { OXSet+ICsgt, Ki, "setg %B=\n\tmovzb%k %B=, %=" }, + { OXSet+ICsge, Ki, "setge %B=\n\tmovzb%k %B=, %=" }, + { OXSet+ICne, Ki, "setnz %B=\n\tmovzb%k %B=, %=" }, + { OXSet+ICXnp, Ki, "setnp %B=\n\tmovsb%k %B=, %=" }, + { OXSet+ICXp, Ki, "setp %B=\n\tmovsb%k %B=, %=" }, + { NOp, 0, 0 } +}; + +static char *rname[][4] = { + [RAX] = {"rax", "eax", "ax", "al"}, + [RBX] = {"rbx", "ebx", "bx", "bl"}, + [RCX] = {"rcx", "ecx", "cx", "cl"}, 
+ [RDX] = {"rdx", "edx", "dx", "dl"}, + [RSI] = {"rsi", "esi", "si", "sil"}, + [RDI] = {"rdi", "edi", "di", "dil"}, + [RBP] = {"rbp", "ebp", "bp", "bpl"}, + [RSP] = {"rsp", "esp", "sp", "spl"}, + [R8 ] = {"r8" , "r8d", "r8w", "r8b"}, + [R9 ] = {"r9" , "r9d", "r9w", "r9b"}, + [R10] = {"r10", "r10d", "r10w", "r10b"}, + [R11] = {"r11", "r11d", "r11w", "r11b"}, + [R12] = {"r12", "r12d", "r12w", "r12b"}, + [R13] = {"r13", "r13d", "r13w", "r13b"}, + [R14] = {"r14", "r14d", "r14w", "r14b"}, + [R15] = {"r15", "r15d", "r15w", "r15b"}, +}; + + +static int +slot(int s, Fn *fn) +{ + struct { int i:14; } x; + + /* sign extend s using a bitfield */ + x.i = s; + assert(NAlign == 3); + if (x.i < 0) + return -4 * x.i; + else { + assert(fn->slot >= x.i); + return -4 * (fn->slot - x.i); + } +} + +static void +emitcon(Con *con, FILE *f) +{ + switch (con->type) { + default: + diag("emit: invalid constant"); + case CAddr: + fprintf(f, "%s%s", con->local ? locprefix : symprefix, con->label); + if (con->bits.i) + fprintf(f, "%+"PRId64, con->bits.i); + break; + case CBits: + fprintf(f, "%"PRId64, con->bits.i); + break; + } +} + +static char * +regtoa(int reg, int sz) +{ + static char buf[6]; + + if (reg >= XMM0) { + sprintf(buf, "xmm%d", reg-XMM0); + return buf; + } else + return rname[reg][sz]; +} + +static Ref +getarg(char c, Ins *i) +{ + switch (c) { + default: + diag("emit: 0, 1, = expected in format"); + case '0': + return i->arg[0]; + case '1': + return i->arg[1]; + case '=': + return i->to; + } +} + +static void emitins(Ins, Fn *, FILE *); + +static void +emitcopy(Ref r1, Ref r2, int k, Fn *fn, FILE *f) +{ + Ins icp; + + icp.op = OCopy; + icp.arg[0] = r2; + icp.to = r1; + icp.cls = k; + emitins(icp, fn, f); +} + +static void +emitf(char *s, Ins *i, Fn *fn, FILE *f) +{ + static char clstoa[][3] = {"l", "q", "ss", "sd"}; + char c; + int sz; + Ref ref; + Mem *m; + Con off; + + switch (*s) { + case '+': + if (req(i->arg[1], i->to)) { + ref = i->arg[0]; + i->arg[0] = i->arg[1]; + 
i->arg[1] = ref; + } + /* fall through */ + case '-': + if (req(i->arg[1], i->to) && !req(i->arg[0], i->to)) + diag("emit: cannot convert to 2-address"); + emitcopy(i->to, i->arg[0], i->cls, fn, f); + s++; + break; + } + + fputc('\t', f); +Next: + while ((c = *s++) != '%') + if (!c) { + fputc('\n', f); + return; + } else + fputc(c, f); + switch ((c = *s++)) { + default: + diag("emit: invalid escape"); + case '%': + fputc('%', f); + break; + case 'k': + fputs(clstoa[i->cls], f); + break; + case '0': + case '1': + case '=': + sz = KWIDE(i->cls) ? SLong : SWord; + s--; + /* fall through */ + case 'D': + case 'S': + Ref: + c = *s++; + ref = getarg(c, i); + switch (rtype(ref)) { + default: + diag("emit: invalid reference"); + case RTmp: + assert(isreg(ref)); + fprintf(f, "%%%s", regtoa(ref.val, sz)); + break; + case RSlot: + fprintf(f, "%d(%%rbp)", slot(ref.val, fn)); + break; + case RAMem: + Mem: + m = &fn->mem[ref.val & AMask]; + if (rtype(m->base) == RSlot) { + off.type = CBits; + off.bits.i = slot(m->base.val, fn); + addcon(&m->offset, &off); + m->base = TMP(RBP); + } + if (m->offset.type != CUndef) + emitcon(&m->offset, f); + fputc('(', f); + if (req(m->base, R)) + fprintf(f, "%%rip"); + else + fprintf(f, "%%%s", regtoa(m->base.val, SLong)); + if (!req(m->index, R)) + fprintf(f, ", %%%s, %d", + regtoa(m->index.val, SLong), + m->scale + ); + fputc(')', f); + break; + case RCon: + fputc('$', f); + emitcon(&fn->con[ref.val], f); + break; + } + break; + case 'L': + sz = SLong; + goto Ref; + case 'W': + sz = SWord; + goto Ref; + case 'H': + sz = SShort; + goto Ref; + case 'B': + sz = SByte; + goto Ref; + case 'M': + c = *s++; + ref = getarg(c, i); + switch (rtype(ref)) { + default: + diag("emit: invalid memory reference"); + case RAMem: + goto Mem; + case RSlot: + fprintf(f, "%d(%%rbp)", slot(ref.val, fn)); + break; + case RCon: + emitcon(&fn->con[ref.val], f); + fprintf(f, "(%%rip)"); + break; + case RTmp: + assert(isreg(ref)); + fprintf(f, "(%%%s)", regtoa(ref.val, 
SLong)); + break; + } + break; + } + goto Next; +} + +static void +emitins(Ins i, Fn *fn, FILE *f) +{ + Ref r; + int64_t val; + int o; + + switch (i.op) { + default: + Table: + /* most instructions are just pulled out of + * the table omap[], some special cases are + * detailed below */ + for (o=0;; o++) { + /* this linear search should really be a binary + * search */ + if (omap[o].op == NOp) + diag("emit: no entry found for instruction"); + if (omap[o].op == i.op) + if (omap[o].cls == i.cls + || (omap[o].cls == Ki && KBASE(i.cls) == 0) + || (omap[o].cls == Ka)) + break; + } + emitf(omap[o].asm, &i, fn, f); + break; + case ONop: + /* just do nothing for nops, they are inserted + * by some passes */ + break; + case OMul: + /* here, we try to use the 3-address form + * of multiplication when possible */ + if (rtype(i.arg[1]) == RCon) { + r = i.arg[0]; + i.arg[0] = i.arg[1]; + i.arg[1] = r; + } + if (KBASE(i.cls) == 0 /* only available for ints */ + && rtype(i.arg[0]) == RCon + && rtype(i.arg[1]) == RTmp) { + emitf("imul%k %0, %1, %=", &i, fn, f); + break; + } + goto Table; + case OSub: + /* we have to use the negation trick to handle + * some 3-address subtractions */ + if (req(i.to, i.arg[1])) { + emitf("neg%k %=", &i, fn, f); + emitf("add%k %0, %=", &i, fn, f); + break; + } + goto Table; + case OCopy: + /* make sure we don't emit useless copies, + * also, we can use a trick to load 64-bits + * registers, it's detailed in my note below + * http://c9x.me/art/notes.html?09/19/2015 */ + if (req(i.to, R) || req(i.arg[0], R)) + break; + if (isreg(i.to) + && rtype(i.arg[0]) == RCon + && i.cls == Kl + && fn->con[i.arg[0].val].type == CBits + && (val = fn->con[i.arg[0].val].bits.i) >= 0 + && val <= UINT32_MAX) { + emitf("movl %W0, %W=", &i, fn, f); + } else if (isreg(i.to) + && rtype(i.arg[0]) == RCon + && fn->con[i.arg[0].val].type == CAddr) { + emitf("lea%k %M0, %=", &i, fn, f); + } else if (!req(i.arg[0], i.to)) + emitf("mov%k %0, %=", &i, fn, f); + break; + case 
OCall: + /* calls simply have a weird syntax in AT&T + * assembly... */ + switch (rtype(i.arg[0])) { + default: + diag("emit: invalid call instruction"); + case RCon: + fprintf(f, "\tcallq "); + emitcon(&fn->con[i.arg[0].val], f); + fprintf(f, "\n"); + break; + case RTmp: + emitf("callq *%L0", &i, fn, f); + break; + } + break; + case OSAlloc: + /* there is no good reason why this is here + * maybe we should split OSAlloc in 2 different + * instructions depending on the result + */ + emitf("subq %L0, %%rsp", &i, fn, f); + if (!req(i.to, R)) + emitcopy(i.to, TMP(RSP), Kl, fn, f); + break; + case OSwap: + if (KBASE(i.cls) == 0) + goto Table; + /* for floats, there is no swap instruction + * so we use xmm15 as a temporary + */ + emitcopy(TMP(XMM0+15), i.arg[0], i.cls, fn, f); + emitcopy(i.arg[0], i.arg[1], i.cls, fn, f); + emitcopy(i.arg[1], TMP(XMM0+15), i.cls, fn, f); + break; + } +} + +static int +cneg(int cmp) +{ + switch (cmp) { + default: diag("emit: cneg() unhandled comparison"); + case ICule: return ICugt; + case ICult: return ICuge; + case ICsle: return ICsgt; + case ICslt: return ICsge; + case ICsgt: return ICsle; + case ICsge: return ICslt; + case ICugt: return ICule; + case ICuge: return ICult; + case ICeq: return ICne; + case ICne: return ICeq; + case ICXnp: return ICXp; + case ICXp: return ICXnp; + } +} + +static int +framesz(Fn *fn) +{ + int i, o, f; + + assert(NAlign == 3); + for (i=0, o=0; ireg >> rclob[i]); + f = fn->slot; + f = (f + 3) & -4; + return 4*f + 8*o; +} + +void +emitfn(Fn *fn, FILE *f) +{ + static char *ctoa[] = { + [ICeq] = "z", + [ICule] = "be", + [ICult] = "b", + [ICsle] = "le", + [ICslt] = "l", + [ICsgt] = "g", + [ICsge] = "ge", + [ICugt] = "a", + [ICuge] = "ae", + [ICne] = "nz", + [ICXnp] = "np", + [ICXp] = "p" + }; + Blk *b, *s; + Ins *i, itmp; + int *r, c, fs; + + fprintf(f, ".text\n"); + if (fn->export) + fprintf(f, ".globl %s%s\n", symprefix, fn->name); + fprintf(f, + "%s%s:\n" + "\tpush %%rbp\n" + "\tmov %%rsp, %%rbp\n", + 
symprefix, fn->name + ); + fs = framesz(fn); + if (fs) + fprintf(f, "\tsub $%d, %%rsp\n", fs); + for (r=rclob; r-rclob < NRClob; r++) + if (fn->reg & BIT(*r)) { + itmp.arg[0] = TMP(*r); + emitf("pushq %L0", &itmp, fn, f); + } + + for (b=fn->start; b; b=b->link) { + fprintf(f, "%s%s:\n", locprefix, b->name); + for (i=b->ins; i!=&b->ins[b->nins]; i++) + emitins(*i, fn, f); + switch (b->jmp.type) { + case JRet0: + for (r=&rclob[NRClob]; r>rclob;) + if (fn->reg & BIT(*--r)) { + itmp.arg[0] = TMP(*r); + emitf("popq %L0", &itmp, fn, f); + } + fprintf(f, + "\tleave\n" + "\tret\n" + ); + break; + case JJmp: + if (b->s1 != b->link) + fprintf(f, "\tjmp %s%s\n", locprefix, b->s1->name); + break; + default: + c = b->jmp.type - JXJc; + if (0 <= c && c <= NXICmp) { + if (b->link == b->s2) { + s = b->s1; + } else if (b->link == b->s1) { + c = cneg(c); + s = b->s2; + } else + diag("emit: unhandled jump (1)"); + fprintf(f, "\tj%s %s%s\n", ctoa[c], locprefix, s->name); + break; + } + diag("emit: unhandled jump (2)"); + } + } + +} + +void +emitdat(Dat *d, FILE *f) +{ + static int align; + static char *dtoa[] = { + [DAlign] = ".align", + [DB] = "\t.byte", + [DH] = "\t.value", + [DW] = "\t.long", + [DL] = "\t.quad" + }; + + switch (d->type) { + case DStart: + align = 0; + fprintf(f, ".data\n"); + break; + case DEnd: + break; + case DName: + if (!align) + fprintf(f, ".align 8\n"); + if (d->export) + fprintf(f, ".globl %s%s\n", symprefix, d->u.str); + fprintf(f, "%s%s:\n", symprefix, d->u.str); + break; + case DZ: + fprintf(f, "\t.fill %"PRId64",1,0\n", d->u.num); + break; + default: + if (d->type == DAlign) + align = 1; + + if (d->isstr) { + if (d->type != DB) + err("strings only supported for 'b' currently"); + fprintf(f, "\t.ascii \"%s\"\n", d->u.str); + } + else if (d->isref) { + fprintf(f, "%s %s%+"PRId64"\n", + dtoa[d->type], d->u.ref.nam, + d->u.ref.off); + } + else { + fprintf(f, "%s %"PRId64"\n", + dtoa[d->type], d->u.num); + } + break; + } +} + +typedef struct FBits FBits; + 
+struct FBits { + int64_t bits; + int wide; + FBits *link; +}; + +static FBits *stash; + +/* Record the floating point constant whose bit pattern + * is n (w is non-zero for a double, zero for a single) + * and return its index in the stash; emitfin() defines + * the matching local label fp<index>. + */ +int +stashfp(int64_t n, int w) +{ + FBits **pb, *b; + int i; + + /* does a dumb de-dup of fp constants + * this should be the linker's job */ + for (pb=&stash, i=0; (b=*pb); pb=&b->link, i++) + if (n == b->bits && w == b->wide) + return i; + b = emalloc(sizeof *b); + b->bits = n; + b->wide = w; + b->link = 0; + *pb = b; + return i; +} + +/* Emit all the stashed floating point constants in the + * data section of f, doubles as .quad and singles as + * .long, each followed by its value in a comment, then + * free the stash. Nothing is emitted when the stash is + * empty. + */ +void +emitfin(FILE *f) +{ + /* the bits are reinterpreted through unions, reading + * them through a casted pointer breaks the aliasing + * rules and, for singles, picks the wrong half of + * the int64_t on big-endian hosts */ + union { int64_t i; double d; } wide; + union { uint32_t i; float s; } narrow; + FBits *b; + int i; + + if (!stash) + return; + fprintf(f, "/* floating point constants */\n"); + fprintf(f, ".data\n.align 8\n"); + for (b=stash, i=0; b; b=b->link, i++) + if (b->wide) { + wide.i = b->bits; + fprintf(f, + "%sfp%d:\n" + "\t.quad %"PRId64 + " /* %f */\n", + locprefix, i, b->bits, + wide.d + ); + } + for (b=stash, i=0; b; b=b->link, i++) + if (!b->wide) { + narrow.i = b->bits & 0xffffffff; + fprintf(f, + "%sfp%d:\n" + "\t.long %"PRId64 + " /* %lf */\n", + locprefix, i, b->bits & 0xffffffff, + narrow.s + ); + } + while ((b=stash)) { + stash = b->link; + free(b); + } +} diff --git a/isel.c b/isel.c new file mode 100644 index 0000000..2a55733 --- /dev/null +++ b/isel.c @@ -0,0 +1,1136 @@ +#include "all.h" +#include + +/* For x86_64, do the following: + * + * - lower calls + * - check that constants are used only in + * places allowed + * - ensure immediates always fit in 32b + * - explicit machine register constraints + * on instructions like division. + * - implement fast locals (the streak of + * constant allocX in the first basic block) + * - recognize complex addressing modes + * + * Invariant: the use counts that are used + * in sel() must be sound. This + * is not so trivial, maybe the + * dce should be moved out... 
+ */ + +typedef struct ANum ANum; +typedef struct AClass AClass; +typedef struct RAlloc RAlloc; + +struct ANum { + char n, l, r; + Ins *i; + Ref mem; +}; + +static void amatch(Addr *, Ref, ANum *, Fn *, int); + +static int +fcmptoi(int fc) +{ + switch (fc) { + default: diag("isel: fcmptoi defaulted"); + case FCle: return ICule; + case FClt: return ICult; + case FCgt: return ICugt; + case FCge: return ICuge; + case FCne: return ICne; + case FCeq: return ICeq; + case FCo: return ICXnp; + case FCuo: return ICXp; + } +} + +static int +iscmp(int op, int *pk, int *pc) +{ + int k, c; + + if (OCmpw <= op && op <= OCmpw1) { + c = op - OCmpw; + k = Kw; + } + else if (OCmpl <= op && op <= OCmpl1) { + c = op - OCmpl; + k = Kl; + } + else if (OCmps <= op && op <= OCmps1) { + c = fcmptoi(op - OCmps); + k = Ks; + } + else if (OCmpd <= op && op <= OCmpd1) { + c = fcmptoi(op - OCmpd); + k = Kd; + } + else + return 0; + if (pk) + *pk = k; + if (pc) + *pc = c; + return 1; +} + +static int +noimm(Ref r, Fn *fn) +{ + int64_t val; + + if (rtype(r) != RCon) + return 0; + switch (fn->con[r.val].type) { + default: + diag("isel: invalid constant"); + case CAddr: + /* we only support the 'small' + * code model of the ABI, this + * means that we can always + * address data with 32bits + */ + return 0; + case CBits: + val = fn->con[r.val].bits.i; + return (val < INT32_MIN || val > INT32_MAX); + } +} + +static int +rslot(Ref r, Fn *fn) +{ + if (rtype(r) != RTmp) + return -1; + return fn->tmp[r.val].slot; +} + +static int +argcls(Ins *i, int n) +{ + return opdesc[i->op].argcls[n][i->cls]; +} + +static void +fixarg(Ref *r, int k, int phi, Fn *fn) +{ + Addr a; + Ref r0, r1; + int s, n; + + r1 = r0 = *r; + s = rslot(r0, fn); + if (KBASE(k) == 1 && rtype(r0) == RCon) { + /* load floating points from memory + * slots, they can't be used as + * immediates + */ + r1 = MEM(fn->nmem); + vgrow(&fn->mem, ++fn->nmem); + memset(&a, 0, sizeof a); + a.offset.type = CAddr; + a.offset.local = 1; + n = 
stashfp(fn->con[r0.val].bits.i, KWIDE(k)); + sprintf(a.offset.label, "fp%d", n); + fn->mem[fn->nmem-1] = a; + } + else if (!phi && k == Kl && noimm(r0, fn)) { + /* load constants that do not fit in + * a 32bit signed integer into a + * long temporary + */ + r1 = newtmp("isel", Kl, fn); + emit(OCopy, Kl, r1, r0, R); + } + else if (s != -1) { + /* load fast locals' addresses into + * temporaries right before the + * instruction + */ + r1 = newtmp("isel", Kl, fn); + emit(OAddr, Kl, r1, SLOT(s), R); + } + *r = r1; +} + +static void +chuse(Ref r, int du, Fn *fn) +{ + if (rtype(r) == RTmp) + fn->tmp[r.val].nuse += du; +} + +static void +seladdr(Ref *r, ANum *an, Fn *fn) +{ + Addr a; + Ref r0, r1; + + r0 = *r; + if (rtype(r0) == RTmp) { + chuse(r0, -1, fn); + r1 = an[r0.val].mem; + if (req(r1, R)) { + amatch(&a, r0, an, fn, 1); + vgrow(&fn->mem, ++fn->nmem); + fn->mem[fn->nmem-1] = a; + r1 = MEM(fn->nmem-1); + chuse(a.base, +1, fn); + chuse(a.index, +1, fn); + if (rtype(a.base) != RTmp) + if (rtype(a.index) != RTmp) + an[r0.val].mem = r1; + } + *r = r1; + } +} + +static void +selcmp(Ref arg[2], int k, Fn *fn) +{ + Ref r; + + if (rtype(arg[0]) == RCon) { + r = arg[1]; + arg[1] = arg[0]; + arg[0] = r; + } + assert(rtype(arg[0]) != RCon); + emit(OXCmp, k, R, arg[1], arg[0]); + fixarg(&curi->arg[0], k, 0, fn); +} + +static void +sel(Ins i, ANum *an, Fn *fn) +{ + Ref r0, r1; + int x, k, kc; + int64_t val; + Ins *i0; + + if (rtype(i.to) == RTmp) + if (!isreg(i.to) && !isreg(i.arg[0]) && !isreg(i.arg[1])) + if (fn->tmp[i.to.val].nuse == 0) { + chuse(i.arg[0], -1, fn); + chuse(i.arg[1], -1, fn); + return; + } + i0 = curi; + k = i.cls; + switch (i.op) { + case ODiv: + case ORem: + case OUDiv: + case OURem: + if (i.op == ODiv || i.op == OUDiv) + r0 = TMP(RAX), r1 = TMP(RDX); + else + r0 = TMP(RDX), r1 = TMP(RAX); + emit(OCopy, k, i.to, r0, R); + emit(OCopy, k, R, r1, R); + if (rtype(i.arg[1]) == RCon) { + /* immediates not allowed for + * divisions in x86 + */ + r0 = newtmp("isel", 
k, fn); + } else + r0 = i.arg[1]; + if (i.op == ODiv || i.op == ORem) { + emit(OXIDiv, k, R, r0, R); + emit(OSign, k, TMP(RDX), TMP(RAX), R); + } else { + emit(OXDiv, k, R, r0, R); + emit(OCopy, k, TMP(RDX), CON_Z, R); + } + emit(OCopy, k, TMP(RAX), i.arg[0], R); + if (rtype(i.arg[1]) == RCon) + emit(OCopy, k, r0, i.arg[1], R); + break; + case OSar: + case OShr: + case OShl: + if (rtype(i.arg[1]) == RCon) + goto Emit; + r0 = i.arg[1]; + i.arg[1] = TMP(RCX); + emit(OCopy, Kw, R, TMP(RCX), R); + emiti(i); + emit(OCopy, Kw, TMP(RCX), r0, R); + break; + case ONop: + break; + case OStored: + case OStores: + case OStorel: + case OStorew: + case OStoreh: + case OStoreb: + if (rtype(i.arg[0]) == RCon) { + if (i.op == OStored) + i.op = OStorel; + if (i.op == OStores) + i.op = OStorew; + } + seladdr(&i.arg[1], an, fn); + goto Emit; + case_OLoad: + seladdr(&i.arg[0], an, fn); + goto Emit; + case OCall: + case OSAlloc: + case OCopy: + case OAdd: + case OSub: + case OMul: + case OAnd: + case OOr: + case OXor: + case OXTest: + case OFtosi: + case OSitof: + case OExts: + case OTruncd: + case OCast: + case_OExt: +Emit: + emiti(i); + fixarg(&curi->arg[0], argcls(curi, 0), 0, fn); + fixarg(&curi->arg[1], argcls(curi, 1), 0, fn); + break; + case OAlloc: + case OAlloc+1: + case OAlloc+2: /* == OAlloc1 */ + /* we need to make sure + * the stack remains aligned + * (rsp = 0) mod 16 + */ + if (rtype(i.arg[0]) == RCon) { + assert(fn->con[i.arg[0].val].type == CBits); + val = fn->con[i.arg[0].val].bits.i; + val = (val + 15) & ~INT64_C(15); + if (val < 0 || val > INT32_MAX) + diag("isel: alloc too large"); + emit(OSAlloc, Kl, i.to, getcon(val, fn), R); + } else { + /* r0 = (i.arg[0] + 15) & -16 */ + r0 = newtmp("isel", Kl, fn); + r1 = newtmp("isel", Kl, fn); + emit(OSAlloc, Kl, i.to, r0, R); + emit(OAnd, Kl, r0, r1, getcon(-16, fn)); + emit(OAdd, Kl, r1, i.arg[0], getcon(15, fn)); + } + break; + default: + if (isext(i.op)) + goto case_OExt; + if (isload(i.op)) + goto case_OLoad; + if 
(iscmp(i.op, &kc, &x)) { + if (rtype(i.arg[0]) == RCon) + x = icmpop(x); + emit(OXSet+x, k, i.to, R, R); + selcmp(i.arg, kc, fn); + break; + } + diag("isel: non-exhaustive implementation"); + } + + while (i0 > curi && --i0) + if (rslot(i0->arg[0], fn) != -1 + || rslot(i0->arg[1], fn) != -1) + diag("isel: usupported address argument"); +} + +static Ins * +flagi(Ins *i0, Ins *i) +{ + while (i>i0) { + i--; + if (opdesc[i->op].sflag) + return i; + if (opdesc[i->op].lflag) + continue; + return 0; + } + return 0; +} + +struct AClass { + int inmem; + int align; + uint size; + int cls[2]; +}; + +static void +aclass(AClass *a, Typ *t) +{ + int e, s, n, cls; + uint sz, al; + + sz = t->size; + al = 1u << t->align; + + /* the ABI requires sizes to be rounded + * up to the nearest multiple of 8, moreover + * it makes it easy load and store structures + * in registers + */ + if (al < 8) + al = 8; + sz = (sz + al-1) & -al; + + a->size = sz; + a->align = t->align; + + if (t->dark || sz > 16) { + /* large or unaligned structures are + * required to be passed in memory + */ + a->inmem = 1; + return; + } + + a->inmem = 0; + for (e=0, s=0; e<2; e++) { + cls = -1; + for (n=0; n<8 && t->seg[s].len; s++) { + if (t->seg[s].ispad) { + /* don't change anything */ + } + else if (t->seg[s].isflt) { + if (cls == -1) + cls = Kd; + } + else + cls = Kl; + n += t->seg[s].len; + } + assert(n <= 8); + a->cls[e] = cls; + } +} + +static void +blit(Ref rstk, uint soff, Ref rsrc, uint sz, Fn *fn) +{ + Ref r, r1; + uint boff; + + /* it's an impolite blit, we might go across the end + * of the source object a little bit... 
*/ + for (boff=0; sz>0; sz-=8, soff+=8, boff+=8) { + r = newtmp("abi", Kl, fn); + r1 = newtmp("abi", Kl, fn); + emit(OStorel, 0, R, r, r1); + emit(OAdd, Kl, r1, rstk, getcon(soff, fn)); + r1 = newtmp("abi", Kl, fn); + emit(OLoad, Kl, r, r1, R); + emit(OAdd, Kl, r1, rsrc, getcon(boff, fn)); + chuse(rsrc, +1, fn); + chuse(rstk, +1, fn); + } +} + +static int +retr(Ref reg[2], AClass *aret) +{ + static int retreg[2][2] = {{RAX, RDX}, {XMM0, XMM0+1}}; + int n, k, ca, nr[2]; + + nr[0] = nr[1] = 0; + ca = 0; + for (n=0; aret->cls[n]>=0 && n<2; n++) { + k = KBASE(aret->cls[n]); + reg[n] = TMP(retreg[k][nr[k]++]); + ca += 1 << (2 * k); + } + return ca; +} + +static void +selret(Blk *b, Fn *fn) +{ + int j, k, ca; + Ref r, r0, reg[2]; + AClass aret; + + j = b->jmp.type; + + if (!isret(j) || j == JRet0) + return; + + r0 = b->jmp.arg; + b->jmp.type = JRet0; + + if (j == JRetc) { + aclass(&aret, &typ[fn->retty]); + if (aret.inmem) { + assert(rtype(fn->retr) == RTmp); + emit(OCopy, Kl, TMP(RAX), fn->retr, R); + chuse(fn->retr, +1, fn); + blit(fn->retr, 0, r0, aret.size, fn); + ca = 1; + } else { + ca = retr(reg, &aret); + if (aret.size > 8) { + r = newtmp("abi", Kl, fn); + emit(OLoad, Kl, reg[1], r, R); + emit(OAdd, Kl, r, r0, getcon(8, fn)); + chuse(r0, +1, fn); + } + emit(OLoad, Kl, reg[0], r0, R); + } + } else { + k = j - JRetw; + if (KBASE(k) == 0) { + emit(OCopy, k, TMP(RAX), r0, R); + ca = 1; + } else { + emit(OCopy, k, TMP(XMM0), r0, R); + ca = 1 << 2; + } + } + + b->jmp.arg = CALL(ca); +} + +static void +seljmp(Blk *b, Fn *fn) +{ + Ref r; + int c, k; + Ins *fi; + + if (b->jmp.type == JRet0 || b->jmp.type == JJmp) + return; + assert(b->jmp.type == JJnz); + r = b->jmp.arg; + b->jmp.arg = R; + assert(!req(r, R)); + if (rtype(r) == RCon) { + b->jmp.type = JJmp; + if (req(r, CON_Z)) + b->s1 = b->s2; + b->s2 = 0; + return; + } + fi = flagi(b->ins, &b->ins[b->nins]); + if (fi && req(fi->to, r)) { + if (iscmp(fi->op, &k, &c)) { + if (rtype(fi->arg[0]) == RCon) + c = icmpop(c); + 
b->jmp.type = JXJc + c; + if (fn->tmp[r.val].nuse == 1) { + assert(fn->tmp[r.val].ndef == 1); + selcmp(fi->arg, k, fn); + *fi = (Ins){.op = ONop}; + } + return; + } + if (fi->op == OAnd && fn->tmp[r.val].nuse == 1 + && (rtype(fi->arg[0]) == RTmp || + rtype(fi->arg[1]) == RTmp)) { + fi->op = OXTest; + fi->to = R; + b->jmp.type = JXJc + ICne; + if (rtype(fi->arg[1]) == RCon) { + r = fi->arg[1]; + fi->arg[1] = fi->arg[0]; + fi->arg[0] = r; + } + return; + } + /* since flags are not tracked in liveness, + * the result of the flag-setting instruction + * has to be marked as live + */ + if (fn->tmp[r.val].nuse == 1) + emit(OCopy, Kw, R, r, R); + b->jmp.type = JXJc + ICne; + return; + } + selcmp((Ref[2]){r, CON_Z}, Kw, fn); /* todo, add long branch if non-zero */ + b->jmp.type = JXJc + ICne; +} + +static int +classify(Ins *i0, Ins *i1, AClass *ac, int op, AClass *aret) +{ + int nint, ni, nsse, ns, n, *pn; + AClass *a; + Ins *i; + + if (aret && aret->inmem) + nint = 5; /* hidden argument */ + else + nint = 6; + nsse = 8; + for (i=i0, a=ac; iop == op) { + if (KBASE(i->cls) == 0) + pn = &nint; + else + pn = &nsse; + if (*pn > 0) { + --*pn; + a->inmem = 0; + } else + a->inmem = 2; + a->align = 3; + a->size = 8; + a->cls[0] = i->cls; + } else { + n = i->arg[0].val & AMask; + aclass(a, &typ[n]); + if (a->inmem) + continue; + ni = ns = 0; + for (n=0; n<2; n++) + if (KBASE(a->cls[n]) == 0) + ni++; + else + ns++; + if (nint >= ni && nsse >= ns) { + nint -= ni; + nsse -= ns; + } else + a->inmem = 1; + } + } + + return ((6-nint) << 4) | ((8-nsse) << 8); +} + +int rsave[] = { + RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14 +}; +int rclob[] = {RBX, R12, R13, R14, R15}; + +MAKESURE(rsave_has_correct_size, sizeof rsave == NRSave * sizeof(int)); +MAKESURE(rclob_has_correct_size, sizeof rclob == NRClob * sizeof(int)); + +bits +retregs(Ref r, int p[2]) +{ + bits b; + int ni, nf; + + 
assert(rtype(r) == RACall); + b = 0; + ni = r.val & 3; + nf = (r.val >> 2) & 3; + if (ni >= 1) + b |= BIT(RAX); + if (ni >= 2) + b |= BIT(RDX); + if (nf >= 1) + b |= BIT(XMM0); + if (nf >= 2) + b |= BIT(XMM1); + if (p) { + p[0] = ni; + p[1] = nf; + } + return b; +} + +bits +argregs(Ref r, int p[2]) +{ + bits b; + int j, ni, nf; + + assert(rtype(r) == RACall); + b = 0; + ni = (r.val >> 4) & 15; + nf = (r.val >> 8) & 15; + for (j=0; jarg[1], R)) { + assert(rtype(i1->arg[1]) == RAType); + aclass(&aret, &typ[i1->arg[1].val & AMask]); + ca = classify(i0, i1, ac, OArg, &aret); + } else + ca = classify(i0, i1, ac, OArg, 0); + + for (stk=0, a=&ac[i1-i0]; a>ac;) + if ((--a)->inmem) { + assert(a->align <= 4); + stk += a->size; + if (a->align == 4) + stk += stk & 15; + } + stk += stk & 15; + if (stk) { + r = getcon(-(int64_t)stk, fn); + emit(OSAlloc, Kl, R, r, R); + } + + if (!req(i1->arg[1], R)) { + if (aret.inmem) { + /* get the return location from eax + * it saves one callee-save reg */ + r1 = newtmp("abi", Kl, fn); + emit(OCopy, Kl, i1->to, TMP(RAX), R); + ca += 1; + } else { + if (aret.size > 8) { + r = newtmp("abi", Kl, fn); + regcp[1] = newtmp("abi", aret.cls[1], fn); + emit(OStorel, 0, R, regcp[1], r); + emit(OAdd, Kl, r, i1->to, getcon(8, fn)); + chuse(i1->to, +1, fn); + ca += 1 << (2 * KBASE(aret.cls[1])); + } + regcp[0] = newtmp("abi", aret.cls[0], fn); + emit(OStorel, 0, R, regcp[0], i1->to); + ca += 1 << (2 * KBASE(aret.cls[0])); + retr(reg, &aret); + if (aret.size > 8) + emit(OCopy, aret.cls[1], regcp[1], reg[1], R); + emit(OCopy, aret.cls[0], regcp[0], reg[0], R); + r1 = i1->to; + } + /* allocate return pad */ + ra = alloc(sizeof *ra); + assert(NAlign == 3); + aret.align -= 2; + if (aret.align < 0) + aret.align = 0; + ra->i.op = OAlloc + aret.align; + ra->i.cls = Kl; + ra->i.to = r1; + ra->i.arg[0] = getcon(aret.size, fn); + ra->link = (*rap); + *rap = ra; + } else { + ra = 0; + if (KBASE(i1->cls) == 0) { + emit(OCopy, i1->cls, i1->to, TMP(RAX), R); + ca += 1; 
+ } else { + emit(OCopy, i1->cls, i1->to, TMP(XMM0), R); + ca += 1 << 2; + } + } + emit(OCall, i1->cls, R, i1->arg[0], CALL(ca)); + emit(OCopy, Kw, TMP(RAX), getcon((ca >> 8) & 15, fn), R); + + ni = ns = 0; + if (ra && aret.inmem) + emit(OCopy, Kl, rarg(Kl, &ni, &ns), ra->i.to, R); /* pass hidden argument */ + for (i=i0, a=ac; iinmem) + continue; + r1 = rarg(a->cls[0], &ni, &ns); + if (i->op == OArgc) { + if (a->size > 8) { + r2 = rarg(a->cls[1], &ni, &ns); + r = newtmp("abi", Kl, fn); + emit(OLoad, a->cls[1], r2, r, R); + emit(OAdd, Kl, r, i->arg[1], getcon(8, fn)); + chuse(i->arg[1], +1, fn); + } + emit(OLoad, a->cls[0], r1, i->arg[1], R); + } else + emit(OCopy, i->cls, r1, i->arg[0], R); + } + + if (!stk) + return; + + r = newtmp("abi", Kl, fn); + chuse(r, -1, fn); + for (i=i0, a=ac, off=0; iinmem) + continue; + if (i->op == OArgc) { + if (a->align == 4) + off += off & 15; + blit(r, off, i->arg[1], a->size, fn); + } else { + r1 = newtmp("abi", Kl, fn); + emit(OStorel, 0, R, i->arg[0], r1); + emit(OAdd, Kl, r1, r, getcon(off, fn)); + chuse(r, +1, fn); + } + off += a->size; + } + emit(OSAlloc, Kl, r, getcon(stk, fn), R); +} + +static void +selpar(Fn *fn, Ins *i0, Ins *i1) +{ + AClass *ac, *a, aret; + Ins *i; + int ni, ns, s, al; + Ref r, r1; + + ac = alloc((i1-i0) * sizeof ac[0]); + curi = insb; + ni = ns = 0; + + if (fn->retty >= 0) { + aclass(&aret, &typ[fn->retty]); + if (aret.inmem) { + r = newtmp("abi", Kl, fn); + *curi++ = (Ins){OCopy, r, {rarg(Kl, &ni, &ns)}, Kl}; + fn->retr = r; + } + classify(i0, i1, ac, OPar, &aret); + } else + classify(i0, i1, ac, OPar, 0); + + assert(NAlign == 3); + + s = 4; + for (i=i0, a=ac; iinmem) { + case 1: + assert(a->align <= 4); + if (a->align == 4) + s = (s+3) & -4; + fn->tmp[i->to.val].slot = -s; /* HACK! 
*/ + s += a->size / 4; + continue; + case 2: + *curi++ = (Ins){OLoad, i->to, {SLOT(-s)}, i->cls}; + s += 2; + continue; + } + r1 = rarg(a->cls[0], &ni, &ns); + if (i->op == OParc) { + r = newtmp("abi", Kl, fn); + *curi++ = (Ins){OCopy, r, {r1}, Kl}; + a->cls[0] = r.val; + if (a->size > 8) { + r1 = rarg(a->cls[1], &ni, &ns); + r = newtmp("abi", Kl, fn); + *curi++ = (Ins){OCopy, r, {r1}, Kl}; + a->cls[1] = r.val; + } + } else + *curi++ = (Ins){OCopy, i->to, {r1}, i->cls}; + } + for (i=i0, a=ac; iop != OParc || a->inmem) + continue; + assert(NAlign == 3); + for (al=0; a->align >> (al+2); al++) + ; + r = TMP(a->cls[0]); + r1 = i->to; + *curi++ = (Ins){OAlloc+al, r1, {getcon(a->size, fn)}, Kl}; + *curi++ = (Ins){OStorel, R, {r, r1}, 0}; + if (a->size > 8) { + r = newtmp("abi", Kl, fn); + *curi++ = (Ins){OAdd, r, {r1, getcon(8, fn)}, Kl}; + r1 = TMP(a->cls[1]); + *curi++ = (Ins){OStorel, R, {r1, r}, 0}; + } + } +} + +static int +aref(Ref r, ANum *ai) +{ + switch (rtype(r)) { + default: + diag("isel: aref defaulted"); + case RCon: + return 2; + case RTmp: + return ai[r.val].n; + } +} + +static int +ascale(Ref r, Con *con) +{ + int64_t n; + + if (rtype(r) != RCon) + return 0; + if (con[r.val].type != CBits) + return 0; + n = con[r.val].bits.i; + return n == 1 || n == 2 || n == 4 || n == 8; +} + +static void +anumber(ANum *ai, Blk *b, Con *con) +{ + /* This should be made obsolete by a proper + * reassoc pass. 
+ * + * Rules: + * + * RTmp(_) -> 0 tmp + * ( RTmp(_) -> 1 slot ) + * RCon(_) -> 2 con + * 0 * 2 -> 3 s * i (when constant is 1,2,4,8) + */ + static char add[10][10] = { + [2] [2] = 2, /* folding */ + [2] [5] = 5, [5] [2] = 5, + [2] [6] = 6, [6] [2] = 6, + [2] [7] = 7, [7] [2] = 7, + [0] [0] = 4, /* 4: b + s * i */ + [0] [3] = 4, [3] [0] = 4, + [2] [3] = 5, [3] [2] = 5, /* 5: o + s * i */ + [0] [2] = 6, [2] [0] = 6, /* 6: o + b */ + [2] [4] = 7, [4] [2] = 7, /* 7: o + b + s * i */ + [0] [5] = 7, [5] [0] = 7, + [6] [3] = 7, [3] [6] = 7, + + }; + int a, a1, a2, n1, n2, t1, t2; + Ins *i; + + for (i=b->ins; i-b->ins < b->nins; i++) { + if (rtype(i->to) == RTmp) + ai[i->to.val].i = i; + if (i->op != OAdd && i->op != OMul) + continue; + a1 = aref(i->arg[0], ai); + a2 = aref(i->arg[1], ai); + t1 = a1 != 1 && a1 != 2; + t2 = a2 != 1 && a2 != 2; + if (i->op == OAdd) { + a = add[n1 = a1][n2 = a2]; + if (t1 && a < add[0][a2]) + a = add[n1 = 0][n2 = a2]; + if (t2 && a < add[a1][0]) + a = add[n1 = a1][n2 = 0]; + if (t1 && t2 && a < add[0][0]) + a = add[n1 = 0][n2 = 0]; + } else { + n1 = n2 = a = 0; + if (ascale(i->arg[0], con) && t2) + a = 3, n1 = 2, n2 = 0; + if (t1 && ascale(i->arg[1], con)) + a = 3, n1 = 0, n2 = 2; + } + ai[i->to.val].n = a; + ai[i->to.val].l = n1; + ai[i->to.val].r = n2; + } +} + +static void +amatch(Addr *a, Ref r, ANum *ai, Fn *fn, int top) +{ + Ins *i; + int nl, nr, t, s; + Ref al, ar; + + if (top) + memset(a, 0, sizeof *a); + if (rtype(r) == RCon) { + addcon(&a->offset, &fn->con[r.val]); + return; + } + assert(rtype(r) == RTmp); + i = ai[r.val].i; + nl = ai[r.val].l; + nr = ai[r.val].r; + if (i) { + if (nl > nr) { + al = i->arg[1]; + ar = i->arg[0]; + t = nl, nl = nr, nr = t; + } else { + al = i->arg[0]; + ar = i->arg[1]; + } + } + switch (ai[r.val].n) { + default: + diag("isel: amatch defaulted"); + case 3: /* s * i */ + if (!top) { + a->index = al; + a->scale = fn->con[ar.val].bits.i; + } else + a->base = r; + break; + case 4: /* b + s * i */ + switch 
(nr) { + case 0: + if (fn->tmp[ar.val].slot != -1) { + al = i->arg[1]; + ar = i->arg[0]; + } + a->index = ar; + a->scale = 1; + break; + case 3: + amatch(a, ar, ai, fn, 0); + break; + } + r = al; + case 0: + s = fn->tmp[r.val].slot; + if (s != -1) + r = SLOT(s); + a->base = r; + break; + case 2: /* constants */ + case 5: /* o + s * i */ + case 6: /* o + b */ + case 7: /* o + b + s * i */ + amatch(a, ar, ai, fn, 0); + amatch(a, al, ai, fn, 0); + break; + } +} + +/* instruction selection + * requires use counts (as given by parsing) + */ +void +isel(Fn *fn) +{ + Blk *b, **sb; + Ins *i, *i0, *ip; + Phi *p; + uint a; + int n, al; + int64_t sz; + ANum *ainfo; + RAlloc *ral; + + for (n=0; nntmp; n++) + fn->tmp[n].slot = -1; + fn->slot = 0; + + /* lower arguments */ + for (b=fn->start, i=b->ins; i-b->ins < b->nins; i++) + if (i->op != OPar && i->op != OParc) + break; + selpar(fn, b->ins, i); + n = b->nins - (i - b->ins) + (curi - insb); + i0 = alloc(n * sizeof(Ins)); + ip = icpy(ip = i0, insb, curi - insb); + ip = icpy(ip, i, &b->ins[b->nins] - i); + b->nins = n; + b->ins = i0; + + /* lower function calls and returns */ + ral = 0; + b = fn->start; + do { + if (!(b = b->link)) + b = fn->start; /* do it last */ + curi = &insb[NIns]; + selret(b, fn); + for (i=&b->ins[b->nins]; i!=b->ins;) { + if ((--i)->op == OCall) { + for (i0=i; i0>b->ins; i0--) + if ((i0-1)->op != OArg) + if ((i0-1)->op != OArgc) + break; + selcall(fn, i0, i, &ral); + i = i0; + continue; + } + assert(i->op != OArg && i->op != OArgc); + emiti(*i); + } + if (b == fn->start) + for (; ral; ral=ral->link) + emiti(ral->i); + b->nins = &insb[NIns] - curi; + idup(&b->ins, curi, b->nins); + } while (b != fn->start); + + if (debug['A']) { + fprintf(stderr, "\n> After call lowering:\n"); + printfn(fn, stderr); + } + + /* assign slots to fast allocs */ + b = fn->start; + assert(NAlign == 3 && "change n=4 and sz /= 4 below"); + for (al=OAlloc, n=4; al<=OAlloc1; al++, n*=2) + for (i=b->ins; i-b->ins < b->nins; i++) + 
if (i->op == al) { + if (rtype(i->arg[0]) != RCon) + break; + sz = fn->con[i->arg[0].val].bits.i; + if (sz < 0 || sz >= INT_MAX-3) + diag("isel: invalid alloc size"); + sz = (sz + n-1) & -n; + sz /= 4; + fn->tmp[i->to.val].slot = fn->slot; + fn->slot += sz; + *i = (Ins){.op = ONop}; + } + + /* process basic blocks */ + n = fn->ntmp; + ainfo = emalloc(n * sizeof ainfo[0]); + for (b=fn->start; b; b=b->link) { + curi = &insb[NIns]; + for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++) + for (p=(*sb)->phi; p; p=p->link) { + for (a=0; p->blk[a] != b; a++) + assert(a+1 < p->narg); + fixarg(&p->arg[a], p->cls, 1, fn); + } + memset(ainfo, 0, n * sizeof ainfo[0]); + anumber(ainfo, b, fn->con); + seljmp(b, fn); + for (i=&b->ins[b->nins]; i!=b->ins;) + sel(*--i, ainfo, fn); + b->nins = &insb[NIns] - curi; + idup(&b->ins, curi, b->nins); + } + free(ainfo); + + if (debug['I']) { + fprintf(stderr, "\n> After instruction selection:\n"); + printfn(fn, stderr); + } +} diff --git a/live.c b/live.c new file mode 100644 index 0000000..44806e1 --- /dev/null +++ b/live.c @@ -0,0 +1,174 @@ +#include "all.h" + +void +liveon(BSet *v, Blk *b, Blk *s) +{ + Phi *p; + uint a; + + bscopy(v, s->in); + for (p=s->phi; p; p=p->link) { + bsclr(v, p->to.val); + for (a=0; anarg; a++) + if (p->blk[a] == b) + if (rtype(p->arg[a]) == RTmp) + bsset(v, p->arg[a].val); + } +} + +static int +phitmp(int t, Tmp *tmp) +{ + int tp; + + tp = tmp[t].phi; + return tp ? 
tp : t; +} + +static void +phifix(int t1, short *phi, Tmp *tmp) +{ + int t, t2; + + /* detect temporaries arguments + * of the same phi node that + * interfere and separate them + */ + t = phitmp(t1, tmp); + t2 = phi[t]; + if (t2 && t2 != t1) { + if (t != t1) { + tmp[t1].phi = t1; + t = t1; + } else { + tmp[t2].phi = t2; + phi[t2] = t2; + } + } + phi[t] = t1; +} + +static void +bset(Ref r, Blk *b, int *nlv, short *phi, Tmp *tmp) +{ + + if (rtype(r) != RTmp) + return; + bsset(b->gen, r.val); + phifix(r.val, phi, tmp); + if (!bshas(b->in, r.val)) { + nlv[KBASE(tmp[r.val].cls)]++; + bsset(b->in, r.val); + } +} + +/* liveness analysis + * requires rpo computation + */ +void +filllive(Fn *f) +{ + Blk *b; + Ins *i; + int k, t, m[2], n, chg, nlv[2]; + short *phi; + BSet u[1], v[1]; + Mem *ma; + + bsinit(u, f->ntmp); + bsinit(v, f->ntmp); + phi = emalloc(f->ntmp * sizeof phi[0]); + for (b=f->start; b; b=b->link) { + bsinit(b->in, f->ntmp); + bsinit(b->out, f->ntmp); + bsinit(b->gen, f->ntmp); + } + chg = 1; +Again: + for (n=f->nblk-1; n>=0; n--) { + b = f->rpo[n]; + + bscopy(u, b->out); + if (b->s1) { + liveon(v, b, b->s1); + bsunion(b->out, v); + } + if (b->s2) { + liveon(v, b, b->s2); + bsunion(b->out, v); + } + chg |= !bsequal(b->out, u); + + memset(phi, 0, f->ntmp * sizeof phi[0]); + memset(nlv, 0, sizeof nlv); + bscopy(b->in, b->out); + for (t=0; tntmp; t++) + if (bshas(b->in, t)) { + phifix(t, phi, f->tmp); + nlv[KBASE(f->tmp[t].cls)]++; + } + if (rtype(b->jmp.arg) == RACall) { + assert(bscount(b->in) == 0 && nlv[0] == 0 && nlv[1] == 0); + b->in->t[0] |= retregs(b->jmp.arg, nlv); + } else + bset(b->jmp.arg, b, nlv, phi, f->tmp); + for (k=0; k<2; k++) + b->nlive[k] = nlv[k]; + for (i=&b->ins[b->nins]; i!=b->ins;) { + if ((--i)->op == OCall && rtype(i->arg[1]) == RACall) { + b->in->t[0] &= ~retregs(i->arg[1], m); + for (k=0; k<2; k++) + nlv[k] -= m[k]; + if (nlv[0] + NISave > b->nlive[0]) + b->nlive[0] = nlv[0] + NISave; + if (nlv[1] + NFSave > b->nlive[1]) + 
b->nlive[1] = nlv[1] + NFSave; + b->in->t[0] |= argregs(i->arg[1], m); + for (k=0; k<2; k++) + nlv[k] += m[k]; + } + if (!req(i->to, R)) { + assert(rtype(i->to) == RTmp); + t = i->to.val; + if (bshas(b->in, i->to.val)) + nlv[KBASE(f->tmp[t].cls)]--; + bsset(b->gen, t); + bsclr(b->in, t); + phi[phitmp(t, f->tmp)] = 0; + } + for (k=0; k<2; k++) + switch (rtype(i->arg[k])) { + case RAMem: + ma = &f->mem[i->arg[k].val & AMask]; + bset(ma->base, b, nlv, phi, f->tmp); + bset(ma->index, b, nlv, phi, f->tmp); + break; + default: + bset(i->arg[k], b, nlv, phi, f->tmp); + break; + } + for (k=0; k<2; k++) + if (nlv[k] > b->nlive[k]) + b->nlive[k] = nlv[k]; + } + } + if (chg) { + chg = 0; + goto Again; + } + free(phi); + + if (debug['L']) { + fprintf(stderr, "\n> Liveness analysis:\n"); + for (b=f->start; b; b=b->link) { + fprintf(stderr, "\t%-10sin: ", b->name); + dumpts(b->in, f->tmp, stderr); + fprintf(stderr, "\t out: "); + dumpts(b->out, f->tmp, stderr); + fprintf(stderr, "\t gen: "); + dumpts(b->gen, f->tmp, stderr); + fprintf(stderr, "\t live: "); + fprintf(stderr, "%d %d\n", b->nlive[0], b->nlive[1]); + } + } +} diff --git a/main.c b/main.c new file mode 100644 index 0000000..c1664be --- /dev/null +++ b/main.c @@ -0,0 +1,140 @@ +#include "all.h" +#include "config.h" +#include +#include + +char debug['Z'+1] = { + ['P'] = 0, /* parsing */ + ['A'] = 0, /* abi lowering */ + ['I'] = 0, /* instruction selection */ + ['L'] = 0, /* liveness */ + ['M'] = 0, /* memory optimization */ + ['N'] = 0, /* ssa construction */ + ['C'] = 0, /* copy elimination */ + ['S'] = 0, /* spilling */ + ['R'] = 0, /* reg. 
allocation */
+};
+
+static FILE *outf;
+static int dbg;
+
+/* Data-definition callback handed to parse().  Does nothing when a
+ * debug flag is set; otherwise forwards d to emitdat(), after
+ * writing an end marker and calling freeall() when d is a DEnd. */
+static void
+data(Dat *d)
+{
+	if (dbg)
+		return;
+	if (d->type == DEnd) {
+		fputs("/* end data */\n\n", outf);
+		freeall();
+	}
+	emitdat(d, outf);
+}
+
+/* Function callback handed to parse(): runs the whole pass pipeline
+ * on fn, re-threads the block list in rpo order, and emits the
+ * result to outf unless a debug flag is set.  freeall() is called
+ * once the function has been processed. */
+static void
+func(Fn *fn)
+{
+	int n;
+
+	if (dbg)
+		fprintf(stderr, "**** Function %s ****", fn->name);
+	if (debug['P']) {
+		fprintf(stderr, "\n> After parsing:\n");
+		printfn(fn, stderr);
+	}
+	fillrpo(fn);
+	fillpreds(fn);
+	filluse(fn);
+	memopt(fn);
+	ssa(fn);
+	filluse(fn);
+	copy(fn);
+	filluse(fn);
+	isel(fn);
+	filllive(fn);
+	fillcost(fn);
+	spill(fn);
+	rega(fn);
+	fillrpo(fn);
+	assert(fn->rpo[0] == fn->start);
+	/* make the link order match the rpo order */
+	for (n=0;; n++)
+		if (n == fn->nblk-1) {
+			fn->rpo[n]->link = 0;
+			break;
+		} else
+			fn->rpo[n]->link = fn->rpo[n+1];
+	if (!dbg) {
+		emitfn(fn, outf);
+		fprintf(outf, "/* end function %s */\n\n", fn->name);
+	} else
+		fprintf(stderr, "\n");
+	freeall();
+}
+
+/* Entry point.  Options:
+ *   -d letters  turn on the debug dumps named by the letters
+ *   -o file     write assembly to file ("-" keeps stdout)
+ *   -G e|m      select the ELF or Mach-O gas flavor
+ * The remaining arguments are input files, "-" or no argument
+ * meaning stdin.  Exits with status 1 on a bad option or on a file
+ * that cannot be opened. */
+int
+main(int ac, char *av[])
+{
+	FILE *inf;
+	char *f;
+	int c, asm;
+
+	asm = Defaultasm;
+	outf = stdout;
+	while ((c = getopt(ac, av, "d:o:G:")) != -1)
+		switch (c) {
+		case 'd':
+			/* <ctype.h> functions are undefined for
+			 * negative values, hence the casts */
+			for (; *optarg; optarg++)
+				if (isalpha((unsigned char)*optarg)) {
+					debug[toupper((unsigned char)*optarg)] = 1;
+					dbg = 1;
+				}
+			break;
+		case 'o':
+			if (strcmp(optarg, "-") != 0) {
+				outf = fopen(optarg, "w");
+				/* a null stream would crash at
+				 * the first write */
+				if (!outf) {
+					fprintf(stderr, "cannot open '%s'\n", optarg);
+					exit(1);
+				}
+			}
+			break;
+		case 'G':
+			if (strcmp(optarg, "e") == 0)
+				asm = Gaself;
+			else if (strcmp(optarg, "m") == 0)
+				asm = Gasmacho;
+			else {
+				fprintf(stderr, "unknown gas flavor '%s'\n", optarg);
+				exit(1);
+			}
+			break;
+		default:
+			fprintf(stderr, "usage: %s [-d ] [-o out] {file.ssa, -}\n", av[0]);
+			exit(1);
+		}
+
+	switch (asm) {
+	case Gaself:
+		locprefix = ".L";
+		symprefix = "";
+		break;
+	case Gasmacho:
+		locprefix = "L";
+		symprefix = "_";
+		break;
+	}
+
+	do {
+		f = av[optind];
+		if (!f || strcmp(f, "-") == 0) {
+			inf = stdin;
+			f = "-";
+		} else {
+			inf = fopen(f, "r");
+			if (!inf) {
+				fprintf(stderr, "cannot open '%s'\n", f);
+				exit(1);
+			}
+		}
+		parse(inf, f, data, func);
+	} while (++optind < ac);
+
+	if (!dbg)
+		emitfin(outf);
+
+	exit(0);
+}
diff --git a/mem.c b/mem.c
new file mode 100644
index 0000000..bda43d7
--- /dev/null
+++ b/mem.c
@@ -0,0 +1,81 @@
+#include "all.h"
+
+/* Memory optimization:
+ *
+ * - replace alloced slots used only in
+ *   load/store operations
+ *   Assumption: all the accesses have the
+ *   same size (this could be wrong...)
+ */
+
+/* require use, maintains use counts
+ *
+ * Only allocs of the start block are considered.  An alloc is
+ * promoted when each of its uses is an instruction that is either a
+ * load, or a store that does not store the slot address itself.
+ * The alloc then becomes a nop, stores become copies defining the
+ * slot temporary, and loads become copies or extensions of it.
+ */
+void
+memopt(Fn *fn)
+{
+	Blk *b;
+	Ins *i, *l;
+	Tmp *t;
+	Use *u, *ue;
+	int a;
+
+	b = fn->start;
+	for (i=b->ins; i-b->ins < b->nins; i++) {
+		if (OAlloc > i->op || i->op > OAlloc1)
+			continue;
+		assert(NAlign == 3);
+		assert(rtype(i->to) == RTmp);
+		t = &fn->tmp[i->to.val];
+		for (u=t->use; u != &t->use[t->nuse]; u++) {
+			if (u->type != UIns)
+				goto NextIns;
+			l = u->u.ins;
+			if (!isload(l->op)
+			&& (!isstore(l->op) || req(i->to, l->arg[0])))
+				goto NextIns;
+		}
+		/* get rid of the alloc and replace uses */
+		*i = (Ins){.op = ONop};
+		t->ndef--;
+		/* the bound is saved because nuse shrinks below */
+		ue = &t->use[t->nuse];
+		for (u=t->use; u!=ue; u++) {
+			l = u->u.ins;
+			if (isstore(l->op)) {
+				/* NOTE(review): OStores is given Kd
+				 * like OStored; Ks looks intended,
+				 * confirm before changing */
+				if (l->op == OStores)
+					l->cls = Kd;
+				else if (l->op == OStored)
+					l->cls = Kd;
+				else if (l->op == OStorel)
+					l->cls = Kl;
+				else
+					l->cls = Kw;
+				l->op = OCopy;
+				l->to = l->arg[1];
+				l->arg[1] = R;
+				t->nuse--;
+				t->ndef++;
+			} else
+				/* try to turn loads into copies so we
+				 * can eliminate them later */
+				switch(l->op) {
+				case OLoad:
+					l->op = OCopy;
+					break;
+				case OLoadsw:
+				case OLoaduw:
+					l->cls = Kw;
+					l->op = OCopy;
+					break;
+				default:
+					/* keep l->cls */
+					a = l->op - OLoadsw;
+					l->op = OExtsw + a;
+					break;
+				}
+		}
+	NextIns:;
+	}
+	if (debug['M']) {
+		fprintf(stderr, "\n> After memory optimization:\n");
+		printfn(fn, stderr);
+	}
+}
diff --git a/minic/LICENSE b/minic/LICENSE
deleted file mode 100644
index 8af557b..0000000
--- a/minic/LICENSE
+++ /dev/null
@@ -1,21 +0,0 @@
-MIT/X Consortium License
-
-© 2015-2016 Quentin Carbonneaux
-
-Permission is hereby granted, free of charge, to any person obtaining a
-copy of this
software and associated documentation files (the "Software"), -to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, -and/or sell copies of the Software, and to permit persons to whom the -Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. diff --git a/minic/Makefile b/minic/Makefile index 66f0f04..856f2d3 100644 --- a/minic/Makefile +++ b/minic/Makefile @@ -6,9 +6,7 @@ $(BIN): yacc minic.y ./yacc minic.y $(CC) $(CFLAGS) -o $@ y.tab.c -all: $(BIN) -check: clean: rm -f yacc minic y.* -.PHONY: all check clean +.PHONY: clean diff --git a/minic/mcc b/minic/mcc index 5a95042..1c54fb9 100755 --- a/minic/mcc +++ b/minic/mcc @@ -29,7 +29,7 @@ fi ../minic/minic < $file > /tmp/minic.ssa && -../src/qbe < /tmp/minic.ssa > /tmp/minic.s && +../obj/qbe < /tmp/minic.ssa > /tmp/minic.s && cc $flags /tmp/minic.s if test $? 
-ne 0 diff --git a/parse.c b/parse.c new file mode 100644 index 0000000..2590971 --- /dev/null +++ b/parse.c @@ -0,0 +1,1099 @@ +#include "all.h" +#include +#include + +enum { + Kx = -1, /* Invalid operand */ + Km = Kl, /* Memory pointer (for x64) */ +}; + +OpDesc opdesc[NOp] = { +#define A(a,b,c,d) {[Kw]=K##a, [Kl]=K##b, [Ks]=K##c, [Kd]=K##d} + + /* NAME NM ARGCLS0 ARGCLS1 SF LF */ + [OAdd] = { "add", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 }, + [OSub] = { "sub", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 }, + [ODiv] = { "div", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 }, + [ORem] = { "rem", 2, {A(w,l,x,x), A(w,l,x,x)}, 0, 0 }, + [OUDiv] = { "udiv", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 }, + [OURem] = { "urem", 2, {A(w,l,x,x), A(w,l,x,x)}, 0, 0 }, + [OMul] = { "mul", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 }, + [OAnd] = { "and", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 }, + [OOr] = { "or", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 }, + [OXor] = { "xor", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 }, + [OSar] = { "sar", 1, {A(w,l,x,x), A(w,w,x,x)}, 1, 0 }, + [OShr] = { "shr", 1, {A(w,l,x,x), A(w,w,x,x)}, 1, 0 }, + [OShl] = { "shl", 1, {A(w,l,x,x), A(w,w,x,x)}, 1, 0 }, + [OStored] = { "stored", 0, {A(d,d,d,d), A(m,m,m,m)}, 0, 1 }, + [OStores] = { "stores", 0, {A(s,s,s,s), A(m,m,m,m)}, 0, 1 }, + [OStorel] = { "storel", 0, {A(l,l,l,l), A(m,m,m,m)}, 0, 1 }, + [OStorew] = { "storew", 0, {A(w,w,w,w), A(m,m,m,m)}, 0, 1 }, + [OStoreh] = { "storeh", 0, {A(w,w,w,w), A(m,m,m,m)}, 0, 1 }, + [OStoreb] = { "storeb", 0, {A(w,w,w,w), A(m,m,m,m)}, 0, 1 }, + [OLoad] = { "load", 0, {A(m,m,m,m), A(x,x,x,x)}, 0, 1 }, + [OLoadsw] = { "loadsw", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, + [OLoaduw] = { "loaduw", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, + [OLoadsh] = { "loadsh", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, + [OLoaduh] = { "loaduh", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, + [OLoadsb] = { "loadsb", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, + [OLoadub] = { "loadub", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, + [OExtsw] = { "extsw", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 
1 }, + [OExtuw] = { "extuw", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, + [OExtsh] = { "extsh", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, + [OExtuh] = { "extuh", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, + [OExtsb] = { "extsb", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, + [OExtub] = { "extub", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, + [OExts] = { "exts", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, + [OTruncd] = { "truncd", 0, {A(d,d,d,d), A(x,x,x,x)}, 0, 1 }, + [OFtosi] = { "ftosi", 0, {A(s,d,x,x), A(x,x,x,x)}, 0, 1 }, + [OSitof] = { "sitof", 0, {A(x,x,w,l), A(x,x,x,x)}, 0, 1 }, + [OCast] = { "cast", 0, {A(s,d,w,l), A(x,x,x,x)}, 0, 1 }, + [OCopy] = { "copy", 1, {A(w,l,s,d), A(x,x,x,x)}, 0, 1 }, + [ONop] = { "nop", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 1 }, + [OSwap] = { "swap", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 }, + [OSign] = { "sign", 0, {A(w,l,x,x), A(x,x,x,x)}, 0, 0 }, + [OSAlloc] = { "salloc", 0, {A(x,l,x,x), A(x,x,x,x)}, 0, 0 }, + [OXDiv] = { "xdiv", 1, {A(w,l,x,x), A(x,x,x,x)}, 0, 0 }, + [OXCmp] = { "xcmp", 1, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 }, + [OXTest] = { "xtest", 1, {A(w,l,x,x), A(w,l,x,x)}, 1, 0 }, + [OAddr] = { "addr", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, + [OPar] = { "parn", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 }, + [OParc] = { "parc", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 }, + [OArg] = { "arg", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 }, + [OArgc] = { "argc", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 }, + [OCall] = { "call", 0, {A(m,m,m,m), A(x,x,x,x)}, 0, 0 }, + [OXSetnp] = { "xsetnp", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 }, + [OXSetp] = { "xsetp", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 }, + [OAlloc] = { "alloc4", 1, {A(l,l,l,l), A(x,x,x,x)}, 0, 0 }, + [OAlloc+1] = { "alloc8", 1, {A(l,l,l,l), A(x,x,x,x)}, 0, 0 }, + [OAlloc+2] = { "alloc16", 1, {A(l,l,l,l), A(x,x,x,x)}, 0, 0 }, +#define X(c) \ + [OCmpw+IC##c] = { "c" #c "w", 0, {A(w,w,x,x), A(w,w,x,x)}, 1, 0 }, \ + [OCmpl+IC##c] = { "c" #c "l", 0, {A(l,l,x,x), A(l,l,x,x)}, 1, 0 }, \ + [OXSet+IC##c] = { "xset" #c, 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 1 }, + ICMPS(X) +#undef 
X +#define X(c) \ + [OCmps+FC##c] = { "c" #c "s", 0, {A(s,s,x,x), A(s,s,x,x)}, 1, 0 }, \ + [OCmpd+FC##c] = { "c" #c "d", 0, {A(d,d,x,x), A(d,d,x,x)}, 1, 0 }, + FCMPS(X) +#undef X + +}; +#undef A + +typedef enum { + PXXX, + PLbl, + PPhi, + PIns, + PEnd, +} PState; + +enum { + TXXX = NPubOp, + TCall, + TPhi, + TJmp, + TJnz, + TRet, + TExport, + TFunc, + TType, + TData, + TAlign, + TL, + TW, + TH, + TB, + TD, + TS, + TZ, + + TInt, + TFlts, + TFltd, + TTmp, + TLbl, + TGlo, + TTyp, + TStr, + + TPlus, + TEq, + TComma, + TLParen, + TRParen, + TLBrace, + TRBrace, + TNL, + TEOF, +}; + + +static FILE *inf; +static char *inpath; +static int thead; +static struct { + char chr; + double fltd; + float flts; + int64_t num; + char *str; +} tokval; +static int lnum; + +static Tmp *tmp; +static Con *con; +static int ntmp; +static int ncon; +static Phi **plink; +static Blk **bmap; +static Blk *curb; +static Blk **blink; +static int nblk; +static int rcls; +static int ntyp; + + +void +err(char *s, ...) +{ + char buf[100], *p, *end; + va_list ap; + + p = buf; + end = buf + sizeof(buf); + + va_start(ap, s); + p += snprintf(p, end - p, "%s:%d: ", inpath, lnum); + p += vsnprintf(p, end - p, s, ap); + va_end(ap); + + diag(buf); +} + +static int +lex() +{ + static struct { + char *str; + int tok; + } tmap[] = { + { "call", TCall }, + { "phi", TPhi }, + { "jmp", TJmp }, + { "jnz", TJnz }, + { "ret", TRet }, + { "export", TExport }, + { "function", TFunc }, + { "type", TType }, + { "data", TData }, + { "align", TAlign }, + { "l", TL }, + { "w", TW }, + { "h", TH }, + { "b", TB }, + { "d", TD }, + { "s", TS }, + { "z", TZ }, + { "loadw", OLoad }, /* for convenience */ + { "loadl", OLoad }, + { "loads", OLoad }, + { "loadd", OLoad }, + { "alloc1", OAlloc }, + { "alloc2", OAlloc }, + { 0, TXXX } + }; + static char tok[NString]; + int c, i; + int t; + + do + c = fgetc(inf); + while (isblank(c)); + t = TXXX; + tokval.chr = c; + switch (c) { + case EOF: + return TEOF; + case ',': + return TComma; + 
case '(': + return TLParen; + case ')': + return TRParen; + case '{': + return TLBrace; + case '}': + return TRBrace; + case '=': + return TEq; + case '+': + return TPlus; + case 's': + if (fscanf(inf, "_%f", &tokval.flts) != 1) + break; + return TFlts; + case 'd': + if (fscanf(inf, "_%lf", &tokval.fltd) != 1) + break; + return TFltd; + case '%': + t = TTmp; + goto Alpha; + case '@': + t = TLbl; + goto Alpha; + case '$': + t = TGlo; + goto Alpha; + case ':': + t = TTyp; + goto Alpha; + case '#': + while (fgetc(inf) != '\n') + ; + case '\n': + lnum++; + return TNL; + } + if (isdigit(c) || c == '-' || c == '+') { + ungetc(c, inf); + if (fscanf(inf, "%"SCNd64, &tokval.num) != 1) + err("invalid integer literal"); + return TInt; + } + if (c == '"') { + tokval.str = vnew(0, 1); + for (i=0;; i++) { + c = fgetc(inf); + vgrow(&tokval.str, i+1); + if (c == '"') + if (!i || tokval.str[i-1] != '\\') { + tokval.str[i] = 0; + return TStr; + } + tokval.str[i] = c; + } + } + if (0) +Alpha: c = fgetc(inf); + if (!isalpha(c) && c != '.' && c != '_') + err("lexing failure: invalid character %c (%d)", c, c); + i = 0; + do { + if (i >= NString-1) + err("identifier too long"); + tok[i++] = c; + c = fgetc(inf); + } while (isalpha(c) || c == '$' || c == '.' 
|| c == '_' || isdigit(c)); + tok[i] = 0; + ungetc(c, inf); + tokval.str = tok; + if (t != TXXX) { + return t; + } + for (i=0; i= NIns) + err("too many instructions (1)"); + k = parsecls(&ty); + r = parseref(); + if (req(r, R)) + err("invalid reference argument"); + if (!arg && rtype(r) != RTmp) + err("invalid function parameter"); + if (k == 4) + if (arg) + *curi = (Ins){OArgc, R, {TYPE(ty), r}, Kl}; + else + *curi = (Ins){OParc, r, {TYPE(ty)}, Kl}; + else + if (arg) + *curi = (Ins){OArg, R, {r}, k}; + else + *curi = (Ins){OPar, r, {R}, k}; + curi++; + t = next(); + if (t == TRParen) + break; + if (t != TComma) + err(", or ) expected"); + } +} + +static Blk * +findblk(char *name) +{ + int i; + + for (i=0; iname, name) == 0) + return bmap[i]; + vgrow(&bmap, ++nblk); + bmap[i] = blknew(); + strcpy(bmap[i]->name, name); + return bmap[i]; +} + +static void +closeblk() +{ + curb->nins = curi - insb; + idup(&curb->ins, insb, curb->nins); + blink = &curb->link; + curi = insb; +} + +static PState +parseline(PState ps) +{ + Ref arg[NPred] = {R}; + Blk *blk[NPred]; + Phi *phi; + Ref r; + Blk *b; + int t, op, i, k, ty; + + t = nextnl(); + if (ps == PLbl && t != TLbl && t != TRBrace) + err("label or } expected"); + switch (t) { + default: + if (isstore(t)) { + /* operations without result */ + r = R; + k = 0; + op = t; + goto DoOp; + } + err("label, instruction or jump expected"); + case TRBrace: + return PEnd; + case TTmp: + break; + case TLbl: + b = findblk(tokval.str); + if (b->jmp.type != JXXX) + err("multiple definitions of block"); + if (curb && curb->jmp.type == JXXX) { + closeblk(); + curb->jmp.type = JJmp; + curb->s1 = b; + } + *blink = b; + curb = b; + plink = &curb->phi; + expect(TNL); + return PPhi; + case TRet: + curb->jmp.type = (int[]){ + JRetw, JRetl, + JRets, JRetd, + JRetc, JRet0 + }[rcls]; + if (rcls < 5) { + r = parseref(); + if (req(r, R)) + err("return value expected"); + curb->jmp.arg = r; + } + goto Close; + case TJmp: + curb->jmp.type = JJmp; + goto 
Jump; + case TJnz: + curb->jmp.type = JJnz; + r = parseref(); + if (req(r, R)) + err("invalid argument for jnz jump"); + curb->jmp.arg = r; + expect(TComma); + Jump: + expect(TLbl); + curb->s1 = findblk(tokval.str); + if (curb->jmp.type != JJmp) { + expect(TComma); + expect(TLbl); + curb->s2 = findblk(tokval.str); + } + Close: + expect(TNL); + closeblk(); + return PLbl; + } + r = tmpref(tokval.str); + expect(TEq); + k = parsecls(&ty); + op = next(); +DoOp: + if (op == TPhi) { + if (ps != PPhi) + err("unexpected phi instruction"); + op = -1; + } + if (op == TCall) { + arg[0] = parseref(); + parserefl(1); + expect(TNL); + op = OCall; + if (k == 4) { + k = Kl; + arg[1] = TYPE(ty); + } else + arg[1] = R; + goto Ins; + } + if (k == 4) + err("size class must be w, l, s, or d"); + if (op >= NPubOp) + err("invalid instruction"); + i = 0; + if (peek() != TNL) + for (;;) { + if (i == NPred) + err("too many arguments"); + if (op == -1) { + expect(TLbl); + blk[i] = findblk(tokval.str); + } + arg[i] = parseref(); + if (req(arg[i], R)) + err("invalid instruction argument"); + i++; + t = peek(); + if (t == TNL) + break; + if (t != TComma) + err(", or end of line expected"); + next(); + } + next(); + if (op != -1) { + Ins: + if (curi - insb >= NIns) + err("too many instructions (2)"); + curi->op = op; + curi->cls = k; + curi->to = r; + curi->arg[0] = arg[0]; + curi->arg[1] = arg[1]; + curi++; + return PIns; + } else { + phi = alloc(sizeof *phi); + phi->to = r; + phi->cls = k; + memcpy(phi->arg, arg, i * sizeof arg[0]); + memcpy(phi->blk, blk, i * sizeof blk[0]); + phi->narg = i; + *plink = phi; + plink = &phi->link; + return PPhi; + } +} + +static Fn * +parsefn(int export) +{ + PState ps; + Fn *fn; + + ntmp = Tmp0; + ncon = 1; /* first constant must be 0 */ + curb = 0; + nblk = 0; + curi = insb; + tmp = vnew(ntmp, sizeof tmp[0]); + con = vnew(ncon, sizeof con[0]); + bmap = vnew(nblk, sizeof bmap[0]); + con[0].type = CBits; + fn = alloc(sizeof *fn); + fn->export = export; + blink = 
&fn->start; + fn->retty = -1; + if (peek() != TGlo) + rcls = parsecls(&fn->retty); + else + rcls = 5; + if (next() != TGlo) + err("function name expected"); + strcpy(fn->name, tokval.str); + parserefl(0); + if (nextnl() != TLBrace) + err("function body must start with {"); + ps = PLbl; + do + ps = parseline(ps); + while (ps != PEnd); + if (!curb) + err("empty file"); + if (curb->jmp.type == JXXX) + err("last block misses jump"); + fn->tmp = tmp; + fn->con = con; + fn->mem = vnew(0, sizeof fn->mem[0]); + fn->ntmp = ntmp; + fn->ncon = ncon; + fn->nmem = 0; + fn->nblk = nblk; + fn->rpo = 0; + return fn; +} + +static void +parsetyp() +{ + Typ *ty; + int t, n, sz, al, s, a, c, flt; + + if (ntyp >= NTyp) + err("too many type definitions"); + ty = &typ[ntyp++]; + ty->align = -1; + if (nextnl() != TTyp || nextnl() != TEq) + err("type name, then = expected"); + strcpy(ty->name, tokval.str); + t = nextnl(); + if (t == TAlign) { + if (nextnl() != TInt) + err("alignment expected"); + for (al=0; tokval.num /= 2; al++) + ; + ty->align = al; + t = nextnl(); + } + if (t != TLBrace) + err("type body must start with {"); + t = nextnl(); + if (t == TInt) { + ty->dark = 1; + ty->size = tokval.num; + if (ty->align == -1) + err("dark types need alignment"); + t = nextnl(); + } else { + ty->dark = 0; + n = -1; + sz = 0; + al = 0; + for (;;) { + flt = 0; + switch (t) { + default: err("invalid size specifier %c", tokval.chr); + case TD: flt = 1; + case TL: s = 8; a = 3; break; + case TS: flt = 1; + case TW: s = 4; a = 2; break; + case TH: s = 2; a = 1; break; + case TB: s = 1; a = 0; break; + } + if (a > al) + al = a; + if ((a = sz & (s-1))) { + a = s - a; + if (++n < NSeg) { + /* padding segment */ + ty->seg[n].ispad = 1; + ty->seg[n].len = a; + } + } + t = nextnl(); + if (t == TInt) { + c = tokval.num; + t = nextnl(); + } else + c = 1; + while (c-- > 0) { + if (++n < NSeg) { + ty->seg[n].isflt = flt; + ty->seg[n].ispad = 0; + ty->seg[n].len = s; + } + sz += a + s; + } + if (t != TComma) 
+ break; + t = nextnl(); + } + if (++n >= NSeg) + ty->dark = 1; + else + ty->seg[n].len = 0; + if (ty->align == -1) + ty->align = al; + else + al = ty->align; + a = (1 << al) - 1; + ty->size = (sz + a) & ~a; + } + if (t != TRBrace) + err("expected closing }"); +} + +static void +parsedatref(Dat *d) +{ + int t; + + d->isref = 1; + d->u.ref.nam = tokval.str; + d->u.ref.off = 0; + t = peek(); + if (t == TPlus) { + next(); + if (next() != TInt) + err("invalid token after offset in ref"); + d->u.ref.off = tokval.num; + } +} + +static void +parsedatstr(Dat *d) +{ + d->isstr = 1; + d->u.str = tokval.str; +} + +static void +parsedat(void cb(Dat *), int export) +{ + char s[NString]; + int t; + Dat d; + + d.type = DStart; + d.isstr = 0; + d.isref = 0; + d.export = export; + cb(&d); + if (nextnl() != TGlo || nextnl() != TEq) + err("data name, then = expected"); + strcpy(s, tokval.str); + t = nextnl(); + if (t == TAlign) { + if (nextnl() != TInt) + err("alignment expected"); + d.type = DAlign; + d.u.num = tokval.num; + cb(&d); + t = nextnl(); + } + d.type = DName; + d.u.str = s; + cb(&d); + + if (t != TLBrace) + err("expected data contents in { .. 
}"); + for (;;) { + switch (nextnl()) { + default: err("invalid size specifier %c in data", tokval.chr); + case TRBrace: goto Done; + case TL: d.type = DL; break; + case TW: d.type = DW; break; + case TH: d.type = DH; break; + case TB: d.type = DB; break; + case TS: d.type = DW; break; + case TD: d.type = DL; break; + case TZ: d.type = DZ; break; + } + t = nextnl(); + do { + d.isref = 0; + d.isstr = 0; + memset(&d.u, 0, sizeof d.u); + if (t == TFlts) + d.u.flts = tokval.flts; + else if (t == TFltd) + d.u.fltd = tokval.fltd; + else if (t == TInt) + d.u.num = tokval.num; + else if (t == TGlo) + parsedatref(&d); + else if (t == TStr) + parsedatstr(&d); + else + err("constant literal expected"); + cb(&d); + t = nextnl(); + } while (t == TInt || t == TFlts || t == TFltd); + if (t == TRBrace) + break; + if (t != TComma) + err(", or } expected"); + } +Done: + d.type = DEnd; + cb(&d); +} + +void +parse(FILE *f, char *path, void data(Dat *), void func(Fn *)) +{ + int t, export; + + inf = f; + inpath = path; + lnum = 1; + thead = TXXX; + ntyp = 0; + for (;;) { + export = 0; + switch (nextnl()) { + default: + err("top-level definition expected"); + case TExport: + export = 1; + t = nextnl(); + if (t == TFunc) { + case TFunc: + func(parsefn(export)); + break; + } + else if (t == TData) { + case TData: + parsedat(data, export); + break; + } + else + err("export can only qualify data and function"); + case TType: + parsetyp(); + break; + case TEOF: + return; + } + } +} + +static void +printcon(Con *c, FILE *f) +{ + switch (c->type) { + case CUndef: + break; + case CAddr: + fprintf(f, "$%s", c->label); + if (c->bits.i) + fprintf(f, "%+"PRIi64, c->bits.i); + break; + case CBits: + if (c->flt == 1) + fprintf(f, "s_%f", c->bits.s); + else if (c->flt == 2) + fprintf(f, "d_%lf", c->bits.d); + else + fprintf(f, "%"PRIi64, c->bits.i); + break; + } +} + +void +printref(Ref r, Fn *fn, FILE *f) +{ + int i; + Mem *m; + + switch (rtype(r)) { + case RTmp: + if (r.val < Tmp0) + fprintf(f, 
"R%d", r.val); + else + fprintf(f, "%%%s", fn->tmp[r.val].name); + break; + case RCon: + printcon(&fn->con[r.val], f); + break; + case RSlot: + fprintf(f, "S%d", r.val); + break; + case RACall: + fprintf(f, "%03x", r.val & AMask); + break; + case RAType: + fprintf(f, ":%s", typ[r.val & AMask].name); + break; + case RAMem: + i = 0; + m = &fn->mem[r.val & AMask]; + fputc('[', f); + if (m->offset.type != CUndef) { + printcon(&m->offset, f); + i = 1; + } + if (!req(m->base, R)) { + if (i) + fprintf(f, " + "); + printref(m->base, fn, f); + i = 1; + } + if (!req(m->index, R)) { + if (i) + fprintf(f, " + "); + fprintf(f, "%d * ", m->scale); + printref(m->index, fn, f); + } + fputc(']', f); + break; + } +} + +void +printfn(Fn *fn, FILE *f) +{ + static char *jtoa[NJmp] = { + [JRet0] = "ret", + [JRetw] = "retw", + [JRetl] = "retl", + [JRetc] = "retc", + [JRets] = "rets", + [JRetd] = "retd", + [JJnz] = "jnz", + [JXJnp] = "xjnp", + [JXJp] = "xjp", + #define X(c) [JXJc+IC##c] = "xj" #c, + ICMPS(X) + #undef X + }; + static char prcls[NOp] = { + [OArg] = 1, + [OSwap] = 1, + [OXCmp] = 1, + [OXTest] = 1, + [OXDiv] = 1, + [OXIDiv] = 1, + }; + static char ktoc[] = "wlsd"; + Blk *b; + Phi *p; + Ins *i; + uint n; + + if (fn->export) + fprintf(f, "export "); + fprintf(f, "function $%s() {\n", fn->name); + for (b=fn->start; b; b=b->link) { + fprintf(f, "@%s\n", b->name); + for (p=b->phi; p; p=p->link) { + fprintf(f, "\t"); + printref(p->to, fn, f); + fprintf(f, " =%c phi ", ktoc[p->cls]); + assert(p->narg); + for (n=0;; n++) { + fprintf(f, "@%s ", p->blk[n]->name); + printref(p->arg[n], fn, f); + if (n == p->narg-1) { + fprintf(f, "\n"); + break; + } else + fprintf(f, ", "); + } + } + for (i=b->ins; i-b->ins < b->nins; i++) { + fprintf(f, "\t"); + if (!req(i->to, R)) { + printref(i->to, fn, f); + fprintf(f, " =%c ", ktoc[i->cls]); + } + assert(opdesc[i->op].name); + fprintf(f, "%s", opdesc[i->op].name); + if (req(i->to, R) && prcls[i->op]) + fputc(ktoc[i->cls], f); + if (!req(i->arg[0], 
R)) { + fprintf(f, " "); + printref(i->arg[0], fn, f); + } + if (!req(i->arg[1], R)) { + fprintf(f, ", "); + printref(i->arg[1], fn, f); + } + fprintf(f, "\n"); + } + switch (b->jmp.type) { + case JRet0: + case JRetw: + case JRetl: + case JRets: + case JRetd: + case JRetc: + fprintf(f, "\t%s", jtoa[b->jmp.type]); + if (b->jmp.type != JRet0 || !req(b->jmp.arg, R)) { + fprintf(f, " "); + printref(b->jmp.arg, fn, f); + } + if (b->jmp.type == JRetc) + fprintf(f, ", :%s", typ[fn->retty].name); + fprintf(f, "\n"); + break; + case JJmp: + if (b->s1 != b->link) + fprintf(f, "\tjmp @%s\n", b->s1->name); + break; + default: + fprintf(f, "\t%s ", jtoa[b->jmp.type]); + if (b->jmp.type == JJnz) { + printref(b->jmp.arg, fn, f); + fprintf(f, ", "); + } + fprintf(f, "@%s, @%s\n", b->s1->name, b->s2->name); + break; + } + } + fprintf(f, "}\n"); +} diff --git a/rega.c b/rega.c new file mode 100644 index 0000000..7f8edcf --- /dev/null +++ b/rega.c @@ -0,0 +1,598 @@ +#include "all.h" + +#ifdef TEST_PMOV + #undef assert + #define assert(x) assert_test(#x, x) +#endif + +typedef struct RMap RMap; + +struct RMap { + int t[NIReg+NFReg]; + int r[NIReg+NFReg]; + BSet b[1]; + int n; +}; + +static bits regu; /* registers used */ +static Tmp *tmp; /* function temporaries */ +static Mem *mem; /* function mem references */ +static struct { + Ref src, dst; + int cls; +} *pm; /* parallel move constructed */ +static int cpm, npm; /* capacity and size of pm */ + +static int * +hint(int t) +{ + return &tmp[phicls(t, tmp)].hint.r; +} + +static void +sethint(int t, int r) +{ + bits m; + + m = tmp[phicls(t, tmp)].hint.m; + if (*hint(t) == -1) + if (!(BIT(r) & m)) + *hint(t) = r; +} + +static void +rcopy(RMap *ma, RMap *mb) +{ + memcpy(ma->t, mb->t, sizeof ma->t); + memcpy(ma->r, mb->r, sizeof ma->r); + bscopy(ma->b, mb->b); + ma->n = mb->n; +} + +static int +rfind(RMap *m, int t) +{ + int i; + + for (i=0; in; i++) + if (m->t[i] == t) + return m->r[i]; + return -1; +} + +static Ref +rref(RMap *m, int t) 
+{ + int r, s; + + r = rfind(m, t); + if (r == -1) { + s = tmp[t].slot; + assert(s != -1 && "should have spilled"); + return SLOT(s); + } else + return TMP(r); +} + +static void +radd(RMap *m, int t, int r) +{ + assert((t >= Tmp0 || t == r) && "invalid temporary"); + assert(((RAX <= r && r < RAX + NIReg) || (XMM0 <= r && r < XMM0 + NFReg)) && "invalid register"); + assert(!bshas(m->b, t) && "temporary has mapping"); + assert(!bshas(m->b, r) && "register already allocated"); + assert(m->n <= NIReg+NFReg && "too many mappings"); + bsset(m->b, t); + bsset(m->b, r); + m->t[m->n] = t; + m->r[m->n] = r; + m->n++; + regu |= BIT(r); +} + +static Ref +ralloc(RMap *m, int t) +{ + bits regs; + int r, r0, r1; + + if (t < Tmp0) { + assert(bshas(m->b, t)); + return TMP(t); + } + if (bshas(m->b, t)) { + r = rfind(m, t); + assert(r != -1); + return TMP(r); + } + r = *hint(t); + if (r == -1 || bshas(m->b, r)) { + regs = tmp[phicls(t, tmp)].hint.m; + regs |= m->b->t[0]; + switch (KBASE(tmp[t].cls)) { + case 0: + r0 = RAX; + r1 = RAX + NIReg; + break; + case 1: + r0 = XMM0; + r1 = XMM0 + NFReg; + break; + } + for (r=r0; rb, r)) + goto Found; + diag("rega: no more regs"); + } +Found: + radd(m, t, r); + sethint(t, r); + return TMP(r); +} + +static int +rfree(RMap *m, int t) +{ + int i, r; + + if (!bshas(m->b, t)) + return -1; + for (i=0; m->t[i] != t; i++) + assert(i+1 < m->n); + r = m->r[i]; + bsclr(m->b, t); + bsclr(m->b, r); + m->n--; + memmove(&m->t[i], &m->t[i+1], (m->n-i) * sizeof m->t[0]); + memmove(&m->r[i], &m->r[i+1], (m->n-i) * sizeof m->r[0]); + return r; +} + +static void +mdump(RMap *m) +{ + int i; + + for (i=0; in; i++) + fprintf(stderr, " (%s, R%d)", + tmp[m->t[i]].name, + m->r[i]); + fprintf(stderr, "\n"); +} + +static void +pmadd(Ref src, Ref dst, int k) +{ + if (npm == cpm) { + cpm = cpm * 2 + 16; + pm = realloc(pm, cpm * sizeof pm[0]); + if (!pm) + diag("pmadd: out of memory"); + } + pm[npm].src = src; + pm[npm].dst = dst; + pm[npm].cls = k; + npm++; +} + +enum 
PMStat { ToMove, Moving, Moved }; + +static Ref +pmrec(enum PMStat *status, int i, int *k) +{ + Ref swp, swp1; + int j, k1; + + /* note, this routine might emit + * too many large instructions: + * + * , x -- x + * x -- x -- x | + * ` x -- x + * + * if only the first move is wide + * the whole cycle will be wide, + * this is safe but not necessary + */ + + if (req(pm[i].src, pm[i].dst)) + return R; + status[i] = Moving; + assert(KBASE(*k) == KBASE(pm[i].cls)); + assert((Kw|1) == Kl && (Ks|1) == Kd); + *k |= KWIDE(pm[i].cls); /* see above */ + swp = R; + for (j=0; jb, r) && r1 != r) { + /* r is used and not by to */ + for (n=0; m->r[n] != r; n++) + assert(n+1 < m->n); + t = m->t[n]; + rfree(m, t); + bsset(m->b, r); + ralloc(m, t); + bsclr(m->b, r); + } + t = req(to, R) ? r : to.val; + radd(m, t, r); +} + +static int +regcpy(Ins *i) +{ + return i->op == OCopy && isreg(i->arg[0]); +} + +static Ins * +dopm(Blk *b, Ins *i, RMap *m) +{ + RMap m0; + int n, r, r1, t, s; + Ins *i0, *i1, *ip, *ir; + bits def; + + m0 = *m; + i1 = ++i; + do { + i--; + move(i->arg[0].val, i->to, m); + } while (i != b->ins && regcpy(i-1)); + assert(m0.n <= m->n); + if (i != b->ins && (i-1)->op == OCall) { + def = retregs((i-1)->arg[1], 0); + for (r=0; rn; n++) { + t = m->t[n]; + s = tmp[t].slot; + r1 = m->r[n]; + r = rfind(&m0, t); + if (r != -1) + pmadd(TMP(r1), TMP(r), tmp[t].cls); + else if (s != -1) + pmadd(TMP(r1), SLOT(s), tmp[t].cls); + } + for (ip=i; ipto, R)) + rfree(m, ip->to.val); + r = ip->arg[0].val; + if (rfind(m, r) == -1) + radd(m, r, r); + } + pmgen(); +#ifdef TEST_PMOV + return 0; +#endif + n = b->nins - (i1 - i) + (curi - insb); + i0 = alloc(n * sizeof(Ins)); + ip = icpy(ip = i0, b->ins, i - b->ins); + ip = icpy(ir = ip, insb, curi - insb); + ip = icpy(ip, i1, &b->ins[b->nins] - i1); + b->nins = n; + b->ins = i0; + return ir; +} + +static int +prio(Ref r1, Ref r2) +{ + /* trivial heuristic to begin with, + * later we can use the distance to + * the definition instruction + */ 
+ (void) r2; + return *hint(r1.val) != -1; +} + +static void +insert(Ref *r, Ref **rs, int p) +{ + int i; + + rs[i = p] = r; + while (i-- > 0 && prio(*r, *rs[i])) { + rs[i+1] = rs[i]; + rs[i] = r; + } +} + +static void +doblk(Blk *b, RMap *cur) +{ + int x, r, nr; + bits rs; + Ins *i; + Mem *m; + Ref *ra[4]; + + if (rtype(b->jmp.arg) == RTmp) + b->jmp.arg = ralloc(cur, b->jmp.arg.val); + else if (rtype(b->jmp.arg) == RACall) { + /* add return registers */ + rs = retregs(b->jmp.arg, 0); + for (r=0; rs; rs/=2, r++) + if (rs & 1) + radd(cur, r, r); + } + for (i=&b->ins[b->nins]; i!=b->ins;) { + switch ((--i)->op) { + case OCall: + rs = argregs(i->arg[1], 0); + for (r=0; rarg[0])) { + i = dopm(b, i, cur); + continue; + } + if (isreg(i->to)) + if (rtype(i->arg[0]) == RTmp) + sethint(i->arg[0].val, i->to.val); + /* fall through */ + default: + if (!req(i->to, R)) { + assert(rtype(i->to) == RTmp); + r = rfree(cur, i->to.val); + if (r == -1 && !isreg(i->to)) { + *i = (Ins){.op = ONop}; + continue; + } + if (i->to.val >= Tmp0) + i->to = TMP(r); + } + break; + } + for (x=0, nr=0; x<2; x++) + switch (rtype(i->arg[x])) { + case RAMem: + m = &mem[i->arg[x].val & AMask]; + if (rtype(m->base) == RTmp) + insert(&m->base, ra, nr++); + if (rtype(m->index) == RTmp) + insert(&m->index, ra, nr++); + break; + case RTmp: + insert(&i->arg[x], ra, nr++); + break; + } + for (r=0; rval); + } +} + +/* register allocation + * depends on rpo, phi, cost, (and obviously spill) + */ +void +rega(Fn *fn) +{ + int j, n, t, r, r1, x, rl[Tmp0]; + Blk *b, *b1, *s, ***ps, *blist; + RMap *end, *beg, cur, old; + Ins *i; + Phi *p; + uint u; + Ref src, dst; + + /* 1. 
setup */ + regu = 0; + tmp = fn->tmp; + mem = fn->mem; + end = alloc(fn->nblk * sizeof end[0]); + beg = alloc(fn->nblk * sizeof beg[0]); + for (n=0; nnblk; n++) { + bsinit(end[n].b, fn->ntmp); + bsinit(beg[n].b, fn->ntmp); + } + bsinit(cur.b, fn->ntmp); + bsinit(old.b, fn->ntmp); + + for (t=Tmp0; tntmp; t++) + *hint(t) = -1; + for (b=fn->start, i=b->ins; i-b->ins < b->nins; i++) + if (i->op != OCopy || !isreg(i->arg[0])) + break; + else { + assert(rtype(i->to) == RTmp); + sethint(i->to.val, i->arg[0].val); + } + + /* 2. assign registers following post-order */ + for (n=fn->nblk-1; n>=0; n--) { + b = fn->rpo[n]; + cur.n = 0; + bszero(cur.b); + for (x=0; x<2; x++) + for (t=Tmp0; tntmp; t++) { + assert(bshas(b->out, t) || + !bshas(cur.b, t)); + if (bshas(b->out, t)) + if (!bshas(cur.b, t)) + if (x || (r=*hint(t)) != -1) + if (x || !bshas(cur.b, r)) + ralloc(&cur, t); + } + rcopy(&end[n], &cur); + doblk(b, &cur); + bscopy(b->in, cur.b); + for (p=b->phi; p; p=p->link) + if (rtype(p->to) == RTmp) { + bsclr(b->in, p->to.val); + /* heuristic 0: + * if the phi destination has an + * argument from a frequent block + * that was already allocated to + * 'r', use 'r' as the new hint + */ + memset(rl, 0, sizeof rl); + for (u=0; unarg; u++) { + t = p->arg[u].val; + b1 = p->blk[u]; + if (rtype(p->arg[u]) == RTmp) + if ((r=rfind(&end[b1->id], t)) != -1) + rl[r] += b1->loop; + } + for (x=0, j=0; j rl[x]) + x = j; + if (rl[x] >= b->loop) + *hint(p->to.val) = x; + } + if (b->npred > 1) { + /* heuristic 1: + * attempt to satisfy hints + * when it's simple and we have + * multiple predecessors + */ + rcopy(&old, &cur); + curi = &insb[NIns]; + for (j=0; jnins += j; + i = alloc(b->nins * sizeof(Ins)); + icpy(icpy(i, curi, j), b->ins, b->nins-j); + b->ins = i; + } + } + rcopy(&beg[n], &cur); + } + if (debug['R']) { + fprintf(stderr, "\n> Register mappings:\n"); + for (n=0; nnblk; n++) { + b = fn->rpo[n]; + fprintf(stderr, "\t%-10s beg", b->name); + mdump(&beg[n]); + fprintf(stderr, "\t 
end"); + mdump(&end[n]); + } + fprintf(stderr, "\n"); + } + + /* 3. compose glue code */ + blist = 0; + for (b=fn->start;; b=b->link) { + ps = (Blk**[3]){&b->s1, &b->s2, (Blk*[1]){0}}; + for (; (s=**ps); ps++) { + npm = 0; + for (p=s->phi; p; p=p->link) { + dst = p->to; + assert(rtype(dst)==RSlot || rtype(dst)==RTmp); + if (rtype(dst) == RTmp) { + r = rfind(&beg[s->id], dst.val); + if (r == -1) + continue; + dst = TMP(r); + } + for (u=0; p->blk[u]!=b; u++) + assert(u+1 < p->narg); + src = p->arg[u]; + if (rtype(src) == RTmp) + src = rref(&end[b->id], src.val); + pmadd(src, dst, p->cls); + } + for (t=Tmp0; tntmp; t++) + if (bshas(s->in, t)) { + src = rref(&end[b->id], t); + dst = rref(&beg[s->id], t); + pmadd(src, dst, tmp[t].cls); + } + pmgen(); + if (curi == insb) + continue; + b1 = blknew(); + b1->loop = (b->loop+s->loop) / 2; + b1->link = blist; + blist = b1; + fn->nblk++; + sprintf(b1->name, "%s_%s", b->name, s->name); + b1->nins = curi - insb; + idup(&b1->ins, insb, b1->nins); + b1->jmp.type = JJmp; + b1->s1 = s; + **ps = b1; + } + if (!b->link) { + b->link = blist; + break; + } + } + for (b=fn->start; b; b=b->link) + b->phi = 0; + fn->reg = regu; + + if (debug['R']) { + fprintf(stderr, "\n> After register allocation:\n"); + printfn(fn, stderr); + } +} diff --git a/spill.c b/spill.c new file mode 100644 index 0000000..72f8106 --- /dev/null +++ b/spill.c @@ -0,0 +1,507 @@ +#include "all.h" + +static void +loopmark(Blk *hd, Blk *b, Phi *p) +{ + int k, head; + uint n, a; + + head = hd->id; + if (b->id < head) + return; + for (; p; p=p->link) + for (a=0; anarg; a++) + if (p->blk[a] == b) + if (rtype(p->arg[a]) == RTmp) + bsset(hd->gen, p->arg[a].val); + if (b->visit == head) + return; + b->visit = head; + b->loop *= 10; + /* aggregate looping information at + * loop headers */ + bsunion(hd->gen, b->gen); + for (k=0; k<2; k++) + if (b->nlive[k] > hd->nlive[k]) + hd->nlive[k] = b->nlive[k]; + for (n=0; nnpred; n++) + loopmark(hd, b->pred[n], b->phi); +} + +static 
void +tmpuse(Ref r, int use, int loop, Fn *fn) +{ + Mem *m; + Tmp *t; + + if (rtype(r) == RAMem) { + m = &fn->mem[r.val & AMask]; + tmpuse(m->base, 1, loop, fn); + tmpuse(m->index, 1, loop, fn); + } + else if (rtype(r) == RTmp && r.val >= Tmp0) { + t = &fn->tmp[r.val]; + t->nuse += use; + t->ndef += !use; + t->cost += loop; + } +} + +/* evaluate spill costs of temporaries, + * this also fills usage information + * requires rpo, preds + */ +void +fillcost(Fn *fn) +{ + int n, hd; + uint a; + Blk *b; + Ins *i; + Tmp *t; + Phi *p; + + for (b=fn->start; b; b=b->link) { + b->loop = 1; + b->visit = -1; + } + if (debug['S']) + fprintf(stderr, "\n> Loop information:\n"); + for (n=0; nnblk; n++) { + b = fn->rpo[n]; + hd = 0; + for (a=0; anpred; a++) + if (b->pred[a]->id >= n) { + loopmark(b, b->pred[a], b->phi); + hd = 1; + } + if (hd && debug['S']) { + fprintf(stderr, "\t%-10s", b->name); + fprintf(stderr, " (% 3d ", b->nlive[0]); + fprintf(stderr, "% 3d) ", b->nlive[1]); + dumpts(b->gen, fn->tmp, stderr); + } + } + for (t=fn->tmp; t-fn->tmp < fn->ntmp; t++) { + t->cost = t-fn->tmp < Tmp0 ? 1e6 : 0; + t->nuse = 0; + t->ndef = 0; + } + for (b=fn->start; b; b=b->link) { + for (p=b->phi; p; p=p->link) { + /* todo, the cost computation + * for p->to is not great... 
*/ + tmpuse(p->to, 0, 0, fn); + for (a=0; anarg; a++) { + n = p->blk[a]->loop; + assert(b->npred==p->narg && + "wrong cfg"); + n /= b->npred; + tmpuse(p->arg[a], 1, n, fn); + } + } + n = b->loop; + for (i=b->ins; i-b->ins < b->nins; i++) { + tmpuse(i->to, 0, n, fn); + tmpuse(i->arg[0], 1, n, fn); + tmpuse(i->arg[1], 1, n, fn); + } + tmpuse(b->jmp.arg, 1, n, fn); + } + if (debug['S']) { + fprintf(stderr, "\n> Spill costs:\n"); + for (n=Tmp0; nntmp; n++) + fprintf(stderr, "\t%-10s %d\n", + fn->tmp[n].name, + fn->tmp[n].cost); + fprintf(stderr, "\n"); + } +} + +static BSet *fst; /* temps to prioritize in registers (for tcmp1) */ +static Tmp *tmp; /* current temporaries (for tcmpX) */ +static int ntmp; /* current # of temps (for limit) */ +static int locs; /* stack size used by locals */ +static int slot4; /* next slot of 4 bytes */ +static int slot8; /* ditto, 8 bytes */ +static BSet mask[2][1]; /* class masks */ + +static int +tcmp0(const void *pa, const void *pb) +{ + return tmp[*(int *)pb].cost - tmp[*(int *)pa].cost; +} + +static int +tcmp1(const void *pa, const void *pb) +{ + int c; + + c = bshas(fst, *(int *)pb) - bshas(fst, *(int *)pa); + return c ? 
c : tcmp0(pa, pb); +} + +static Ref +slot(int t) +{ + int s; + + if (t < Tmp0) + diag("spill: cannot spill register"); + s = tmp[t].slot; + if (s == -1) { + assert(NAlign == 3); + /* nice logic to pack stack slots + * on demand, there can be only + * one hole and slot4 points to it + * + * invariant: slot4 <= slot8 + */ + if (KWIDE(tmp[t].cls)) { + s = slot8; + if (slot4 == slot8) + slot4 += 2; + slot8 += 2; + } else { + s = slot4; + if (slot4 == slot8) { + slot8 += 2; + slot4 += 1; + } else + slot4 = slot8; + } + s += locs; + tmp[t].slot = s; + } + return SLOT(s); +} + +static void +limit(BSet *b, int k, BSet *f) +{ + static int *tarr, maxt; + int i, nt; + uint t; + + nt = bscount(b); + if (nt <= k) + return; + if (nt > maxt) { + free(tarr); + tarr = emalloc(nt * sizeof tarr[0]); + maxt = nt; + } + for (i=0, t=0; bsiter(b, &t); t++) { + bsclr(b, t); + tarr[i++] = t; + } + if (!f) + qsort(tarr, nt, sizeof tarr[0], tcmp0); + else { + fst = f; + qsort(tarr, nt, sizeof tarr[0], tcmp1); + } + for (i=0; iop == OCopy && isreg(i->arg[0]); +} + +static Ins * +dopm(Blk *b, Ins *i, BSet *v) +{ + int n, t; + BSet u[1]; + Ins *i1; + bits r; + + bsinit(u, ntmp); /* todo, free those */ + /* consecutive copies from + * registers need to be handled + * as one large instruction + * + * fixme: there is an assumption + * that calls are always followed + * by copy instructions here, this + * might not be true if previous + * passes change + */ + i1 = ++i; + do { + i--; + t = i->to.val; + if (!req(i->to, R)) + if (bshas(v, t)) { + bsclr(v, t); + store(i->to, tmp[t].slot); + } + bsset(v, i->arg[0].val); + } while (i != b->ins && regcpy(i-1)); + bscopy(u, v); + if (i != b->ins && (i-1)->op == OCall) { + v->t[0] &= ~retregs((i-1)->arg[1], 0); + limit2(v, NISave, NFSave, 0); + for (r=0, n=0; nt[0] |= argregs((i-1)->arg[1], 0); + } else { + limit2(v, 0, 0, 0); + r = v->t[0]; + } + sethint(v, r); + reloads(u, v); + do + emiti(*--i1); + while (i1 != i); + return i; +} + +/* spill code 
insertion + * requires spill costs, rpo, liveness + * + * Note: this will replace liveness + * information (in, out) with temporaries + * that must be in registers at block + * borders + * + * Be careful with: + * - OCopy instructions to ensure register + * constraints + */ +void +spill(Fn *fn) +{ + Blk *b, *s1, *s2, *hd, **bp; + int j, n, l, t, k, lvarg[2]; + BSet u[1], v[1], w[1]; + Ins *i; + Phi *p; + Mem *m; + bits r; + + tmp = fn->tmp; + ntmp = fn->ntmp; + bsinit(u, ntmp); + bsinit(v, ntmp); + bsinit(w, ntmp); + bsinit(mask[0], ntmp); + bsinit(mask[1], ntmp); + locs = fn->slot; + slot4 = 0; + slot8 = 0; + for (t=0; t= XMM0 && t < XMM0 + NFReg) + k = 1; + else if (t >= Tmp0) + k = KBASE(tmp[t].cls); + bsset(mask[k], t); + } + + for (bp=&fn->rpo[fn->nblk]; bp!=fn->rpo;) { + b = *--bp; + /* invariant: all bocks with bigger rpo got + * their in,out updated. */ + + /* 1. find temporaries in registers at + * the end of the block (put them in v) */ + curi = 0; + s1 = b->s1; + s2 = b->s2; + hd = 0; + if (s1 && s1->id <= n) + hd = s1; + if (s2 && s2->id <= n) + if (!hd || s2->id >= hd->id) + hd = s2; + r = 0; + bszero(v); + if (hd) { + /* back-edge */ + for (k=0; k<2; k++) { + n = k == 0 ? NIReg : NFReg; + bscopy(u, b->out); + bsinter(u, mask[k]); + bscopy(w, u); + bsinter(u, hd->gen); + bsdiff(w, hd->gen); + if ((int)bscount(u) < n) { /* fixme */ + j = bscount(w); /* live through */ + l = hd->nlive[k]; + limit(w, n - (l - j), 0); + bsunion(u, w); + } else + limit(u, n, 0); + bsunion(v, u); + } + } else if (s1) { + liveon(v, b, s1); + if (s2) { + liveon(u, b, s2); + bscopy(w, u); + bsinter(w, v); + bsunion(v, u); + } + limit2(v, 0, 0, w); + } else if (rtype(b->jmp.arg) == RACall) { + /* return */ + r = retregs(b->jmp.arg, 0); + v->t[0] |= r; + } + bscopy(b->out, v); + + /* 2. 
process the block instructions */ + curi = &insb[NIns]; + for (i=&b->ins[b->nins]; i!=b->ins;) { + i--; + if (regcpy(i)) { + i = dopm(b, i, v); + continue; + } + bszero(w); + if (!req(i->to, R)) { + assert(rtype(i->to) == RTmp); + t = i->to.val; + if (bshas(v, t)) + bsclr(v, t); + else { + /* make sure we have a reg + * for the result */ + bsset(v, t); + bsset(w, t); + } + } + j = opdesc[i->op].nmem; + for (n=0; n<2; n++) + if (rtype(i->arg[n]) == RAMem) + j--; + for (n=0; n<2; n++) + switch (rtype(i->arg[n])) { + case RAMem: + t = i->arg[n].val; + m = &fn->mem[t & AMask]; + if (rtype(m->base) == RTmp) { + bsset(v, m->base.val); + bsset(w, m->base.val); + } + if (rtype(m->index) == RTmp) { + bsset(v, m->index.val); + bsset(w, m->index.val); + } + break; + case RTmp: + t = i->arg[n].val; + lvarg[n] = bshas(v, t); + bsset(v, t); + if (j-- <= 0) + bsset(w, t); + break; + } + bscopy(u, v); + limit2(v, 0, 0, w); + for (n=0; n<2; n++) + if (rtype(i->arg[n]) == RTmp) { + t = i->arg[n].val; + if (!bshas(v, t)) { + /* do not reload if the + * the temporary was dead + */ + if (!lvarg[n]) + bsclr(u, t); + i->arg[n] = slot(t); + } + } + reloads(u, v); + if (!req(i->to, R)) { + t = i->to.val; + store(i->to, tmp[t].slot); + bsclr(v, t); + } + emiti(*i); + r = v->t[0] & (BIT(Tmp0)-1); + if (r) + sethint(v, r); + } + assert(!r || b==fn->start); + + for (p=b->phi; p; p=p->link) { + assert(rtype(p->to) == RTmp); + t = p->to.val; + if (bshas(v, t)) { + bsclr(v, t); + store(p->to, tmp[t].slot); + } else if (bshas(b->in, t)) + /* only if the phi is live */ + p->to = slot(p->to.val); + } + bscopy(b->in, v); + b->nins = &insb[NIns] - curi; + idup(&b->ins, curi, b->nins); + } + + /* align the locals to a 16 byte boundary */ + assert(NAlign == 3); + slot8 += slot8 & 3; + fn->slot += slot8; + + if (debug['S']) { + fprintf(stderr, "\n> Block information:\n"); + for (b=fn->start; b; b=b->link) { + printf("\t%-10s (% 5d) ", b->name, b->loop); + dumpts(b->out, fn->tmp, stdout); + } + 
fprintf(stderr, "\n> After spilling:\n"); + printfn(fn, stderr); + } +} diff --git a/src/.gitignore b/src/.gitignore deleted file mode 100644 index 5c8ecc2..0000000 --- a/src/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -qbe -config.h -.comfile -*.o -*.out diff --git a/src/.tag b/src/.tag deleted file mode 100644 index 5b8c210..0000000 --- a/src/.tag +++ /dev/null @@ -1,11 +0,0 @@ -Look slot( - -Get lisc.h -Get parse.c -Get isel.c -Get spill.c -Get rega.c -Get emit.c - -New -|fmt diff --git a/src/Makefile b/src/Makefile deleted file mode 100644 index 6adfbd3..0000000 --- a/src/Makefile +++ /dev/null @@ -1,24 +0,0 @@ -BIN = qbe -OBJ = main.o util.o parse.o mem.o ssa.o copy.o live.o isel.o spill.o rega.o emit.o - -CFLAGS += -Wall -Wextra -std=c99 -g -pedantic - -$(BIN): $(OBJ) - $(CC) $(LDFLAGS) $(OBJ) -o $@ - -$(OBJ): all.h config.h - -config.h: - @case `uname` in \ - *Darwin*) echo "#define Defaultasm Gasmacho" ;; \ - *Linux*) echo "#define Defaultasm Gaself" ;; \ - esac > $@ - - -all: $(BIN) -clean: - rm -f $(BIN) $(OBJ) -check: $(BIN) - ../test/go.sh all - -.PHONY: all clean check syndoc diff --git a/src/all.h b/src/all.h deleted file mode 100644 index 40c80f6..0000000 --- a/src/all.h +++ /dev/null @@ -1,563 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#define MAKESURE(what, x) typedef char make_sure_##what[(x)?1:-1] - -typedef unsigned int uint; -typedef unsigned short ushort; -typedef unsigned long ulong; -typedef unsigned long bits; - -typedef struct BSet BSet; -typedef struct Ref Ref; -typedef struct OpDesc OpDesc; -typedef struct Ins Ins; -typedef struct Phi Phi; -typedef struct Blk Blk; -typedef struct Use Use; -typedef struct Tmp Tmp; -typedef struct Con Con; -typedef struct Addr Mem; -typedef struct Fn Fn; -typedef struct Typ Typ; -typedef struct Dat Dat; - -enum Reg { - RXX, - - RAX, /* caller-save */ - RCX, - RDX, - RSI, - RDI, - R8, - R9, - R10, - R11, - - RBX, /* callee-save */ - R12, - R13, - R14, - R15, - - RBP, /* reserved */ 
- RSP, - - XMM0, /* sse */ - XMM1, - XMM2, - XMM3, - XMM4, - XMM5, - XMM6, - XMM7, - XMM8, - XMM9, - XMM10, - XMM11, - XMM12, - XMM13, - XMM14, - XMM15, - - Tmp0, /* first non-reg temporary */ - - NIReg = R12 - RAX + 1, - NFReg = XMM14 - XMM0 + 1, - NISave = 9, - NFSave = NFReg, - NRSave = NISave + NFSave, - NRClob = 5, -}; - -enum { - NString = 32, - NPred = 63, - NIns = 8192, - NAlign = 3, - NSeg = 32, - NTyp = 128, - NBit = CHAR_BIT * sizeof(bits), -}; - -MAKESURE(NBit_is_enough, NBit >= (int)Tmp0); - -#define BIT(n) ((bits)1 << (n)) - -struct BSet { - uint nt; - bits *t; -}; - -struct Ref { - uint16_t type:2; - uint16_t val:14; -}; - -enum Alt { - AType, - ACall, - AMem, - - AShift = 12, - AMask = (1<> AShift); - return r.type; -} - -static inline int isreg(Ref r) -{ - return rtype(r) == RTmp && r.val < Tmp0; -} - -enum ICmp { -#define ICMPS(X) \ - X(ule) \ - X(ult) \ - X(sle) \ - X(slt) \ - X(sgt) \ - X(sge) \ - X(ugt) \ - X(uge) \ - X(eq) \ - X(ne) /* make sure icmpop() below works! */ - -#define X(c) IC##c, - ICMPS(X) -#undef X - NICmp, - - ICXnp = NICmp, /* x64 specific */ - ICXp, - NXICmp -}; - -static inline int icmpop(int c) -{ - return c >= ICeq ? 
c : ICuge - c; -} - -enum FCmp { -#define FCMPS(X) \ - X(le) \ - X(lt) \ - X(gt) \ - X(ge) \ - X(ne) \ - X(eq) \ - X(o) \ - X(uo) - -#define X(c) FC##c, - FCMPS(X) -#undef X - NFCmp -}; - -enum Class { - Kw, - Kl, - Ks, - Kd -}; - -#define KWIDE(k) ((k)&1) -#define KBASE(k) ((k)>>1) - -enum Op { - OXXX, - - /* public instructions */ - OAdd, - OSub, - ODiv, - ORem, - OUDiv, - OURem, - OMul, - OAnd, - OOr, - OXor, - OSar, - OShr, - OShl, - OCmpw, - OCmpw1 = OCmpw + NICmp-1, - OCmpl, - OCmpl1 = OCmpl + NICmp-1, - OCmps, - OCmps1 = OCmps + NFCmp-1, - OCmpd, - OCmpd1 = OCmpd + NFCmp-1, - - OStored, - OStores, - OStorel, - OStorew, - OStoreh, - OStoreb, -#define isstore(o) (OStored <= o && o <= OStoreb) - OLoadsw, /* needs to match OExt (mem.c) */ - OLoaduw, - OLoadsh, - OLoaduh, - OLoadsb, - OLoadub, - OLoad, -#define isload(o) (OLoadsw <= o && o <= OLoad) - OExtsw, - OExtuw, - OExtsh, - OExtuh, - OExtsb, - OExtub, -#define isext(o) (OExtsw <= o && o <= OExtub) - - OExts, - OTruncd, - OFtosi, - OSitof, - OCast, - - OAlloc, - OAlloc1 = OAlloc + NAlign-1, - - OCopy, - NPubOp, - - /* function instructions */ - OPar = NPubOp, - OParc, - OArg, - OArgc, - OCall, - - /* reserved instructions */ - ONop, - OAddr, - OSwap, - OSign, - OSAlloc, - OXIDiv, - OXDiv, - OXCmp, - OXSet, - OXSetnp = OXSet + ICXnp, - OXSetp = OXSet + ICXp, - OXTest, - NOp -}; - -enum Jmp { - JXXX, - JRet0, - JRetw, - JRetl, - JRets, - JRetd, - JRetc, -#define isret(j) (JRet0 <= j && j <= JRetc) - JJmp, - JJnz, - JXJc, - JXJnp = JXJc + ICXnp, - JXJp = JXJc + ICXp, - NJmp -}; - -struct OpDesc { - char *name; - int nmem; - char argcls[2][4]; - uint sflag:1; /* sets the zero flag */ - uint lflag:1; /* leaves flags */ -}; - -struct Ins { - ushort op:14; - Ref to; - Ref arg[2]; - ushort cls:2; -}; - -struct Phi { - Ref to; - Ref arg[NPred]; - Blk *blk[NPred]; - uint narg; - int cls; - Phi *link; -}; - -struct Blk { - Phi *phi; - Ins *ins; - uint nins; - struct { - short type; - Ref arg; - } jmp; - Blk *s1; - Blk 
*s2; - Blk *link; - - int id; - int visit; - - Blk *idom; - Blk *dom, *dlink; - Blk **fron; - int nfron; - - Blk **pred; - uint npred; - BSet in[1], out[1], gen[1]; - int nlive[2]; - int loop; - char name[NString]; -}; - -struct Use { - enum { - UXXX, - UPhi, - UIns, - UJmp, - } type; - int bid; - union { - Ins *ins; - Phi *phi; - } u; -}; - -struct Tmp { - char name[NString]; - Use *use; - uint ndef, nuse; - uint cost; - short slot; - short cls; - struct { - int r; - bits m; - } hint; - int phi; - int visit; -}; - -struct Con { - enum { - CUndef, - CBits, - CAddr, - } type; - char label[NString]; - union { - int64_t i; - double d; - float s; - } bits; - char flt; /* for printing, see parse.c */ - char local; -}; - -typedef struct Addr Addr; - -struct Addr { /* x64 addressing */ - Con offset; - Ref base; - Ref index; - int scale; -}; - -struct Fn { - Blk *start; - Tmp *tmp; - Con *con; - Mem *mem; - int ntmp; - int ncon; - int nmem; - int nblk; - int retty; /* index in typ[], -1 if no aggregate return */ - Ref retr; - Blk **rpo; - bits reg; - int slot; - char export; - char name[NString]; -}; - -struct Typ { - char name[NString]; - int dark; - uint size; - int align; - - struct { - uint isflt:1; - uint ispad:1; - uint len:30; - } seg[NSeg+1]; -}; - -struct Dat { - enum { - DStart, - DEnd, - DName, - DAlign, - DB, - DH, - DW, - DL, - DZ - } type; - union { - int64_t num; - double fltd; - float flts; - char *str; - struct { - char *nam; - int64_t off; - } ref; - } u; - char isref; - char isstr; - char export; -}; - - -/* main.c */ -enum Asm { - Gasmacho, - Gaself, -}; -extern char debug['Z'+1]; - -/* util.c */ -extern Typ typ[NTyp]; -extern Ins insb[NIns], *curi; -void diag(char *) __attribute__((noreturn)); -void *emalloc(size_t); -void *alloc(size_t); -void freeall(void); -Blk *blknew(void); -void emit(int, int, Ref, Ref, Ref); -void emiti(Ins); -void idup(Ins **, Ins *, ulong); -Ins *icpy(Ins *, Ins *, ulong); -void *vnew(ulong, size_t); -void vgrow(void *, 
ulong); -int phicls(int, Tmp *); -Ref newtmp(char *, int, Fn *); -Ref getcon(int64_t, Fn *); -void addcon(Con *, Con *); -void dumpts(BSet *, Tmp *, FILE *); - -void bsinit(BSet *, uint); -void bszero(BSet *); -uint bscount(BSet *); -void bsset(BSet *, uint); -void bsclr(BSet *, uint); -void bscopy(BSet *, BSet *); -void bsunion(BSet *, BSet *); -void bsinter(BSet *, BSet *); -void bsdiff(BSet *, BSet *); -int bsequal(BSet *, BSet *); -int bsiter(BSet *, uint *); - -static inline int -bshas(BSet *bs, uint elt) -{ - assert(elt < bs->nt * NBit); - return (bs->t[elt/NBit] & BIT(elt%NBit)) != 0; -} - -/* parse.c */ -extern OpDesc opdesc[NOp]; -void parse(FILE *, char *, void (Dat *), void (Fn *)); -void printfn(Fn *, FILE *); -void printref(Ref, Fn *, FILE *); -void err(char *, ...); - -/* mem.c */ -void memopt(Fn *); - -/* ssa.c */ -void filluse(Fn *); -void fillpreds(Fn *); -void fillrpo(Fn *); -void ssa(Fn *); - -/* copy.c */ -void copy(Fn *); - -/* live.c */ -void liveon(BSet *, Blk *, Blk *); -void filllive(Fn *); - -/* isel.c */ -extern int rsave[/* NRSave */]; -extern int rclob[/* NRClob */]; -bits retregs(Ref, int[2]); -bits argregs(Ref, int[2]); -void isel(Fn *); - -/* spill.c */ -void fillcost(Fn *); -void spill(Fn *); - -/* rega.c */ -void rega(Fn *); - -/* emit.c */ -extern char *locprefix; -extern char *symprefix; -void emitfn(Fn *, FILE *); -void emitdat(Dat *, FILE *); -int stashfp(int64_t, int); -void emitfin(FILE *); diff --git a/src/copy.c b/src/copy.c deleted file mode 100644 index ef2d01d..0000000 --- a/src/copy.c +++ /dev/null @@ -1,159 +0,0 @@ -#include "all.h" - -typedef struct RList RList; -struct RList { - int t; - RList *l; -}; - -static Ref -copyof(Ref r, Ref *cp) -{ - if (rtype(r) == RTmp) - return cp[r.val]; - else - return r; -} - -static void -update(Ref r, Ref rcp, Ref *cp, RList **w) -{ - RList *l; - - if (!req(cp[r.val], rcp)) { - cp[r.val] = rcp; - l = emalloc(sizeof *l); - l->t = r.val; - l->l = *w; - *w = l; - } -} - -static void 
-visitphi(Phi *p, Ref *cp, RList **w) -{ - uint a; - Ref r, r1; - - r = R; - for (a=0; anarg; a++) { - r1 = copyof(p->arg[a], cp); - if (req(r1, R)) - continue; - if (req(r, R) || req(r, r1)) - r = r1; - else { - r = p->to; - break; - } - } - assert(!req(r, R)); - update(p->to, r, cp, w); -} - -static void -visitins(Ins *i, Ref *cp, RList **w) -{ - Ref r; - - if (i->op == OCopy) { - r = copyof(i->arg[0], cp); - update(i->to, r, cp, w); - } else if (!req(i->to, R)) { - assert(rtype(i->to) == RTmp); - update(i->to, i->to, cp, w); - } -} - -void -copy(Fn *fn) -{ - Blk *b; - Ref *cp, r; - RList *w, *w1; - Use *u, *u1; - Ins *i; - Phi *p, **pp; - uint a; - int t; - - w = 0; - cp = emalloc(fn->ntmp * sizeof cp[0]); - for (b=fn->start; b; b=b->link) { - for (p=b->phi; p; p=p->link) - visitphi(p, cp, &w); - for (i=b->ins; i-b->ins < b->nins; i++) - visitins(i, cp, &w); - } - while ((w1=w)) { - t = w->t; - w = w->l; - free(w1); - u = fn->tmp[t].use; - u1 = u + fn->tmp[t].nuse; - for (; utype) { - default: - diag("copy: invalid use"); - case UPhi: - visitphi(u->u.phi, cp, &w); - break; - case UIns: - visitins(u->u.ins, cp, &w); - break; - case UJmp: - break; - } - } - for (b=fn->start; b; b=b->link) { - for (pp=&b->phi; (p=*pp);) { - r = cp[p->to.val]; - if (!req(r, p->to)) { - *pp = p->link; - continue; - } - for (a=0; anarg; a++) - if (rtype(p->arg[a]) == RTmp) { - r = cp[p->arg[a].val]; - assert(!req(r, R)); - p->arg[a] = r; - } - pp=&p->link; - } - for (i=b->ins; i-b->ins < b->nins; i++) { - r = cp[i->to.val]; - if (!req(r, i->to)) { - *i = (Ins){.op = ONop}; - continue; - } - for (a=0; a<2; a++) - if (rtype(i->arg[a]) == RTmp) { - r = cp[i->arg[a].val]; - assert(!req(r, R)); - i->arg[a] = r; - } - } - if (rtype(b->jmp.arg) == RTmp) { - r = cp[b->jmp.arg.val]; - assert(!req(r, R)); - b->jmp.arg = r; - } - } - if (debug['C']) { - fprintf(stderr, "\n> Copy information:"); - for (t=Tmp0; tntmp; t++) { - if (req(cp[t], R)) { - fprintf(stderr, "\n%10s not seen!", - 
fn->tmp[t].name); - } - else if (!req(cp[t], TMP(t))) { - fprintf(stderr, "\n%10s copy of ", - fn->tmp[t].name); - printref(cp[t], fn, stderr); - } - } - fprintf(stderr, "\n\n> After copy elimination:\n"); - printfn(fn, stderr); - } - free(cp); -} diff --git a/src/emit.c b/src/emit.c deleted file mode 100644 index 9b2975d..0000000 --- a/src/emit.c +++ /dev/null @@ -1,669 +0,0 @@ -#include "all.h" - -char *locprefix, *symprefix; - -enum { - SLong = 0, - SWord = 1, - SShort = 2, - SByte = 3, - - Ki = -1, /* matches Kw and Kl */ - Ka = -2, /* matches all classes */ -}; - -/* Instruction format strings: - * - * if the format string starts with -, the instruction - * is assumed to be 3-address and is put in 2-address - * mode using an extra mov if necessary - * - * if the format string starts with +, the same as the - * above applies, but commutativity is also assumed - * - * %k is used to set the class of the instruction, - * it'll expand to "l", "q", "ss", "sd", depending - * on the instruction class - * %0 designates the first argument - * %1 designates the second argument - * %= designates the result - * - * if %k is not used, a prefix to 0, 1, or = must be - * added, it can be: - * M - memory reference - * L - long (64 bits) - * W - word (32 bits) - * H - short (16 bits) - * B - byte (8 bits) - * S - single precision float - * D - double precision float - */ -static struct { - short op; - short cls; - char *asm; -} omap[] = { - { OAdd, Ka, "+add%k %1, %=" }, - { OSub, Ka, "-sub%k %1, %=" }, - { OAnd, Ki, "+and%k %1, %=" }, - { OOr, Ki, "+or%k %1, %=" }, - { OXor, Ki, "+xor%k %1, %=" }, - { OSar, Ki, "-sar%k %B1, %=" }, - { OShr, Ki, "-shr%k %B1, %=" }, - { OShl, Ki, "-shl%k %B1, %=" }, - { OMul, Ki, "+imul%k %1, %=" }, - { OMul, Ks, "+mulss %1, %=" }, /* fixme */ - { OMul, Kd, "+mulsd %1, %=" }, - { ODiv, Ka, "-div%k %1, %=" }, - { OStorel, Ka, "movq %L0, %M1" }, - { OStorew, Ka, "movl %W0, %M1" }, - { OStoreh, Ka, "movw %H0, %M1" }, - { OStoreb, Ka, "movb %B0, 
%M1" }, - { OStores, Ka, "movss %S0, %M1" }, - { OStored, Ka, "movsd %D0, %M1" }, - { OLoad, Ka, "mov%k %M0, %=" }, - { OLoadsw, Kl, "movslq %M0, %L=" }, - { OLoadsw, Kw, "movl %M0, %W=" }, - { OLoaduw, Ki, "movl %M0, %W=" }, - { OLoadsh, Ki, "movsw%k %M0, %=" }, - { OLoaduh, Ki, "movzw%k %M0, %=" }, - { OLoadsb, Ki, "movsb%k %M0, %=" }, - { OLoadub, Ki, "movzb%k %M0, %=" }, - { OExtsw, Kl, "movslq %W0, %L=" }, - { OExtuw, Kl, "movl %W0, %W=" }, - { OExtsh, Ki, "movsw%k %H0, %=" }, - { OExtuh, Ki, "movzw%k %H0, %=" }, - { OExtsb, Ki, "movsb%k %B0, %=" }, - { OExtub, Ki, "movzb%k %B0, %=" }, - - { OExts, Kd, "cvtss2sd %0, %=" }, /* see if factorization is possible */ - { OTruncd, Ks, "cvttsd2ss %0, %=" }, - { OFtosi, Kw, "cvttss2si %0, %=" }, - { OFtosi, Kl, "cvttsd2si %0, %=" }, - { OSitof, Ks, "cvtsi2ss %W0, %=" }, - { OSitof, Kd, "cvtsi2sd %L0, %=" }, - { OCast, Ki, "movq %D0, %L=" }, - { OCast, Ka, "movq %L0, %D=" }, - - { OAddr, Ki, "lea%k %M0, %=" }, - { OSwap, Ki, "xchg%k %0, %1" }, - { OSign, Kl, "cqto" }, - { OSign, Kw, "cltd" }, - { OXDiv, Ki, "div%k %0" }, - { OXIDiv, Ki, "idiv%k %0" }, - { OXCmp, Ks, "comiss %S0, %S1" }, /* fixme, Kf */ - { OXCmp, Kd, "comisd %D0, %D1" }, - { OXCmp, Ki, "cmp%k %0, %1" }, - { OXTest, Ki, "test%k %0, %1" }, - { OXSet+ICeq, Ki, "setz %B=\n\tmovzb%k %B=, %=" }, - { OXSet+ICsle, Ki, "setle %B=\n\tmovzb%k %B=, %=" }, - { OXSet+ICslt, Ki, "setl %B=\n\tmovzb%k %B=, %=" }, - { OXSet+ICsgt, Ki, "setg %B=\n\tmovzb%k %B=, %=" }, - { OXSet+ICsge, Ki, "setge %B=\n\tmovzb%k %B=, %=" }, - { OXSet+ICne, Ki, "setnz %B=\n\tmovzb%k %B=, %=" }, - { OXSet+ICXnp, Ki, "setnp %B=\n\tmovsb%k %B=, %=" }, - { OXSet+ICXp, Ki, "setp %B=\n\tmovsb%k %B=, %=" }, - { NOp, 0, 0 } -}; - -static char *rname[][4] = { - [RAX] = {"rax", "eax", "ax", "al"}, - [RBX] = {"rbx", "ebx", "bx", "bl"}, - [RCX] = {"rcx", "ecx", "cx", "cl"}, - [RDX] = {"rdx", "edx", "dx", "dl"}, - [RSI] = {"rsi", "esi", "si", "sil"}, - [RDI] = {"rdi", "edi", "di", "dil"}, - [RBP] = 
{"rbp", "ebp", "bp", "bpl"}, - [RSP] = {"rsp", "esp", "sp", "spl"}, - [R8 ] = {"r8" , "r8d", "r8w", "r8b"}, - [R9 ] = {"r9" , "r9d", "r9w", "r9b"}, - [R10] = {"r10", "r10d", "r10w", "r10b"}, - [R11] = {"r11", "r11d", "r11w", "r11b"}, - [R12] = {"r12", "r12d", "r12w", "r12b"}, - [R13] = {"r13", "r13d", "r13w", "r13b"}, - [R14] = {"r14", "r14d", "r14w", "r14b"}, - [R15] = {"r15", "r15d", "r15w", "r15b"}, -}; - - -static int -slot(int s, Fn *fn) -{ - struct { int i:14; } x; - - /* sign extend s using a bitfield */ - x.i = s; - assert(NAlign == 3); - if (x.i < 0) - return -4 * x.i; - else { - assert(fn->slot >= x.i); - return -4 * (fn->slot - x.i); - } -} - -static void -emitcon(Con *con, FILE *f) -{ - switch (con->type) { - default: - diag("emit: invalid constant"); - case CAddr: - fprintf(f, "%s%s", con->local ? locprefix : symprefix, con->label); - if (con->bits.i) - fprintf(f, "%+"PRId64, con->bits.i); - break; - case CBits: - fprintf(f, "%"PRId64, con->bits.i); - break; - } -} - -static char * -regtoa(int reg, int sz) -{ - static char buf[6]; - - if (reg >= XMM0) { - sprintf(buf, "xmm%d", reg-XMM0); - return buf; - } else - return rname[reg][sz]; -} - -static Ref -getarg(char c, Ins *i) -{ - switch (c) { - default: - diag("emit: 0, 1, = expected in format"); - case '0': - return i->arg[0]; - case '1': - return i->arg[1]; - case '=': - return i->to; - } -} - -static void emitins(Ins, Fn *, FILE *); - -static void -emitcopy(Ref r1, Ref r2, int k, Fn *fn, FILE *f) -{ - Ins icp; - - icp.op = OCopy; - icp.arg[0] = r2; - icp.to = r1; - icp.cls = k; - emitins(icp, fn, f); -} - -static void -emitf(char *s, Ins *i, Fn *fn, FILE *f) -{ - static char clstoa[][3] = {"l", "q", "ss", "sd"}; - char c; - int sz; - Ref ref; - Mem *m; - Con off; - - switch (*s) { - case '+': - if (req(i->arg[1], i->to)) { - ref = i->arg[0]; - i->arg[0] = i->arg[1]; - i->arg[1] = ref; - } - /* fall through */ - case '-': - if (req(i->arg[1], i->to) && !req(i->arg[0], i->to)) - diag("emit: cannot 
convert to 2-address"); - emitcopy(i->to, i->arg[0], i->cls, fn, f); - s++; - break; - } - - fputc('\t', f); -Next: - while ((c = *s++) != '%') - if (!c) { - fputc('\n', f); - return; - } else - fputc(c, f); - switch ((c = *s++)) { - default: - diag("emit: invalid escape"); - case '%': - fputc('%', f); - break; - case 'k': - fputs(clstoa[i->cls], f); - break; - case '0': - case '1': - case '=': - sz = KWIDE(i->cls) ? SLong : SWord; - s--; - /* fall through */ - case 'D': - case 'S': - Ref: - c = *s++; - ref = getarg(c, i); - switch (rtype(ref)) { - default: - diag("emit: invalid reference"); - case RTmp: - assert(isreg(ref)); - fprintf(f, "%%%s", regtoa(ref.val, sz)); - break; - case RSlot: - fprintf(f, "%d(%%rbp)", slot(ref.val, fn)); - break; - case RAMem: - Mem: - m = &fn->mem[ref.val & AMask]; - if (rtype(m->base) == RSlot) { - off.type = CBits; - off.bits.i = slot(m->base.val, fn); - addcon(&m->offset, &off); - m->base = TMP(RBP); - } - if (m->offset.type != CUndef) - emitcon(&m->offset, f); - fputc('(', f); - if (req(m->base, R)) - fprintf(f, "%%rip"); - else - fprintf(f, "%%%s", regtoa(m->base.val, SLong)); - if (!req(m->index, R)) - fprintf(f, ", %%%s, %d", - regtoa(m->index.val, SLong), - m->scale - ); - fputc(')', f); - break; - case RCon: - fputc('$', f); - emitcon(&fn->con[ref.val], f); - break; - } - break; - case 'L': - sz = SLong; - goto Ref; - case 'W': - sz = SWord; - goto Ref; - case 'H': - sz = SShort; - goto Ref; - case 'B': - sz = SByte; - goto Ref; - case 'M': - c = *s++; - ref = getarg(c, i); - switch (rtype(ref)) { - default: - diag("emit: invalid memory reference"); - case RAMem: - goto Mem; - case RSlot: - fprintf(f, "%d(%%rbp)", slot(ref.val, fn)); - break; - case RCon: - emitcon(&fn->con[ref.val], f); - fprintf(f, "(%%rip)"); - break; - case RTmp: - assert(isreg(ref)); - fprintf(f, "(%%%s)", regtoa(ref.val, SLong)); - break; - } - break; - } - goto Next; -} - -static void -emitins(Ins i, Fn *fn, FILE *f) -{ - Ref r; - int64_t val; - int 
o; - - switch (i.op) { - default: - Table: - /* most instructions are just pulled out of - * the table omap[], some special cases are - * detailed below */ - for (o=0;; o++) { - /* this linear search should really be a binary - * search */ - if (omap[o].op == NOp) - diag("emit: no entry found for instruction"); - if (omap[o].op == i.op) - if (omap[o].cls == i.cls - || (omap[o].cls == Ki && KBASE(i.cls) == 0) - || (omap[o].cls == Ka)) - break; - } - emitf(omap[o].asm, &i, fn, f); - break; - case ONop: - /* just do nothing for nops, they are inserted - * by some passes */ - break; - case OMul: - /* here, we try to use the 3-addresss form - * of multiplication when possible */ - if (rtype(i.arg[1]) == RCon) { - r = i.arg[0]; - i.arg[0] = i.arg[1]; - i.arg[1] = r; - } - if (KBASE(i.cls) == 0 /* only available for ints */ - && rtype(i.arg[0]) == RCon - && rtype(i.arg[1]) == RTmp) { - emitf("imul%k %0, %1, %=", &i, fn, f); - break; - } - goto Table; - case OSub: - /* we have to use the negation trick to handle - * some 3-address substractions */ - if (req(i.to, i.arg[1])) { - emitf("neg%k %=", &i, fn, f); - emitf("add%k %0, %=", &i, fn, f); - break; - } - goto Table; - case OCopy: - /* make sure we don't emit useless copies, - * also, we can use a trick to load 64-bits - * registers, it's detailed in my note below - * http://c9x.me/art/notes.html?09/19/2015 */ - if (req(i.to, R) || req(i.arg[0], R)) - break; - if (isreg(i.to) - && rtype(i.arg[0]) == RCon - && i.cls == Kl - && fn->con[i.arg[0].val].type == CBits - && (val = fn->con[i.arg[0].val].bits.i) >= 0 - && val <= UINT32_MAX) { - emitf("movl %W0, %W=", &i, fn, f); - } else if (isreg(i.to) - && rtype(i.arg[0]) == RCon - && fn->con[i.arg[0].val].type == CAddr) { - emitf("lea%k %M0, %=", &i, fn, f); - } else if (!req(i.arg[0], i.to)) - emitf("mov%k %0, %=", &i, fn, f); - break; - case OCall: - /* calls simply have a weird syntax in AT&T - * assembly... 
*/ - switch (rtype(i.arg[0])) { - default: - diag("emit: invalid call instruction"); - case RCon: - fprintf(f, "\tcallq "); - emitcon(&fn->con[i.arg[0].val], f); - fprintf(f, "\n"); - break; - case RTmp: - emitf("callq *%L0", &i, fn, f); - break; - } - break; - case OSAlloc: - /* there is no good reason why this is here - * maybe we should split OSAlloc in 2 different - * instructions depending on the result - */ - emitf("subq %L0, %%rsp", &i, fn, f); - if (!req(i.to, R)) - emitcopy(i.to, TMP(RSP), Kl, fn, f); - break; - case OSwap: - if (KBASE(i.cls) == 0) - goto Table; - /* for floats, there is no swap instruction - * so we use xmm15 as a temporary - */ - emitcopy(TMP(XMM0+15), i.arg[0], i.cls, fn, f); - emitcopy(i.arg[0], i.arg[1], i.cls, fn, f); - emitcopy(i.arg[1], TMP(XMM0+15), i.cls, fn, f); - break; - } -} - -static int -cneg(int cmp) -{ - switch (cmp) { - default: diag("emit: cneg() unhandled comparison"); - case ICule: return ICugt; - case ICult: return ICuge; - case ICsle: return ICsgt; - case ICslt: return ICsge; - case ICsgt: return ICsle; - case ICsge: return ICslt; - case ICugt: return ICule; - case ICuge: return ICult; - case ICeq: return ICne; - case ICne: return ICeq; - case ICXnp: return ICXp; - case ICXp: return ICXnp; - } -} - -static int -framesz(Fn *fn) -{ - int i, o, f; - - assert(NAlign == 3); - for (i=0, o=0; ireg >> rclob[i]); - f = fn->slot; - f = (f + 3) & -4; - return 4*f + 8*o; -} - -void -emitfn(Fn *fn, FILE *f) -{ - static char *ctoa[] = { - [ICeq] = "z", - [ICule] = "be", - [ICult] = "b", - [ICsle] = "le", - [ICslt] = "l", - [ICsgt] = "g", - [ICsge] = "ge", - [ICugt] = "a", - [ICuge] = "ae", - [ICne] = "nz", - [ICXnp] = "np", - [ICXp] = "p" - }; - Blk *b, *s; - Ins *i, itmp; - int *r, c, fs; - - fprintf(f, ".text\n"); - if (fn->export) - fprintf(f, ".globl %s%s\n", symprefix, fn->name); - fprintf(f, - "%s%s:\n" - "\tpush %%rbp\n" - "\tmov %%rsp, %%rbp\n", - symprefix, fn->name - ); - fs = framesz(fn); - if (fs) - fprintf(f, "\tsub 
$%d, %%rsp\n", fs); - for (r=rclob; r-rclob < NRClob; r++) - if (fn->reg & BIT(*r)) { - itmp.arg[0] = TMP(*r); - emitf("pushq %L0", &itmp, fn, f); - } - - for (b=fn->start; b; b=b->link) { - fprintf(f, "%s%s:\n", locprefix, b->name); - for (i=b->ins; i!=&b->ins[b->nins]; i++) - emitins(*i, fn, f); - switch (b->jmp.type) { - case JRet0: - for (r=&rclob[NRClob]; r>rclob;) - if (fn->reg & BIT(*--r)) { - itmp.arg[0] = TMP(*r); - emitf("popq %L0", &itmp, fn, f); - } - fprintf(f, - "\tleave\n" - "\tret\n" - ); - break; - case JJmp: - if (b->s1 != b->link) - fprintf(f, "\tjmp %s%s\n", locprefix, b->s1->name); - break; - default: - c = b->jmp.type - JXJc; - if (0 <= c && c <= NXICmp) { - if (b->link == b->s2) { - s = b->s1; - } else if (b->link == b->s1) { - c = cneg(c); - s = b->s2; - } else - diag("emit: unhandled jump (1)"); - fprintf(f, "\tj%s %s%s\n", ctoa[c], locprefix, s->name); - break; - } - diag("emit: unhandled jump (2)"); - } - } - -} - -void -emitdat(Dat *d, FILE *f) -{ - static int align; - static char *dtoa[] = { - [DAlign] = ".align", - [DB] = "\t.byte", - [DH] = "\t.value", - [DW] = "\t.long", - [DL] = "\t.quad" - }; - - switch (d->type) { - case DStart: - align = 0; - fprintf(f, ".data\n"); - break; - case DEnd: - break; - case DName: - if (!align) - fprintf(f, ".align 8\n"); - if (d->export) - fprintf(f, ".globl %s%s\n", symprefix, d->u.str); - fprintf(f, "%s%s:\n", symprefix, d->u.str); - break; - case DZ: - fprintf(f, "\t.fill %"PRId64",1,0\n", d->u.num); - break; - default: - if (d->type == DAlign) - align = 1; - - if (d->isstr) { - if (d->type != DB) - err("strings only supported for 'b' currently"); - fprintf(f, "\t.ascii \"%s\"\n", d->u.str); - } - else if (d->isref) { - fprintf(f, "%s %s%+"PRId64"\n", - dtoa[d->type], d->u.ref.nam, - d->u.ref.off); - } - else { - fprintf(f, "%s %"PRId64"\n", - dtoa[d->type], d->u.num); - } - break; - } -} - -typedef struct FBits FBits; - -struct FBits { - int64_t bits; - int wide; - FBits *link; -}; - -static 
FBits *stash; - -int -stashfp(int64_t n, int w) -{ - FBits **pb, *b; - int i; - - /* does a dumb de-dup of fp constants - * this should be the linker's job */ - for (pb=&stash, i=0; (b=*pb); pb=&b->link, i++) - if (n == b->bits && w == b->wide) - return i; - b = emalloc(sizeof *b); - b->bits = n; - b->wide = w; - b->link = 0; - *pb = b; - return i; -} - -void -emitfin(FILE *f) -{ - FBits *b; - int i; - - if (!stash) - return; - fprintf(f, "/* floating point constants */\n"); - fprintf(f, ".data\n.align 8\n"); - for (b=stash, i=0; b; b=b->link, i++) - if (b->wide) - fprintf(f, - "%sfp%d:\n" - "\t.quad %"PRId64 - " /* %f */\n", - locprefix, i, b->bits, - *(double *)&b->bits - ); - for (b=stash, i=0; b; b=b->link, i++) - if (!b->wide) - fprintf(f, - "%sfp%d:\n" - "\t.long %"PRId64 - " /* %lf */\n", - locprefix, i, b->bits & 0xffffffff, - *(float *)&b->bits - ); - while ((b=stash)) { - stash = b->link; - free(b); - } -} diff --git a/src/isel.c b/src/isel.c deleted file mode 100644 index 2a55733..0000000 --- a/src/isel.c +++ /dev/null @@ -1,1136 +0,0 @@ -#include "all.h" -#include - -/* For x86_64, do the following: - * - * - lower calls - * - check that constants are used only in - * places allowed - * - ensure immediates always fit in 32b - * - explicit machine register contraints - * on instructions like division. - * - implement fast locals (the streak of - * constant allocX in the first basic block) - * - recognize complex addressing modes - * - * Invariant: the use counts that are used - * in sel() must be sound. This - * is not so trivial, maybe the - * dce should be moved out... 
- */ - -typedef struct ANum ANum; -typedef struct AClass AClass; -typedef struct RAlloc RAlloc; - -struct ANum { - char n, l, r; - Ins *i; - Ref mem; -}; - -static void amatch(Addr *, Ref, ANum *, Fn *, int); - -static int -fcmptoi(int fc) -{ - switch (fc) { - default: diag("isel: fcmptoi defaulted"); - case FCle: return ICule; - case FClt: return ICult; - case FCgt: return ICugt; - case FCge: return ICuge; - case FCne: return ICne; - case FCeq: return ICeq; - case FCo: return ICXnp; - case FCuo: return ICXp; - } -} - -static int -iscmp(int op, int *pk, int *pc) -{ - int k, c; - - if (OCmpw <= op && op <= OCmpw1) { - c = op - OCmpw; - k = Kw; - } - else if (OCmpl <= op && op <= OCmpl1) { - c = op - OCmpl; - k = Kl; - } - else if (OCmps <= op && op <= OCmps1) { - c = fcmptoi(op - OCmps); - k = Ks; - } - else if (OCmpd <= op && op <= OCmpd1) { - c = fcmptoi(op - OCmpd); - k = Kd; - } - else - return 0; - if (pk) - *pk = k; - if (pc) - *pc = c; - return 1; -} - -static int -noimm(Ref r, Fn *fn) -{ - int64_t val; - - if (rtype(r) != RCon) - return 0; - switch (fn->con[r.val].type) { - default: - diag("isel: invalid constant"); - case CAddr: - /* we only support the 'small' - * code model of the ABI, this - * means that we can always - * address data with 32bits - */ - return 0; - case CBits: - val = fn->con[r.val].bits.i; - return (val < INT32_MIN || val > INT32_MAX); - } -} - -static int -rslot(Ref r, Fn *fn) -{ - if (rtype(r) != RTmp) - return -1; - return fn->tmp[r.val].slot; -} - -static int -argcls(Ins *i, int n) -{ - return opdesc[i->op].argcls[n][i->cls]; -} - -static void -fixarg(Ref *r, int k, int phi, Fn *fn) -{ - Addr a; - Ref r0, r1; - int s, n; - - r1 = r0 = *r; - s = rslot(r0, fn); - if (KBASE(k) == 1 && rtype(r0) == RCon) { - /* load floating points from memory - * slots, they can't be used as - * immediates - */ - r1 = MEM(fn->nmem); - vgrow(&fn->mem, ++fn->nmem); - memset(&a, 0, sizeof a); - a.offset.type = CAddr; - a.offset.local = 1; - n = 
stashfp(fn->con[r0.val].bits.i, KWIDE(k)); - sprintf(a.offset.label, "fp%d", n); - fn->mem[fn->nmem-1] = a; - } - else if (!phi && k == Kl && noimm(r0, fn)) { - /* load constants that do not fit in - * a 32bit signed integer into a - * long temporary - */ - r1 = newtmp("isel", Kl, fn); - emit(OCopy, Kl, r1, r0, R); - } - else if (s != -1) { - /* load fast locals' addresses into - * temporaries right before the - * instruction - */ - r1 = newtmp("isel", Kl, fn); - emit(OAddr, Kl, r1, SLOT(s), R); - } - *r = r1; -} - -static void -chuse(Ref r, int du, Fn *fn) -{ - if (rtype(r) == RTmp) - fn->tmp[r.val].nuse += du; -} - -static void -seladdr(Ref *r, ANum *an, Fn *fn) -{ - Addr a; - Ref r0, r1; - - r0 = *r; - if (rtype(r0) == RTmp) { - chuse(r0, -1, fn); - r1 = an[r0.val].mem; - if (req(r1, R)) { - amatch(&a, r0, an, fn, 1); - vgrow(&fn->mem, ++fn->nmem); - fn->mem[fn->nmem-1] = a; - r1 = MEM(fn->nmem-1); - chuse(a.base, +1, fn); - chuse(a.index, +1, fn); - if (rtype(a.base) != RTmp) - if (rtype(a.index) != RTmp) - an[r0.val].mem = r1; - } - *r = r1; - } -} - -static void -selcmp(Ref arg[2], int k, Fn *fn) -{ - Ref r; - - if (rtype(arg[0]) == RCon) { - r = arg[1]; - arg[1] = arg[0]; - arg[0] = r; - } - assert(rtype(arg[0]) != RCon); - emit(OXCmp, k, R, arg[1], arg[0]); - fixarg(&curi->arg[0], k, 0, fn); -} - -static void -sel(Ins i, ANum *an, Fn *fn) -{ - Ref r0, r1; - int x, k, kc; - int64_t val; - Ins *i0; - - if (rtype(i.to) == RTmp) - if (!isreg(i.to) && !isreg(i.arg[0]) && !isreg(i.arg[1])) - if (fn->tmp[i.to.val].nuse == 0) { - chuse(i.arg[0], -1, fn); - chuse(i.arg[1], -1, fn); - return; - } - i0 = curi; - k = i.cls; - switch (i.op) { - case ODiv: - case ORem: - case OUDiv: - case OURem: - if (i.op == ODiv || i.op == OUDiv) - r0 = TMP(RAX), r1 = TMP(RDX); - else - r0 = TMP(RDX), r1 = TMP(RAX); - emit(OCopy, k, i.to, r0, R); - emit(OCopy, k, R, r1, R); - if (rtype(i.arg[1]) == RCon) { - /* immediates not allowed for - * divisions in x86 - */ - r0 = newtmp("isel", 
k, fn); - } else - r0 = i.arg[1]; - if (i.op == ODiv || i.op == ORem) { - emit(OXIDiv, k, R, r0, R); - emit(OSign, k, TMP(RDX), TMP(RAX), R); - } else { - emit(OXDiv, k, R, r0, R); - emit(OCopy, k, TMP(RDX), CON_Z, R); - } - emit(OCopy, k, TMP(RAX), i.arg[0], R); - if (rtype(i.arg[1]) == RCon) - emit(OCopy, k, r0, i.arg[1], R); - break; - case OSar: - case OShr: - case OShl: - if (rtype(i.arg[1]) == RCon) - goto Emit; - r0 = i.arg[1]; - i.arg[1] = TMP(RCX); - emit(OCopy, Kw, R, TMP(RCX), R); - emiti(i); - emit(OCopy, Kw, TMP(RCX), r0, R); - break; - case ONop: - break; - case OStored: - case OStores: - case OStorel: - case OStorew: - case OStoreh: - case OStoreb: - if (rtype(i.arg[0]) == RCon) { - if (i.op == OStored) - i.op = OStorel; - if (i.op == OStores) - i.op = OStorew; - } - seladdr(&i.arg[1], an, fn); - goto Emit; - case_OLoad: - seladdr(&i.arg[0], an, fn); - goto Emit; - case OCall: - case OSAlloc: - case OCopy: - case OAdd: - case OSub: - case OMul: - case OAnd: - case OOr: - case OXor: - case OXTest: - case OFtosi: - case OSitof: - case OExts: - case OTruncd: - case OCast: - case_OExt: -Emit: - emiti(i); - fixarg(&curi->arg[0], argcls(curi, 0), 0, fn); - fixarg(&curi->arg[1], argcls(curi, 1), 0, fn); - break; - case OAlloc: - case OAlloc+1: - case OAlloc+2: /* == OAlloc1 */ - /* we need to make sure - * the stack remains aligned - * (rsp = 0) mod 16 - */ - if (rtype(i.arg[0]) == RCon) { - assert(fn->con[i.arg[0].val].type == CBits); - val = fn->con[i.arg[0].val].bits.i; - val = (val + 15) & ~INT64_C(15); - if (val < 0 || val > INT32_MAX) - diag("isel: alloc too large"); - emit(OSAlloc, Kl, i.to, getcon(val, fn), R); - } else { - /* r0 = (i.arg[0] + 15) & -16 */ - r0 = newtmp("isel", Kl, fn); - r1 = newtmp("isel", Kl, fn); - emit(OSAlloc, Kl, i.to, r0, R); - emit(OAnd, Kl, r0, r1, getcon(-16, fn)); - emit(OAdd, Kl, r1, i.arg[0], getcon(15, fn)); - } - break; - default: - if (isext(i.op)) - goto case_OExt; - if (isload(i.op)) - goto case_OLoad; - if 
(iscmp(i.op, &kc, &x)) { - if (rtype(i.arg[0]) == RCon) - x = icmpop(x); - emit(OXSet+x, k, i.to, R, R); - selcmp(i.arg, kc, fn); - break; - } - diag("isel: non-exhaustive implementation"); - } - - while (i0 > curi && --i0) - if (rslot(i0->arg[0], fn) != -1 - || rslot(i0->arg[1], fn) != -1) - diag("isel: usupported address argument"); -} - -static Ins * -flagi(Ins *i0, Ins *i) -{ - while (i>i0) { - i--; - if (opdesc[i->op].sflag) - return i; - if (opdesc[i->op].lflag) - continue; - return 0; - } - return 0; -} - -struct AClass { - int inmem; - int align; - uint size; - int cls[2]; -}; - -static void -aclass(AClass *a, Typ *t) -{ - int e, s, n, cls; - uint sz, al; - - sz = t->size; - al = 1u << t->align; - - /* the ABI requires sizes to be rounded - * up to the nearest multiple of 8, moreover - * it makes it easy load and store structures - * in registers - */ - if (al < 8) - al = 8; - sz = (sz + al-1) & -al; - - a->size = sz; - a->align = t->align; - - if (t->dark || sz > 16) { - /* large or unaligned structures are - * required to be passed in memory - */ - a->inmem = 1; - return; - } - - a->inmem = 0; - for (e=0, s=0; e<2; e++) { - cls = -1; - for (n=0; n<8 && t->seg[s].len; s++) { - if (t->seg[s].ispad) { - /* don't change anything */ - } - else if (t->seg[s].isflt) { - if (cls == -1) - cls = Kd; - } - else - cls = Kl; - n += t->seg[s].len; - } - assert(n <= 8); - a->cls[e] = cls; - } -} - -static void -blit(Ref rstk, uint soff, Ref rsrc, uint sz, Fn *fn) -{ - Ref r, r1; - uint boff; - - /* it's an impolite blit, we might go across the end - * of the source object a little bit... 
*/ - for (boff=0; sz>0; sz-=8, soff+=8, boff+=8) { - r = newtmp("abi", Kl, fn); - r1 = newtmp("abi", Kl, fn); - emit(OStorel, 0, R, r, r1); - emit(OAdd, Kl, r1, rstk, getcon(soff, fn)); - r1 = newtmp("abi", Kl, fn); - emit(OLoad, Kl, r, r1, R); - emit(OAdd, Kl, r1, rsrc, getcon(boff, fn)); - chuse(rsrc, +1, fn); - chuse(rstk, +1, fn); - } -} - -static int -retr(Ref reg[2], AClass *aret) -{ - static int retreg[2][2] = {{RAX, RDX}, {XMM0, XMM0+1}}; - int n, k, ca, nr[2]; - - nr[0] = nr[1] = 0; - ca = 0; - for (n=0; aret->cls[n]>=0 && n<2; n++) { - k = KBASE(aret->cls[n]); - reg[n] = TMP(retreg[k][nr[k]++]); - ca += 1 << (2 * k); - } - return ca; -} - -static void -selret(Blk *b, Fn *fn) -{ - int j, k, ca; - Ref r, r0, reg[2]; - AClass aret; - - j = b->jmp.type; - - if (!isret(j) || j == JRet0) - return; - - r0 = b->jmp.arg; - b->jmp.type = JRet0; - - if (j == JRetc) { - aclass(&aret, &typ[fn->retty]); - if (aret.inmem) { - assert(rtype(fn->retr) == RTmp); - emit(OCopy, Kl, TMP(RAX), fn->retr, R); - chuse(fn->retr, +1, fn); - blit(fn->retr, 0, r0, aret.size, fn); - ca = 1; - } else { - ca = retr(reg, &aret); - if (aret.size > 8) { - r = newtmp("abi", Kl, fn); - emit(OLoad, Kl, reg[1], r, R); - emit(OAdd, Kl, r, r0, getcon(8, fn)); - chuse(r0, +1, fn); - } - emit(OLoad, Kl, reg[0], r0, R); - } - } else { - k = j - JRetw; - if (KBASE(k) == 0) { - emit(OCopy, k, TMP(RAX), r0, R); - ca = 1; - } else { - emit(OCopy, k, TMP(XMM0), r0, R); - ca = 1 << 2; - } - } - - b->jmp.arg = CALL(ca); -} - -static void -seljmp(Blk *b, Fn *fn) -{ - Ref r; - int c, k; - Ins *fi; - - if (b->jmp.type == JRet0 || b->jmp.type == JJmp) - return; - assert(b->jmp.type == JJnz); - r = b->jmp.arg; - b->jmp.arg = R; - assert(!req(r, R)); - if (rtype(r) == RCon) { - b->jmp.type = JJmp; - if (req(r, CON_Z)) - b->s1 = b->s2; - b->s2 = 0; - return; - } - fi = flagi(b->ins, &b->ins[b->nins]); - if (fi && req(fi->to, r)) { - if (iscmp(fi->op, &k, &c)) { - if (rtype(fi->arg[0]) == RCon) - c = icmpop(c); - 
b->jmp.type = JXJc + c; - if (fn->tmp[r.val].nuse == 1) { - assert(fn->tmp[r.val].ndef == 1); - selcmp(fi->arg, k, fn); - *fi = (Ins){.op = ONop}; - } - return; - } - if (fi->op == OAnd && fn->tmp[r.val].nuse == 1 - && (rtype(fi->arg[0]) == RTmp || - rtype(fi->arg[1]) == RTmp)) { - fi->op = OXTest; - fi->to = R; - b->jmp.type = JXJc + ICne; - if (rtype(fi->arg[1]) == RCon) { - r = fi->arg[1]; - fi->arg[1] = fi->arg[0]; - fi->arg[0] = r; - } - return; - } - /* since flags are not tracked in liveness, - * the result of the flag-setting instruction - * has to be marked as live - */ - if (fn->tmp[r.val].nuse == 1) - emit(OCopy, Kw, R, r, R); - b->jmp.type = JXJc + ICne; - return; - } - selcmp((Ref[2]){r, CON_Z}, Kw, fn); /* todo, add long branch if non-zero */ - b->jmp.type = JXJc + ICne; -} - -static int -classify(Ins *i0, Ins *i1, AClass *ac, int op, AClass *aret) -{ - int nint, ni, nsse, ns, n, *pn; - AClass *a; - Ins *i; - - if (aret && aret->inmem) - nint = 5; /* hidden argument */ - else - nint = 6; - nsse = 8; - for (i=i0, a=ac; iop == op) { - if (KBASE(i->cls) == 0) - pn = &nint; - else - pn = &nsse; - if (*pn > 0) { - --*pn; - a->inmem = 0; - } else - a->inmem = 2; - a->align = 3; - a->size = 8; - a->cls[0] = i->cls; - } else { - n = i->arg[0].val & AMask; - aclass(a, &typ[n]); - if (a->inmem) - continue; - ni = ns = 0; - for (n=0; n<2; n++) - if (KBASE(a->cls[n]) == 0) - ni++; - else - ns++; - if (nint >= ni && nsse >= ns) { - nint -= ni; - nsse -= ns; - } else - a->inmem = 1; - } - } - - return ((6-nint) << 4) | ((8-nsse) << 8); -} - -int rsave[] = { - RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX, - XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14 -}; -int rclob[] = {RBX, R12, R13, R14, R15}; - -MAKESURE(rsave_has_correct_size, sizeof rsave == NRSave * sizeof(int)); -MAKESURE(rclob_has_correct_size, sizeof rclob == NRClob * sizeof(int)); - -bits -retregs(Ref r, int p[2]) -{ - bits b; - int ni, nf; - - 
assert(rtype(r) == RACall); - b = 0; - ni = r.val & 3; - nf = (r.val >> 2) & 3; - if (ni >= 1) - b |= BIT(RAX); - if (ni >= 2) - b |= BIT(RDX); - if (nf >= 1) - b |= BIT(XMM0); - if (nf >= 2) - b |= BIT(XMM1); - if (p) { - p[0] = ni; - p[1] = nf; - } - return b; -} - -bits -argregs(Ref r, int p[2]) -{ - bits b; - int j, ni, nf; - - assert(rtype(r) == RACall); - b = 0; - ni = (r.val >> 4) & 15; - nf = (r.val >> 8) & 15; - for (j=0; jarg[1], R)) { - assert(rtype(i1->arg[1]) == RAType); - aclass(&aret, &typ[i1->arg[1].val & AMask]); - ca = classify(i0, i1, ac, OArg, &aret); - } else - ca = classify(i0, i1, ac, OArg, 0); - - for (stk=0, a=&ac[i1-i0]; a>ac;) - if ((--a)->inmem) { - assert(a->align <= 4); - stk += a->size; - if (a->align == 4) - stk += stk & 15; - } - stk += stk & 15; - if (stk) { - r = getcon(-(int64_t)stk, fn); - emit(OSAlloc, Kl, R, r, R); - } - - if (!req(i1->arg[1], R)) { - if (aret.inmem) { - /* get the return location from eax - * it saves one callee-save reg */ - r1 = newtmp("abi", Kl, fn); - emit(OCopy, Kl, i1->to, TMP(RAX), R); - ca += 1; - } else { - if (aret.size > 8) { - r = newtmp("abi", Kl, fn); - regcp[1] = newtmp("abi", aret.cls[1], fn); - emit(OStorel, 0, R, regcp[1], r); - emit(OAdd, Kl, r, i1->to, getcon(8, fn)); - chuse(i1->to, +1, fn); - ca += 1 << (2 * KBASE(aret.cls[1])); - } - regcp[0] = newtmp("abi", aret.cls[0], fn); - emit(OStorel, 0, R, regcp[0], i1->to); - ca += 1 << (2 * KBASE(aret.cls[0])); - retr(reg, &aret); - if (aret.size > 8) - emit(OCopy, aret.cls[1], regcp[1], reg[1], R); - emit(OCopy, aret.cls[0], regcp[0], reg[0], R); - r1 = i1->to; - } - /* allocate return pad */ - ra = alloc(sizeof *ra); - assert(NAlign == 3); - aret.align -= 2; - if (aret.align < 0) - aret.align = 0; - ra->i.op = OAlloc + aret.align; - ra->i.cls = Kl; - ra->i.to = r1; - ra->i.arg[0] = getcon(aret.size, fn); - ra->link = (*rap); - *rap = ra; - } else { - ra = 0; - if (KBASE(i1->cls) == 0) { - emit(OCopy, i1->cls, i1->to, TMP(RAX), R); - ca += 1; 
- } else { - emit(OCopy, i1->cls, i1->to, TMP(XMM0), R); - ca += 1 << 2; - } - } - emit(OCall, i1->cls, R, i1->arg[0], CALL(ca)); - emit(OCopy, Kw, TMP(RAX), getcon((ca >> 8) & 15, fn), R); - - ni = ns = 0; - if (ra && aret.inmem) - emit(OCopy, Kl, rarg(Kl, &ni, &ns), ra->i.to, R); /* pass hidden argument */ - for (i=i0, a=ac; iinmem) - continue; - r1 = rarg(a->cls[0], &ni, &ns); - if (i->op == OArgc) { - if (a->size > 8) { - r2 = rarg(a->cls[1], &ni, &ns); - r = newtmp("abi", Kl, fn); - emit(OLoad, a->cls[1], r2, r, R); - emit(OAdd, Kl, r, i->arg[1], getcon(8, fn)); - chuse(i->arg[1], +1, fn); - } - emit(OLoad, a->cls[0], r1, i->arg[1], R); - } else - emit(OCopy, i->cls, r1, i->arg[0], R); - } - - if (!stk) - return; - - r = newtmp("abi", Kl, fn); - chuse(r, -1, fn); - for (i=i0, a=ac, off=0; iinmem) - continue; - if (i->op == OArgc) { - if (a->align == 4) - off += off & 15; - blit(r, off, i->arg[1], a->size, fn); - } else { - r1 = newtmp("abi", Kl, fn); - emit(OStorel, 0, R, i->arg[0], r1); - emit(OAdd, Kl, r1, r, getcon(off, fn)); - chuse(r, +1, fn); - } - off += a->size; - } - emit(OSAlloc, Kl, r, getcon(stk, fn), R); -} - -static void -selpar(Fn *fn, Ins *i0, Ins *i1) -{ - AClass *ac, *a, aret; - Ins *i; - int ni, ns, s, al; - Ref r, r1; - - ac = alloc((i1-i0) * sizeof ac[0]); - curi = insb; - ni = ns = 0; - - if (fn->retty >= 0) { - aclass(&aret, &typ[fn->retty]); - if (aret.inmem) { - r = newtmp("abi", Kl, fn); - *curi++ = (Ins){OCopy, r, {rarg(Kl, &ni, &ns)}, Kl}; - fn->retr = r; - } - classify(i0, i1, ac, OPar, &aret); - } else - classify(i0, i1, ac, OPar, 0); - - assert(NAlign == 3); - - s = 4; - for (i=i0, a=ac; iinmem) { - case 1: - assert(a->align <= 4); - if (a->align == 4) - s = (s+3) & -4; - fn->tmp[i->to.val].slot = -s; /* HACK! 
*/ - s += a->size / 4; - continue; - case 2: - *curi++ = (Ins){OLoad, i->to, {SLOT(-s)}, i->cls}; - s += 2; - continue; - } - r1 = rarg(a->cls[0], &ni, &ns); - if (i->op == OParc) { - r = newtmp("abi", Kl, fn); - *curi++ = (Ins){OCopy, r, {r1}, Kl}; - a->cls[0] = r.val; - if (a->size > 8) { - r1 = rarg(a->cls[1], &ni, &ns); - r = newtmp("abi", Kl, fn); - *curi++ = (Ins){OCopy, r, {r1}, Kl}; - a->cls[1] = r.val; - } - } else - *curi++ = (Ins){OCopy, i->to, {r1}, i->cls}; - } - for (i=i0, a=ac; iop != OParc || a->inmem) - continue; - assert(NAlign == 3); - for (al=0; a->align >> (al+2); al++) - ; - r = TMP(a->cls[0]); - r1 = i->to; - *curi++ = (Ins){OAlloc+al, r1, {getcon(a->size, fn)}, Kl}; - *curi++ = (Ins){OStorel, R, {r, r1}, 0}; - if (a->size > 8) { - r = newtmp("abi", Kl, fn); - *curi++ = (Ins){OAdd, r, {r1, getcon(8, fn)}, Kl}; - r1 = TMP(a->cls[1]); - *curi++ = (Ins){OStorel, R, {r1, r}, 0}; - } - } -} - -static int -aref(Ref r, ANum *ai) -{ - switch (rtype(r)) { - default: - diag("isel: aref defaulted"); - case RCon: - return 2; - case RTmp: - return ai[r.val].n; - } -} - -static int -ascale(Ref r, Con *con) -{ - int64_t n; - - if (rtype(r) != RCon) - return 0; - if (con[r.val].type != CBits) - return 0; - n = con[r.val].bits.i; - return n == 1 || n == 2 || n == 4 || n == 8; -} - -static void -anumber(ANum *ai, Blk *b, Con *con) -{ - /* This should be made obsolete by a proper - * reassoc pass. 
- * - * Rules: - * - * RTmp(_) -> 0 tmp - * ( RTmp(_) -> 1 slot ) - * RCon(_) -> 2 con - * 0 * 2 -> 3 s * i (when constant is 1,2,4,8) - */ - static char add[10][10] = { - [2] [2] = 2, /* folding */ - [2] [5] = 5, [5] [2] = 5, - [2] [6] = 6, [6] [2] = 6, - [2] [7] = 7, [7] [2] = 7, - [0] [0] = 4, /* 4: b + s * i */ - [0] [3] = 4, [3] [0] = 4, - [2] [3] = 5, [3] [2] = 5, /* 5: o + s * i */ - [0] [2] = 6, [2] [0] = 6, /* 6: o + b */ - [2] [4] = 7, [4] [2] = 7, /* 7: o + b + s * i */ - [0] [5] = 7, [5] [0] = 7, - [6] [3] = 7, [3] [6] = 7, - - }; - int a, a1, a2, n1, n2, t1, t2; - Ins *i; - - for (i=b->ins; i-b->ins < b->nins; i++) { - if (rtype(i->to) == RTmp) - ai[i->to.val].i = i; - if (i->op != OAdd && i->op != OMul) - continue; - a1 = aref(i->arg[0], ai); - a2 = aref(i->arg[1], ai); - t1 = a1 != 1 && a1 != 2; - t2 = a2 != 1 && a2 != 2; - if (i->op == OAdd) { - a = add[n1 = a1][n2 = a2]; - if (t1 && a < add[0][a2]) - a = add[n1 = 0][n2 = a2]; - if (t2 && a < add[a1][0]) - a = add[n1 = a1][n2 = 0]; - if (t1 && t2 && a < add[0][0]) - a = add[n1 = 0][n2 = 0]; - } else { - n1 = n2 = a = 0; - if (ascale(i->arg[0], con) && t2) - a = 3, n1 = 2, n2 = 0; - if (t1 && ascale(i->arg[1], con)) - a = 3, n1 = 0, n2 = 2; - } - ai[i->to.val].n = a; - ai[i->to.val].l = n1; - ai[i->to.val].r = n2; - } -} - -static void -amatch(Addr *a, Ref r, ANum *ai, Fn *fn, int top) -{ - Ins *i; - int nl, nr, t, s; - Ref al, ar; - - if (top) - memset(a, 0, sizeof *a); - if (rtype(r) == RCon) { - addcon(&a->offset, &fn->con[r.val]); - return; - } - assert(rtype(r) == RTmp); - i = ai[r.val].i; - nl = ai[r.val].l; - nr = ai[r.val].r; - if (i) { - if (nl > nr) { - al = i->arg[1]; - ar = i->arg[0]; - t = nl, nl = nr, nr = t; - } else { - al = i->arg[0]; - ar = i->arg[1]; - } - } - switch (ai[r.val].n) { - default: - diag("isel: amatch defaulted"); - case 3: /* s * i */ - if (!top) { - a->index = al; - a->scale = fn->con[ar.val].bits.i; - } else - a->base = r; - break; - case 4: /* b + s * i */ - switch 
(nr) { - case 0: - if (fn->tmp[ar.val].slot != -1) { - al = i->arg[1]; - ar = i->arg[0]; - } - a->index = ar; - a->scale = 1; - break; - case 3: - amatch(a, ar, ai, fn, 0); - break; - } - r = al; - case 0: - s = fn->tmp[r.val].slot; - if (s != -1) - r = SLOT(s); - a->base = r; - break; - case 2: /* constants */ - case 5: /* o + s * i */ - case 6: /* o + b */ - case 7: /* o + b + s * i */ - amatch(a, ar, ai, fn, 0); - amatch(a, al, ai, fn, 0); - break; - } -} - -/* instruction selection - * requires use counts (as given by parsing) - */ -void -isel(Fn *fn) -{ - Blk *b, **sb; - Ins *i, *i0, *ip; - Phi *p; - uint a; - int n, al; - int64_t sz; - ANum *ainfo; - RAlloc *ral; - - for (n=0; nntmp; n++) - fn->tmp[n].slot = -1; - fn->slot = 0; - - /* lower arguments */ - for (b=fn->start, i=b->ins; i-b->ins < b->nins; i++) - if (i->op != OPar && i->op != OParc) - break; - selpar(fn, b->ins, i); - n = b->nins - (i - b->ins) + (curi - insb); - i0 = alloc(n * sizeof(Ins)); - ip = icpy(ip = i0, insb, curi - insb); - ip = icpy(ip, i, &b->ins[b->nins] - i); - b->nins = n; - b->ins = i0; - - /* lower function calls and returns */ - ral = 0; - b = fn->start; - do { - if (!(b = b->link)) - b = fn->start; /* do it last */ - curi = &insb[NIns]; - selret(b, fn); - for (i=&b->ins[b->nins]; i!=b->ins;) { - if ((--i)->op == OCall) { - for (i0=i; i0>b->ins; i0--) - if ((i0-1)->op != OArg) - if ((i0-1)->op != OArgc) - break; - selcall(fn, i0, i, &ral); - i = i0; - continue; - } - assert(i->op != OArg && i->op != OArgc); - emiti(*i); - } - if (b == fn->start) - for (; ral; ral=ral->link) - emiti(ral->i); - b->nins = &insb[NIns] - curi; - idup(&b->ins, curi, b->nins); - } while (b != fn->start); - - if (debug['A']) { - fprintf(stderr, "\n> After call lowering:\n"); - printfn(fn, stderr); - } - - /* assign slots to fast allocs */ - b = fn->start; - assert(NAlign == 3 && "change n=4 and sz /= 4 below"); - for (al=OAlloc, n=4; al<=OAlloc1; al++, n*=2) - for (i=b->ins; i-b->ins < b->nins; i++) - 
if (i->op == al) { - if (rtype(i->arg[0]) != RCon) - break; - sz = fn->con[i->arg[0].val].bits.i; - if (sz < 0 || sz >= INT_MAX-3) - diag("isel: invalid alloc size"); - sz = (sz + n-1) & -n; - sz /= 4; - fn->tmp[i->to.val].slot = fn->slot; - fn->slot += sz; - *i = (Ins){.op = ONop}; - } - - /* process basic blocks */ - n = fn->ntmp; - ainfo = emalloc(n * sizeof ainfo[0]); - for (b=fn->start; b; b=b->link) { - curi = &insb[NIns]; - for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++) - for (p=(*sb)->phi; p; p=p->link) { - for (a=0; p->blk[a] != b; a++) - assert(a+1 < p->narg); - fixarg(&p->arg[a], p->cls, 1, fn); - } - memset(ainfo, 0, n * sizeof ainfo[0]); - anumber(ainfo, b, fn->con); - seljmp(b, fn); - for (i=&b->ins[b->nins]; i!=b->ins;) - sel(*--i, ainfo, fn); - b->nins = &insb[NIns] - curi; - idup(&b->ins, curi, b->nins); - } - free(ainfo); - - if (debug['I']) { - fprintf(stderr, "\n> After instruction selection:\n"); - printfn(fn, stderr); - } -} diff --git a/src/live.c b/src/live.c deleted file mode 100644 index 44806e1..0000000 --- a/src/live.c +++ /dev/null @@ -1,174 +0,0 @@ -#include "all.h" - -void -liveon(BSet *v, Blk *b, Blk *s) -{ - Phi *p; - uint a; - - bscopy(v, s->in); - for (p=s->phi; p; p=p->link) { - bsclr(v, p->to.val); - for (a=0; anarg; a++) - if (p->blk[a] == b) - if (rtype(p->arg[a]) == RTmp) - bsset(v, p->arg[a].val); - } -} - -static int -phitmp(int t, Tmp *tmp) -{ - int tp; - - tp = tmp[t].phi; - return tp ? 
tp : t; -} - -static void -phifix(int t1, short *phi, Tmp *tmp) -{ - int t, t2; - - /* detect temporaries arguments - * of the same phi node that - * interfere and separate them - */ - t = phitmp(t1, tmp); - t2 = phi[t]; - if (t2 && t2 != t1) { - if (t != t1) { - tmp[t1].phi = t1; - t = t1; - } else { - tmp[t2].phi = t2; - phi[t2] = t2; - } - } - phi[t] = t1; -} - -static void -bset(Ref r, Blk *b, int *nlv, short *phi, Tmp *tmp) -{ - - if (rtype(r) != RTmp) - return; - bsset(b->gen, r.val); - phifix(r.val, phi, tmp); - if (!bshas(b->in, r.val)) { - nlv[KBASE(tmp[r.val].cls)]++; - bsset(b->in, r.val); - } -} - -/* liveness analysis - * requires rpo computation - */ -void -filllive(Fn *f) -{ - Blk *b; - Ins *i; - int k, t, m[2], n, chg, nlv[2]; - short *phi; - BSet u[1], v[1]; - Mem *ma; - - bsinit(u, f->ntmp); - bsinit(v, f->ntmp); - phi = emalloc(f->ntmp * sizeof phi[0]); - for (b=f->start; b; b=b->link) { - bsinit(b->in, f->ntmp); - bsinit(b->out, f->ntmp); - bsinit(b->gen, f->ntmp); - } - chg = 1; -Again: - for (n=f->nblk-1; n>=0; n--) { - b = f->rpo[n]; - - bscopy(u, b->out); - if (b->s1) { - liveon(v, b, b->s1); - bsunion(b->out, v); - } - if (b->s2) { - liveon(v, b, b->s2); - bsunion(b->out, v); - } - chg |= !bsequal(b->out, u); - - memset(phi, 0, f->ntmp * sizeof phi[0]); - memset(nlv, 0, sizeof nlv); - bscopy(b->in, b->out); - for (t=0; tntmp; t++) - if (bshas(b->in, t)) { - phifix(t, phi, f->tmp); - nlv[KBASE(f->tmp[t].cls)]++; - } - if (rtype(b->jmp.arg) == RACall) { - assert(bscount(b->in) == 0 && nlv[0] == 0 && nlv[1] == 0); - b->in->t[0] |= retregs(b->jmp.arg, nlv); - } else - bset(b->jmp.arg, b, nlv, phi, f->tmp); - for (k=0; k<2; k++) - b->nlive[k] = nlv[k]; - for (i=&b->ins[b->nins]; i!=b->ins;) { - if ((--i)->op == OCall && rtype(i->arg[1]) == RACall) { - b->in->t[0] &= ~retregs(i->arg[1], m); - for (k=0; k<2; k++) - nlv[k] -= m[k]; - if (nlv[0] + NISave > b->nlive[0]) - b->nlive[0] = nlv[0] + NISave; - if (nlv[1] + NFSave > b->nlive[1]) - 
b->nlive[1] = nlv[1] + NFSave; - b->in->t[0] |= argregs(i->arg[1], m); - for (k=0; k<2; k++) - nlv[k] += m[k]; - } - if (!req(i->to, R)) { - assert(rtype(i->to) == RTmp); - t = i->to.val; - if (bshas(b->in, i->to.val)) - nlv[KBASE(f->tmp[t].cls)]--; - bsset(b->gen, t); - bsclr(b->in, t); - phi[phitmp(t, f->tmp)] = 0; - } - for (k=0; k<2; k++) - switch (rtype(i->arg[k])) { - case RAMem: - ma = &f->mem[i->arg[k].val & AMask]; - bset(ma->base, b, nlv, phi, f->tmp); - bset(ma->index, b, nlv, phi, f->tmp); - break; - default: - bset(i->arg[k], b, nlv, phi, f->tmp); - break; - } - for (k=0; k<2; k++) - if (nlv[k] > b->nlive[k]) - b->nlive[k] = nlv[k]; - } - } - if (chg) { - chg = 0; - goto Again; - } - free(phi); - - if (debug['L']) { - fprintf(stderr, "\n> Liveness analysis:\n"); - for (b=f->start; b; b=b->link) { - fprintf(stderr, "\t%-10sin: ", b->name); - dumpts(b->in, f->tmp, stderr); - fprintf(stderr, "\t out: "); - dumpts(b->out, f->tmp, stderr); - fprintf(stderr, "\t gen: "); - dumpts(b->gen, f->tmp, stderr); - fprintf(stderr, "\t live: "); - fprintf(stderr, "%d %d\n", b->nlive[0], b->nlive[1]); - } - } -} diff --git a/src/main.c b/src/main.c deleted file mode 100644 index c1664be..0000000 --- a/src/main.c +++ /dev/null @@ -1,140 +0,0 @@ -#include "all.h" -#include "config.h" -#include -#include - -char debug['Z'+1] = { - ['P'] = 0, /* parsing */ - ['A'] = 0, /* abi lowering */ - ['I'] = 0, /* instruction selection */ - ['L'] = 0, /* liveness */ - ['M'] = 0, /* memory optimization */ - ['N'] = 0, /* ssa construction */ - ['C'] = 0, /* copy elimination */ - ['S'] = 0, /* spilling */ - ['R'] = 0, /* reg. 
allocation */ -}; - -static FILE *outf; -static int dbg; - -static void -data(Dat *d) -{ - if (dbg) - return; - if (d->type == DEnd) { - fputs("/* end data */\n\n", outf); - freeall(); - } - emitdat(d, outf); -} - -static void -func(Fn *fn) -{ - int n; - - if (dbg) - fprintf(stderr, "**** Function %s ****", fn->name); - if (debug['P']) { - fprintf(stderr, "\n> After parsing:\n"); - printfn(fn, stderr); - } - fillrpo(fn); - fillpreds(fn); - filluse(fn); - memopt(fn); - ssa(fn); - filluse(fn); - copy(fn); - filluse(fn); - isel(fn); - filllive(fn); - fillcost(fn); - spill(fn); - rega(fn); - fillrpo(fn); - assert(fn->rpo[0] == fn->start); - for (n=0;; n++) - if (n == fn->nblk-1) { - fn->rpo[n]->link = 0; - break; - } else - fn->rpo[n]->link = fn->rpo[n+1]; - if (!dbg) { - emitfn(fn, outf); - fprintf(outf, "/* end function %s */\n\n", fn->name); - } else - fprintf(stderr, "\n"); - freeall(); -} - -int -main(int ac, char *av[]) -{ - FILE *inf; - char *f; - int c, asm; - - asm = Defaultasm; - outf = stdout; - while ((c = getopt(ac, av, "d:o:G:")) != -1) - switch (c) { - case 'd': - for (; *optarg; optarg++) - if (isalpha(*optarg)) { - debug[toupper(*optarg)] = 1; - dbg = 1; - } - break; - case 'o': - if (strcmp(optarg, "-") != 0) - outf = fopen(optarg, "w"); - break; - case 'G': - if (strcmp(optarg, "e") == 0) - asm = Gaself; - else if (strcmp(optarg, "m") == 0) - asm = Gasmacho; - else { - fprintf(stderr, "unknown gas flavor '%s'\n", optarg); - exit(1); - } - break; - default: - fprintf(stderr, "usage: %s [-d ] [-o out] {file.ssa, -}\n", av[0]); - exit(1); - } - - switch (asm) { - case Gaself: - locprefix = ".L"; - symprefix = ""; - break; - case Gasmacho: - locprefix = "L"; - symprefix = "_"; - break; - } - - do { - f = av[optind]; - if (!f || strcmp(f, "-") == 0) { - inf = stdin; - f = "-"; - } else { - inf = fopen(f, "r"); - if (!inf) { - fprintf(stderr, "cannot open '%s'\n", f); - exit(1); - } - } - parse(inf, f, data, func); - } while (++optind < ac); - - if (!dbg) 
- emitfin(outf); - - exit(0); -} diff --git a/src/mem.c b/src/mem.c deleted file mode 100644 index bda43d7..0000000 --- a/src/mem.c +++ /dev/null @@ -1,81 +0,0 @@ -#include "all.h" - -/* Memory optimization: - * - * - replace alloced slots used only in - * load/store operations - * Assumption: all the accesses have the - * same size (this could be wrong...) - */ - -/* require use, maintains use counts */ -void -memopt(Fn *fn) -{ - Blk *b; - Ins *i, *l; - Tmp *t; - Use *u, *ue; - int a; - - b = fn->start; - for (i=b->ins; i-b->ins < b->nins; i++) { - if (OAlloc > i->op || i->op > OAlloc1) - continue; - assert(NAlign == 3); - assert(rtype(i->to) == RTmp); - t = &fn->tmp[i->to.val]; - for (u=t->use; u != &t->use[t->nuse]; u++) { - if (u->type != UIns) - goto NextIns; - l = u->u.ins; - if (!isload(l->op) - && (!isstore(l->op) || req(i->to, l->arg[0]))) - goto NextIns; - } - /* get rid of the alloc and replace uses */ - *i = (Ins){.op = ONop}; - t->ndef--; - ue = &t->use[t->nuse]; - for (u=t->use; u!=ue; u++) { - l = u->u.ins; - if (isstore(l->op)) { - if (l->op == OStores) - l->cls = Kd; - else if (l->op == OStored) - l->cls = Kd; - else if (l->op == OStorel) - l->cls = Kl; - else - l->cls = Kw; - l->op = OCopy; - l->to = l->arg[1]; - l->arg[1] = R; - t->nuse--; - t->ndef++; - } else - /* try to turn loads into copies so we - * can eliminate them later */ - switch(l->op) { - case OLoad: - l->op = OCopy; - break; - case OLoadsw: - case OLoaduw: - l->cls = Kw; - l->op = OCopy; - break; - default: - /* keep l->cls */ - a = l->op - OLoadsw; - l->op = OExtsw + a; - break; - } - } - NextIns:; - } - if (debug['M']) { - fprintf(stderr, "\n> After memory optimization:\n"); - printfn(fn, stderr); - } -} diff --git a/src/parse.c b/src/parse.c deleted file mode 100644 index 2590971..0000000 --- a/src/parse.c +++ /dev/null @@ -1,1099 +0,0 @@ -#include "all.h" -#include -#include - -enum { - Kx = -1, /* Invalid operand */ - Km = Kl, /* Memory pointer (for x64) */ -}; - -OpDesc 
opdesc[NOp] = { -#define A(a,b,c,d) {[Kw]=K##a, [Kl]=K##b, [Ks]=K##c, [Kd]=K##d} - - /* NAME NM ARGCLS0 ARGCLS1 SF LF */ - [OAdd] = { "add", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 }, - [OSub] = { "sub", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 }, - [ODiv] = { "div", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 }, - [ORem] = { "rem", 2, {A(w,l,x,x), A(w,l,x,x)}, 0, 0 }, - [OUDiv] = { "udiv", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 }, - [OURem] = { "urem", 2, {A(w,l,x,x), A(w,l,x,x)}, 0, 0 }, - [OMul] = { "mul", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 }, - [OAnd] = { "and", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 }, - [OOr] = { "or", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 }, - [OXor] = { "xor", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 }, - [OSar] = { "sar", 1, {A(w,l,x,x), A(w,w,x,x)}, 1, 0 }, - [OShr] = { "shr", 1, {A(w,l,x,x), A(w,w,x,x)}, 1, 0 }, - [OShl] = { "shl", 1, {A(w,l,x,x), A(w,w,x,x)}, 1, 0 }, - [OStored] = { "stored", 0, {A(d,d,d,d), A(m,m,m,m)}, 0, 1 }, - [OStores] = { "stores", 0, {A(s,s,s,s), A(m,m,m,m)}, 0, 1 }, - [OStorel] = { "storel", 0, {A(l,l,l,l), A(m,m,m,m)}, 0, 1 }, - [OStorew] = { "storew", 0, {A(w,w,w,w), A(m,m,m,m)}, 0, 1 }, - [OStoreh] = { "storeh", 0, {A(w,w,w,w), A(m,m,m,m)}, 0, 1 }, - [OStoreb] = { "storeb", 0, {A(w,w,w,w), A(m,m,m,m)}, 0, 1 }, - [OLoad] = { "load", 0, {A(m,m,m,m), A(x,x,x,x)}, 0, 1 }, - [OLoadsw] = { "loadsw", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, - [OLoaduw] = { "loaduw", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, - [OLoadsh] = { "loadsh", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, - [OLoaduh] = { "loaduh", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, - [OLoadsb] = { "loadsb", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, - [OLoadub] = { "loadub", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, - [OExtsw] = { "extsw", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, - [OExtuw] = { "extuw", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, - [OExtsh] = { "extsh", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, - [OExtuh] = { "extuh", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, - [OExtsb] = { "extsb", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, - [OExtub] = { 
"extub", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, - [OExts] = { "exts", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, - [OTruncd] = { "truncd", 0, {A(d,d,d,d), A(x,x,x,x)}, 0, 1 }, - [OFtosi] = { "ftosi", 0, {A(s,d,x,x), A(x,x,x,x)}, 0, 1 }, - [OSitof] = { "sitof", 0, {A(x,x,w,l), A(x,x,x,x)}, 0, 1 }, - [OCast] = { "cast", 0, {A(s,d,w,l), A(x,x,x,x)}, 0, 1 }, - [OCopy] = { "copy", 1, {A(w,l,s,d), A(x,x,x,x)}, 0, 1 }, - [ONop] = { "nop", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 1 }, - [OSwap] = { "swap", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 }, - [OSign] = { "sign", 0, {A(w,l,x,x), A(x,x,x,x)}, 0, 0 }, - [OSAlloc] = { "salloc", 0, {A(x,l,x,x), A(x,x,x,x)}, 0, 0 }, - [OXDiv] = { "xdiv", 1, {A(w,l,x,x), A(x,x,x,x)}, 0, 0 }, - [OXCmp] = { "xcmp", 1, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 }, - [OXTest] = { "xtest", 1, {A(w,l,x,x), A(w,l,x,x)}, 1, 0 }, - [OAddr] = { "addr", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, - [OPar] = { "parn", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 }, - [OParc] = { "parc", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 }, - [OArg] = { "arg", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 }, - [OArgc] = { "argc", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 }, - [OCall] = { "call", 0, {A(m,m,m,m), A(x,x,x,x)}, 0, 0 }, - [OXSetnp] = { "xsetnp", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 }, - [OXSetp] = { "xsetp", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 }, - [OAlloc] = { "alloc4", 1, {A(l,l,l,l), A(x,x,x,x)}, 0, 0 }, - [OAlloc+1] = { "alloc8", 1, {A(l,l,l,l), A(x,x,x,x)}, 0, 0 }, - [OAlloc+2] = { "alloc16", 1, {A(l,l,l,l), A(x,x,x,x)}, 0, 0 }, -#define X(c) \ - [OCmpw+IC##c] = { "c" #c "w", 0, {A(w,w,x,x), A(w,w,x,x)}, 1, 0 }, \ - [OCmpl+IC##c] = { "c" #c "l", 0, {A(l,l,x,x), A(l,l,x,x)}, 1, 0 }, \ - [OXSet+IC##c] = { "xset" #c, 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 1 }, - ICMPS(X) -#undef X -#define X(c) \ - [OCmps+FC##c] = { "c" #c "s", 0, {A(s,s,x,x), A(s,s,x,x)}, 1, 0 }, \ - [OCmpd+FC##c] = { "c" #c "d", 0, {A(d,d,x,x), A(d,d,x,x)}, 1, 0 }, - FCMPS(X) -#undef X - -}; -#undef A - -typedef enum { - PXXX, - PLbl, - PPhi, - PIns, - PEnd, -} PState; 
- -enum { - TXXX = NPubOp, - TCall, - TPhi, - TJmp, - TJnz, - TRet, - TExport, - TFunc, - TType, - TData, - TAlign, - TL, - TW, - TH, - TB, - TD, - TS, - TZ, - - TInt, - TFlts, - TFltd, - TTmp, - TLbl, - TGlo, - TTyp, - TStr, - - TPlus, - TEq, - TComma, - TLParen, - TRParen, - TLBrace, - TRBrace, - TNL, - TEOF, -}; - - -static FILE *inf; -static char *inpath; -static int thead; -static struct { - char chr; - double fltd; - float flts; - int64_t num; - char *str; -} tokval; -static int lnum; - -static Tmp *tmp; -static Con *con; -static int ntmp; -static int ncon; -static Phi **plink; -static Blk **bmap; -static Blk *curb; -static Blk **blink; -static int nblk; -static int rcls; -static int ntyp; - - -void -err(char *s, ...) -{ - char buf[100], *p, *end; - va_list ap; - - p = buf; - end = buf + sizeof(buf); - - va_start(ap, s); - p += snprintf(p, end - p, "%s:%d: ", inpath, lnum); - p += vsnprintf(p, end - p, s, ap); - va_end(ap); - - diag(buf); -} - -static int -lex() -{ - static struct { - char *str; - int tok; - } tmap[] = { - { "call", TCall }, - { "phi", TPhi }, - { "jmp", TJmp }, - { "jnz", TJnz }, - { "ret", TRet }, - { "export", TExport }, - { "function", TFunc }, - { "type", TType }, - { "data", TData }, - { "align", TAlign }, - { "l", TL }, - { "w", TW }, - { "h", TH }, - { "b", TB }, - { "d", TD }, - { "s", TS }, - { "z", TZ }, - { "loadw", OLoad }, /* for convenience */ - { "loadl", OLoad }, - { "loads", OLoad }, - { "loadd", OLoad }, - { "alloc1", OAlloc }, - { "alloc2", OAlloc }, - { 0, TXXX } - }; - static char tok[NString]; - int c, i; - int t; - - do - c = fgetc(inf); - while (isblank(c)); - t = TXXX; - tokval.chr = c; - switch (c) { - case EOF: - return TEOF; - case ',': - return TComma; - case '(': - return TLParen; - case ')': - return TRParen; - case '{': - return TLBrace; - case '}': - return TRBrace; - case '=': - return TEq; - case '+': - return TPlus; - case 's': - if (fscanf(inf, "_%f", &tokval.flts) != 1) - break; - return TFlts; - case 
'd': - if (fscanf(inf, "_%lf", &tokval.fltd) != 1) - break; - return TFltd; - case '%': - t = TTmp; - goto Alpha; - case '@': - t = TLbl; - goto Alpha; - case '$': - t = TGlo; - goto Alpha; - case ':': - t = TTyp; - goto Alpha; - case '#': - while (fgetc(inf) != '\n') - ; - case '\n': - lnum++; - return TNL; - } - if (isdigit(c) || c == '-' || c == '+') { - ungetc(c, inf); - if (fscanf(inf, "%"SCNd64, &tokval.num) != 1) - err("invalid integer literal"); - return TInt; - } - if (c == '"') { - tokval.str = vnew(0, 1); - for (i=0;; i++) { - c = fgetc(inf); - vgrow(&tokval.str, i+1); - if (c == '"') - if (!i || tokval.str[i-1] != '\\') { - tokval.str[i] = 0; - return TStr; - } - tokval.str[i] = c; - } - } - if (0) -Alpha: c = fgetc(inf); - if (!isalpha(c) && c != '.' && c != '_') - err("lexing failure: invalid character %c (%d)", c, c); - i = 0; - do { - if (i >= NString-1) - err("identifier too long"); - tok[i++] = c; - c = fgetc(inf); - } while (isalpha(c) || c == '$' || c == '.' || c == '_' || isdigit(c)); - tok[i] = 0; - ungetc(c, inf); - tokval.str = tok; - if (t != TXXX) { - return t; - } - for (i=0; i= NIns) - err("too many instructions (1)"); - k = parsecls(&ty); - r = parseref(); - if (req(r, R)) - err("invalid reference argument"); - if (!arg && rtype(r) != RTmp) - err("invalid function parameter"); - if (k == 4) - if (arg) - *curi = (Ins){OArgc, R, {TYPE(ty), r}, Kl}; - else - *curi = (Ins){OParc, r, {TYPE(ty)}, Kl}; - else - if (arg) - *curi = (Ins){OArg, R, {r}, k}; - else - *curi = (Ins){OPar, r, {R}, k}; - curi++; - t = next(); - if (t == TRParen) - break; - if (t != TComma) - err(", or ) expected"); - } -} - -static Blk * -findblk(char *name) -{ - int i; - - for (i=0; iname, name) == 0) - return bmap[i]; - vgrow(&bmap, ++nblk); - bmap[i] = blknew(); - strcpy(bmap[i]->name, name); - return bmap[i]; -} - -static void -closeblk() -{ - curb->nins = curi - insb; - idup(&curb->ins, insb, curb->nins); - blink = &curb->link; - curi = insb; -} - -static PState 
-parseline(PState ps) -{ - Ref arg[NPred] = {R}; - Blk *blk[NPred]; - Phi *phi; - Ref r; - Blk *b; - int t, op, i, k, ty; - - t = nextnl(); - if (ps == PLbl && t != TLbl && t != TRBrace) - err("label or } expected"); - switch (t) { - default: - if (isstore(t)) { - /* operations without result */ - r = R; - k = 0; - op = t; - goto DoOp; - } - err("label, instruction or jump expected"); - case TRBrace: - return PEnd; - case TTmp: - break; - case TLbl: - b = findblk(tokval.str); - if (b->jmp.type != JXXX) - err("multiple definitions of block"); - if (curb && curb->jmp.type == JXXX) { - closeblk(); - curb->jmp.type = JJmp; - curb->s1 = b; - } - *blink = b; - curb = b; - plink = &curb->phi; - expect(TNL); - return PPhi; - case TRet: - curb->jmp.type = (int[]){ - JRetw, JRetl, - JRets, JRetd, - JRetc, JRet0 - }[rcls]; - if (rcls < 5) { - r = parseref(); - if (req(r, R)) - err("return value expected"); - curb->jmp.arg = r; - } - goto Close; - case TJmp: - curb->jmp.type = JJmp; - goto Jump; - case TJnz: - curb->jmp.type = JJnz; - r = parseref(); - if (req(r, R)) - err("invalid argument for jnz jump"); - curb->jmp.arg = r; - expect(TComma); - Jump: - expect(TLbl); - curb->s1 = findblk(tokval.str); - if (curb->jmp.type != JJmp) { - expect(TComma); - expect(TLbl); - curb->s2 = findblk(tokval.str); - } - Close: - expect(TNL); - closeblk(); - return PLbl; - } - r = tmpref(tokval.str); - expect(TEq); - k = parsecls(&ty); - op = next(); -DoOp: - if (op == TPhi) { - if (ps != PPhi) - err("unexpected phi instruction"); - op = -1; - } - if (op == TCall) { - arg[0] = parseref(); - parserefl(1); - expect(TNL); - op = OCall; - if (k == 4) { - k = Kl; - arg[1] = TYPE(ty); - } else - arg[1] = R; - goto Ins; - } - if (k == 4) - err("size class must be w, l, s, or d"); - if (op >= NPubOp) - err("invalid instruction"); - i = 0; - if (peek() != TNL) - for (;;) { - if (i == NPred) - err("too many arguments"); - if (op == -1) { - expect(TLbl); - blk[i] = findblk(tokval.str); - } - arg[i] = 
parseref(); - if (req(arg[i], R)) - err("invalid instruction argument"); - i++; - t = peek(); - if (t == TNL) - break; - if (t != TComma) - err(", or end of line expected"); - next(); - } - next(); - if (op != -1) { - Ins: - if (curi - insb >= NIns) - err("too many instructions (2)"); - curi->op = op; - curi->cls = k; - curi->to = r; - curi->arg[0] = arg[0]; - curi->arg[1] = arg[1]; - curi++; - return PIns; - } else { - phi = alloc(sizeof *phi); - phi->to = r; - phi->cls = k; - memcpy(phi->arg, arg, i * sizeof arg[0]); - memcpy(phi->blk, blk, i * sizeof blk[0]); - phi->narg = i; - *plink = phi; - plink = &phi->link; - return PPhi; - } -} - -static Fn * -parsefn(int export) -{ - PState ps; - Fn *fn; - - ntmp = Tmp0; - ncon = 1; /* first constant must be 0 */ - curb = 0; - nblk = 0; - curi = insb; - tmp = vnew(ntmp, sizeof tmp[0]); - con = vnew(ncon, sizeof con[0]); - bmap = vnew(nblk, sizeof bmap[0]); - con[0].type = CBits; - fn = alloc(sizeof *fn); - fn->export = export; - blink = &fn->start; - fn->retty = -1; - if (peek() != TGlo) - rcls = parsecls(&fn->retty); - else - rcls = 5; - if (next() != TGlo) - err("function name expected"); - strcpy(fn->name, tokval.str); - parserefl(0); - if (nextnl() != TLBrace) - err("function body must start with {"); - ps = PLbl; - do - ps = parseline(ps); - while (ps != PEnd); - if (!curb) - err("empty file"); - if (curb->jmp.type == JXXX) - err("last block misses jump"); - fn->tmp = tmp; - fn->con = con; - fn->mem = vnew(0, sizeof fn->mem[0]); - fn->ntmp = ntmp; - fn->ncon = ncon; - fn->nmem = 0; - fn->nblk = nblk; - fn->rpo = 0; - return fn; -} - -static void -parsetyp() -{ - Typ *ty; - int t, n, sz, al, s, a, c, flt; - - if (ntyp >= NTyp) - err("too many type definitions"); - ty = &typ[ntyp++]; - ty->align = -1; - if (nextnl() != TTyp || nextnl() != TEq) - err("type name, then = expected"); - strcpy(ty->name, tokval.str); - t = nextnl(); - if (t == TAlign) { - if (nextnl() != TInt) - err("alignment expected"); - for (al=0; 
tokval.num /= 2; al++) - ; - ty->align = al; - t = nextnl(); - } - if (t != TLBrace) - err("type body must start with {"); - t = nextnl(); - if (t == TInt) { - ty->dark = 1; - ty->size = tokval.num; - if (ty->align == -1) - err("dark types need alignment"); - t = nextnl(); - } else { - ty->dark = 0; - n = -1; - sz = 0; - al = 0; - for (;;) { - flt = 0; - switch (t) { - default: err("invalid size specifier %c", tokval.chr); - case TD: flt = 1; - case TL: s = 8; a = 3; break; - case TS: flt = 1; - case TW: s = 4; a = 2; break; - case TH: s = 2; a = 1; break; - case TB: s = 1; a = 0; break; - } - if (a > al) - al = a; - if ((a = sz & (s-1))) { - a = s - a; - if (++n < NSeg) { - /* padding segment */ - ty->seg[n].ispad = 1; - ty->seg[n].len = a; - } - } - t = nextnl(); - if (t == TInt) { - c = tokval.num; - t = nextnl(); - } else - c = 1; - while (c-- > 0) { - if (++n < NSeg) { - ty->seg[n].isflt = flt; - ty->seg[n].ispad = 0; - ty->seg[n].len = s; - } - sz += a + s; - } - if (t != TComma) - break; - t = nextnl(); - } - if (++n >= NSeg) - ty->dark = 1; - else - ty->seg[n].len = 0; - if (ty->align == -1) - ty->align = al; - else - al = ty->align; - a = (1 << al) - 1; - ty->size = (sz + a) & ~a; - } - if (t != TRBrace) - err("expected closing }"); -} - -static void -parsedatref(Dat *d) -{ - int t; - - d->isref = 1; - d->u.ref.nam = tokval.str; - d->u.ref.off = 0; - t = peek(); - if (t == TPlus) { - next(); - if (next() != TInt) - err("invalid token after offset in ref"); - d->u.ref.off = tokval.num; - } -} - -static void -parsedatstr(Dat *d) -{ - d->isstr = 1; - d->u.str = tokval.str; -} - -static void -parsedat(void cb(Dat *), int export) -{ - char s[NString]; - int t; - Dat d; - - d.type = DStart; - d.isstr = 0; - d.isref = 0; - d.export = export; - cb(&d); - if (nextnl() != TGlo || nextnl() != TEq) - err("data name, then = expected"); - strcpy(s, tokval.str); - t = nextnl(); - if (t == TAlign) { - if (nextnl() != TInt) - err("alignment expected"); - d.type = DAlign; - 
d.u.num = tokval.num; - cb(&d); - t = nextnl(); - } - d.type = DName; - d.u.str = s; - cb(&d); - - if (t != TLBrace) - err("expected data contents in { .. }"); - for (;;) { - switch (nextnl()) { - default: err("invalid size specifier %c in data", tokval.chr); - case TRBrace: goto Done; - case TL: d.type = DL; break; - case TW: d.type = DW; break; - case TH: d.type = DH; break; - case TB: d.type = DB; break; - case TS: d.type = DW; break; - case TD: d.type = DL; break; - case TZ: d.type = DZ; break; - } - t = nextnl(); - do { - d.isref = 0; - d.isstr = 0; - memset(&d.u, 0, sizeof d.u); - if (t == TFlts) - d.u.flts = tokval.flts; - else if (t == TFltd) - d.u.fltd = tokval.fltd; - else if (t == TInt) - d.u.num = tokval.num; - else if (t == TGlo) - parsedatref(&d); - else if (t == TStr) - parsedatstr(&d); - else - err("constant literal expected"); - cb(&d); - t = nextnl(); - } while (t == TInt || t == TFlts || t == TFltd); - if (t == TRBrace) - break; - if (t != TComma) - err(", or } expected"); - } -Done: - d.type = DEnd; - cb(&d); -} - -void -parse(FILE *f, char *path, void data(Dat *), void func(Fn *)) -{ - int t, export; - - inf = f; - inpath = path; - lnum = 1; - thead = TXXX; - ntyp = 0; - for (;;) { - export = 0; - switch (nextnl()) { - default: - err("top-level definition expected"); - case TExport: - export = 1; - t = nextnl(); - if (t == TFunc) { - case TFunc: - func(parsefn(export)); - break; - } - else if (t == TData) { - case TData: - parsedat(data, export); - break; - } - else - err("export can only qualify data and function"); - case TType: - parsetyp(); - break; - case TEOF: - return; - } - } -} - -static void -printcon(Con *c, FILE *f) -{ - switch (c->type) { - case CUndef: - break; - case CAddr: - fprintf(f, "$%s", c->label); - if (c->bits.i) - fprintf(f, "%+"PRIi64, c->bits.i); - break; - case CBits: - if (c->flt == 1) - fprintf(f, "s_%f", c->bits.s); - else if (c->flt == 2) - fprintf(f, "d_%lf", c->bits.d); - else - fprintf(f, "%"PRIi64, c->bits.i); 
- break; - } -} - -void -printref(Ref r, Fn *fn, FILE *f) -{ - int i; - Mem *m; - - switch (rtype(r)) { - case RTmp: - if (r.val < Tmp0) - fprintf(f, "R%d", r.val); - else - fprintf(f, "%%%s", fn->tmp[r.val].name); - break; - case RCon: - printcon(&fn->con[r.val], f); - break; - case RSlot: - fprintf(f, "S%d", r.val); - break; - case RACall: - fprintf(f, "%03x", r.val & AMask); - break; - case RAType: - fprintf(f, ":%s", typ[r.val & AMask].name); - break; - case RAMem: - i = 0; - m = &fn->mem[r.val & AMask]; - fputc('[', f); - if (m->offset.type != CUndef) { - printcon(&m->offset, f); - i = 1; - } - if (!req(m->base, R)) { - if (i) - fprintf(f, " + "); - printref(m->base, fn, f); - i = 1; - } - if (!req(m->index, R)) { - if (i) - fprintf(f, " + "); - fprintf(f, "%d * ", m->scale); - printref(m->index, fn, f); - } - fputc(']', f); - break; - } -} - -void -printfn(Fn *fn, FILE *f) -{ - static char *jtoa[NJmp] = { - [JRet0] = "ret", - [JRetw] = "retw", - [JRetl] = "retl", - [JRetc] = "retc", - [JRets] = "rets", - [JRetd] = "retd", - [JJnz] = "jnz", - [JXJnp] = "xjnp", - [JXJp] = "xjp", - #define X(c) [JXJc+IC##c] = "xj" #c, - ICMPS(X) - #undef X - }; - static char prcls[NOp] = { - [OArg] = 1, - [OSwap] = 1, - [OXCmp] = 1, - [OXTest] = 1, - [OXDiv] = 1, - [OXIDiv] = 1, - }; - static char ktoc[] = "wlsd"; - Blk *b; - Phi *p; - Ins *i; - uint n; - - if (fn->export) - fprintf(f, "export "); - fprintf(f, "function $%s() {\n", fn->name); - for (b=fn->start; b; b=b->link) { - fprintf(f, "@%s\n", b->name); - for (p=b->phi; p; p=p->link) { - fprintf(f, "\t"); - printref(p->to, fn, f); - fprintf(f, " =%c phi ", ktoc[p->cls]); - assert(p->narg); - for (n=0;; n++) { - fprintf(f, "@%s ", p->blk[n]->name); - printref(p->arg[n], fn, f); - if (n == p->narg-1) { - fprintf(f, "\n"); - break; - } else - fprintf(f, ", "); - } - } - for (i=b->ins; i-b->ins < b->nins; i++) { - fprintf(f, "\t"); - if (!req(i->to, R)) { - printref(i->to, fn, f); - fprintf(f, " =%c ", ktoc[i->cls]); - } - 
assert(opdesc[i->op].name); - fprintf(f, "%s", opdesc[i->op].name); - if (req(i->to, R) && prcls[i->op]) - fputc(ktoc[i->cls], f); - if (!req(i->arg[0], R)) { - fprintf(f, " "); - printref(i->arg[0], fn, f); - } - if (!req(i->arg[1], R)) { - fprintf(f, ", "); - printref(i->arg[1], fn, f); - } - fprintf(f, "\n"); - } - switch (b->jmp.type) { - case JRet0: - case JRetw: - case JRetl: - case JRets: - case JRetd: - case JRetc: - fprintf(f, "\t%s", jtoa[b->jmp.type]); - if (b->jmp.type != JRet0 || !req(b->jmp.arg, R)) { - fprintf(f, " "); - printref(b->jmp.arg, fn, f); - } - if (b->jmp.type == JRetc) - fprintf(f, ", :%s", typ[fn->retty].name); - fprintf(f, "\n"); - break; - case JJmp: - if (b->s1 != b->link) - fprintf(f, "\tjmp @%s\n", b->s1->name); - break; - default: - fprintf(f, "\t%s ", jtoa[b->jmp.type]); - if (b->jmp.type == JJnz) { - printref(b->jmp.arg, fn, f); - fprintf(f, ", "); - } - fprintf(f, "@%s, @%s\n", b->s1->name, b->s2->name); - break; - } - } - fprintf(f, "}\n"); -} diff --git a/src/rega.c b/src/rega.c deleted file mode 100644 index 7f8edcf..0000000 --- a/src/rega.c +++ /dev/null @@ -1,598 +0,0 @@ -#include "all.h" - -#ifdef TEST_PMOV - #undef assert - #define assert(x) assert_test(#x, x) -#endif - -typedef struct RMap RMap; - -struct RMap { - int t[NIReg+NFReg]; - int r[NIReg+NFReg]; - BSet b[1]; - int n; -}; - -static bits regu; /* registers used */ -static Tmp *tmp; /* function temporaries */ -static Mem *mem; /* function mem references */ -static struct { - Ref src, dst; - int cls; -} *pm; /* parallel move constructed */ -static int cpm, npm; /* capacity and size of pm */ - -static int * -hint(int t) -{ - return &tmp[phicls(t, tmp)].hint.r; -} - -static void -sethint(int t, int r) -{ - bits m; - - m = tmp[phicls(t, tmp)].hint.m; - if (*hint(t) == -1) - if (!(BIT(r) & m)) - *hint(t) = r; -} - -static void -rcopy(RMap *ma, RMap *mb) -{ - memcpy(ma->t, mb->t, sizeof ma->t); - memcpy(ma->r, mb->r, sizeof ma->r); - bscopy(ma->b, mb->b); - ma->n = 
mb->n; -} - -static int -rfind(RMap *m, int t) -{ - int i; - - for (i=0; in; i++) - if (m->t[i] == t) - return m->r[i]; - return -1; -} - -static Ref -rref(RMap *m, int t) -{ - int r, s; - - r = rfind(m, t); - if (r == -1) { - s = tmp[t].slot; - assert(s != -1 && "should have spilled"); - return SLOT(s); - } else - return TMP(r); -} - -static void -radd(RMap *m, int t, int r) -{ - assert((t >= Tmp0 || t == r) && "invalid temporary"); - assert(((RAX <= r && r < RAX + NIReg) || (XMM0 <= r && r < XMM0 + NFReg)) && "invalid register"); - assert(!bshas(m->b, t) && "temporary has mapping"); - assert(!bshas(m->b, r) && "register already allocated"); - assert(m->n <= NIReg+NFReg && "too many mappings"); - bsset(m->b, t); - bsset(m->b, r); - m->t[m->n] = t; - m->r[m->n] = r; - m->n++; - regu |= BIT(r); -} - -static Ref -ralloc(RMap *m, int t) -{ - bits regs; - int r, r0, r1; - - if (t < Tmp0) { - assert(bshas(m->b, t)); - return TMP(t); - } - if (bshas(m->b, t)) { - r = rfind(m, t); - assert(r != -1); - return TMP(r); - } - r = *hint(t); - if (r == -1 || bshas(m->b, r)) { - regs = tmp[phicls(t, tmp)].hint.m; - regs |= m->b->t[0]; - switch (KBASE(tmp[t].cls)) { - case 0: - r0 = RAX; - r1 = RAX + NIReg; - break; - case 1: - r0 = XMM0; - r1 = XMM0 + NFReg; - break; - } - for (r=r0; rb, r)) - goto Found; - diag("rega: no more regs"); - } -Found: - radd(m, t, r); - sethint(t, r); - return TMP(r); -} - -static int -rfree(RMap *m, int t) -{ - int i, r; - - if (!bshas(m->b, t)) - return -1; - for (i=0; m->t[i] != t; i++) - assert(i+1 < m->n); - r = m->r[i]; - bsclr(m->b, t); - bsclr(m->b, r); - m->n--; - memmove(&m->t[i], &m->t[i+1], (m->n-i) * sizeof m->t[0]); - memmove(&m->r[i], &m->r[i+1], (m->n-i) * sizeof m->r[0]); - return r; -} - -static void -mdump(RMap *m) -{ - int i; - - for (i=0; in; i++) - fprintf(stderr, " (%s, R%d)", - tmp[m->t[i]].name, - m->r[i]); - fprintf(stderr, "\n"); -} - -static void -pmadd(Ref src, Ref dst, int k) -{ - if (npm == cpm) { - cpm = cpm * 2 + 16; 
- pm = realloc(pm, cpm * sizeof pm[0]); - if (!pm) - diag("pmadd: out of memory"); - } - pm[npm].src = src; - pm[npm].dst = dst; - pm[npm].cls = k; - npm++; -} - -enum PMStat { ToMove, Moving, Moved }; - -static Ref -pmrec(enum PMStat *status, int i, int *k) -{ - Ref swp, swp1; - int j, k1; - - /* note, this routine might emit - * too many large instructions: - * - * , x -- x - * x -- x -- x | - * ` x -- x - * - * if only the first move is wide - * the whole cycle will be wide, - * this is safe but not necessary - */ - - if (req(pm[i].src, pm[i].dst)) - return R; - status[i] = Moving; - assert(KBASE(*k) == KBASE(pm[i].cls)); - assert((Kw|1) == Kl && (Ks|1) == Kd); - *k |= KWIDE(pm[i].cls); /* see above */ - swp = R; - for (j=0; jb, r) && r1 != r) { - /* r is used and not by to */ - for (n=0; m->r[n] != r; n++) - assert(n+1 < m->n); - t = m->t[n]; - rfree(m, t); - bsset(m->b, r); - ralloc(m, t); - bsclr(m->b, r); - } - t = req(to, R) ? r : to.val; - radd(m, t, r); -} - -static int -regcpy(Ins *i) -{ - return i->op == OCopy && isreg(i->arg[0]); -} - -static Ins * -dopm(Blk *b, Ins *i, RMap *m) -{ - RMap m0; - int n, r, r1, t, s; - Ins *i0, *i1, *ip, *ir; - bits def; - - m0 = *m; - i1 = ++i; - do { - i--; - move(i->arg[0].val, i->to, m); - } while (i != b->ins && regcpy(i-1)); - assert(m0.n <= m->n); - if (i != b->ins && (i-1)->op == OCall) { - def = retregs((i-1)->arg[1], 0); - for (r=0; rn; n++) { - t = m->t[n]; - s = tmp[t].slot; - r1 = m->r[n]; - r = rfind(&m0, t); - if (r != -1) - pmadd(TMP(r1), TMP(r), tmp[t].cls); - else if (s != -1) - pmadd(TMP(r1), SLOT(s), tmp[t].cls); - } - for (ip=i; ipto, R)) - rfree(m, ip->to.val); - r = ip->arg[0].val; - if (rfind(m, r) == -1) - radd(m, r, r); - } - pmgen(); -#ifdef TEST_PMOV - return 0; -#endif - n = b->nins - (i1 - i) + (curi - insb); - i0 = alloc(n * sizeof(Ins)); - ip = icpy(ip = i0, b->ins, i - b->ins); - ip = icpy(ir = ip, insb, curi - insb); - ip = icpy(ip, i1, &b->ins[b->nins] - i1); - b->nins = n; - b->ins = 
i0; - return ir; -} - -static int -prio(Ref r1, Ref r2) -{ - /* trivial heuristic to begin with, - * later we can use the distance to - * the definition instruction - */ - (void) r2; - return *hint(r1.val) != -1; -} - -static void -insert(Ref *r, Ref **rs, int p) -{ - int i; - - rs[i = p] = r; - while (i-- > 0 && prio(*r, *rs[i])) { - rs[i+1] = rs[i]; - rs[i] = r; - } -} - -static void -doblk(Blk *b, RMap *cur) -{ - int x, r, nr; - bits rs; - Ins *i; - Mem *m; - Ref *ra[4]; - - if (rtype(b->jmp.arg) == RTmp) - b->jmp.arg = ralloc(cur, b->jmp.arg.val); - else if (rtype(b->jmp.arg) == RACall) { - /* add return registers */ - rs = retregs(b->jmp.arg, 0); - for (r=0; rs; rs/=2, r++) - if (rs & 1) - radd(cur, r, r); - } - for (i=&b->ins[b->nins]; i!=b->ins;) { - switch ((--i)->op) { - case OCall: - rs = argregs(i->arg[1], 0); - for (r=0; rarg[0])) { - i = dopm(b, i, cur); - continue; - } - if (isreg(i->to)) - if (rtype(i->arg[0]) == RTmp) - sethint(i->arg[0].val, i->to.val); - /* fall through */ - default: - if (!req(i->to, R)) { - assert(rtype(i->to) == RTmp); - r = rfree(cur, i->to.val); - if (r == -1 && !isreg(i->to)) { - *i = (Ins){.op = ONop}; - continue; - } - if (i->to.val >= Tmp0) - i->to = TMP(r); - } - break; - } - for (x=0, nr=0; x<2; x++) - switch (rtype(i->arg[x])) { - case RAMem: - m = &mem[i->arg[x].val & AMask]; - if (rtype(m->base) == RTmp) - insert(&m->base, ra, nr++); - if (rtype(m->index) == RTmp) - insert(&m->index, ra, nr++); - break; - case RTmp: - insert(&i->arg[x], ra, nr++); - break; - } - for (r=0; rval); - } -} - -/* register allocation - * depends on rpo, phi, cost, (and obviously spill) - */ -void -rega(Fn *fn) -{ - int j, n, t, r, r1, x, rl[Tmp0]; - Blk *b, *b1, *s, ***ps, *blist; - RMap *end, *beg, cur, old; - Ins *i; - Phi *p; - uint u; - Ref src, dst; - - /* 1. 
setup */ - regu = 0; - tmp = fn->tmp; - mem = fn->mem; - end = alloc(fn->nblk * sizeof end[0]); - beg = alloc(fn->nblk * sizeof beg[0]); - for (n=0; nnblk; n++) { - bsinit(end[n].b, fn->ntmp); - bsinit(beg[n].b, fn->ntmp); - } - bsinit(cur.b, fn->ntmp); - bsinit(old.b, fn->ntmp); - - for (t=Tmp0; tntmp; t++) - *hint(t) = -1; - for (b=fn->start, i=b->ins; i-b->ins < b->nins; i++) - if (i->op != OCopy || !isreg(i->arg[0])) - break; - else { - assert(rtype(i->to) == RTmp); - sethint(i->to.val, i->arg[0].val); - } - - /* 2. assign registers following post-order */ - for (n=fn->nblk-1; n>=0; n--) { - b = fn->rpo[n]; - cur.n = 0; - bszero(cur.b); - for (x=0; x<2; x++) - for (t=Tmp0; tntmp; t++) { - assert(bshas(b->out, t) || - !bshas(cur.b, t)); - if (bshas(b->out, t)) - if (!bshas(cur.b, t)) - if (x || (r=*hint(t)) != -1) - if (x || !bshas(cur.b, r)) - ralloc(&cur, t); - } - rcopy(&end[n], &cur); - doblk(b, &cur); - bscopy(b->in, cur.b); - for (p=b->phi; p; p=p->link) - if (rtype(p->to) == RTmp) { - bsclr(b->in, p->to.val); - /* heuristic 0: - * if the phi destination has an - * argument from a frequent block - * that was already allocated to - * 'r', use 'r' as the new hint - */ - memset(rl, 0, sizeof rl); - for (u=0; unarg; u++) { - t = p->arg[u].val; - b1 = p->blk[u]; - if (rtype(p->arg[u]) == RTmp) - if ((r=rfind(&end[b1->id], t)) != -1) - rl[r] += b1->loop; - } - for (x=0, j=0; j rl[x]) - x = j; - if (rl[x] >= b->loop) - *hint(p->to.val) = x; - } - if (b->npred > 1) { - /* heuristic 1: - * attempt to satisfy hints - * when it's simple and we have - * multiple predecessors - */ - rcopy(&old, &cur); - curi = &insb[NIns]; - for (j=0; jnins += j; - i = alloc(b->nins * sizeof(Ins)); - icpy(icpy(i, curi, j), b->ins, b->nins-j); - b->ins = i; - } - } - rcopy(&beg[n], &cur); - } - if (debug['R']) { - fprintf(stderr, "\n> Register mappings:\n"); - for (n=0; nnblk; n++) { - b = fn->rpo[n]; - fprintf(stderr, "\t%-10s beg", b->name); - mdump(&beg[n]); - fprintf(stderr, "\t 
end"); - mdump(&end[n]); - } - fprintf(stderr, "\n"); - } - - /* 3. compose glue code */ - blist = 0; - for (b=fn->start;; b=b->link) { - ps = (Blk**[3]){&b->s1, &b->s2, (Blk*[1]){0}}; - for (; (s=**ps); ps++) { - npm = 0; - for (p=s->phi; p; p=p->link) { - dst = p->to; - assert(rtype(dst)==RSlot || rtype(dst)==RTmp); - if (rtype(dst) == RTmp) { - r = rfind(&beg[s->id], dst.val); - if (r == -1) - continue; - dst = TMP(r); - } - for (u=0; p->blk[u]!=b; u++) - assert(u+1 < p->narg); - src = p->arg[u]; - if (rtype(src) == RTmp) - src = rref(&end[b->id], src.val); - pmadd(src, dst, p->cls); - } - for (t=Tmp0; tntmp; t++) - if (bshas(s->in, t)) { - src = rref(&end[b->id], t); - dst = rref(&beg[s->id], t); - pmadd(src, dst, tmp[t].cls); - } - pmgen(); - if (curi == insb) - continue; - b1 = blknew(); - b1->loop = (b->loop+s->loop) / 2; - b1->link = blist; - blist = b1; - fn->nblk++; - sprintf(b1->name, "%s_%s", b->name, s->name); - b1->nins = curi - insb; - idup(&b1->ins, insb, b1->nins); - b1->jmp.type = JJmp; - b1->s1 = s; - **ps = b1; - } - if (!b->link) { - b->link = blist; - break; - } - } - for (b=fn->start; b; b=b->link) - b->phi = 0; - fn->reg = regu; - - if (debug['R']) { - fprintf(stderr, "\n> After register allocation:\n"); - printfn(fn, stderr); - } -} diff --git a/src/spill.c b/src/spill.c deleted file mode 100644 index 72f8106..0000000 --- a/src/spill.c +++ /dev/null @@ -1,507 +0,0 @@ -#include "all.h" - -static void -loopmark(Blk *hd, Blk *b, Phi *p) -{ - int k, head; - uint n, a; - - head = hd->id; - if (b->id < head) - return; - for (; p; p=p->link) - for (a=0; anarg; a++) - if (p->blk[a] == b) - if (rtype(p->arg[a]) == RTmp) - bsset(hd->gen, p->arg[a].val); - if (b->visit == head) - return; - b->visit = head; - b->loop *= 10; - /* aggregate looping information at - * loop headers */ - bsunion(hd->gen, b->gen); - for (k=0; k<2; k++) - if (b->nlive[k] > hd->nlive[k]) - hd->nlive[k] = b->nlive[k]; - for (n=0; nnpred; n++) - loopmark(hd, b->pred[n], b->phi); 
-} - -static void -tmpuse(Ref r, int use, int loop, Fn *fn) -{ - Mem *m; - Tmp *t; - - if (rtype(r) == RAMem) { - m = &fn->mem[r.val & AMask]; - tmpuse(m->base, 1, loop, fn); - tmpuse(m->index, 1, loop, fn); - } - else if (rtype(r) == RTmp && r.val >= Tmp0) { - t = &fn->tmp[r.val]; - t->nuse += use; - t->ndef += !use; - t->cost += loop; - } -} - -/* evaluate spill costs of temporaries, - * this also fills usage information - * requires rpo, preds - */ -void -fillcost(Fn *fn) -{ - int n, hd; - uint a; - Blk *b; - Ins *i; - Tmp *t; - Phi *p; - - for (b=fn->start; b; b=b->link) { - b->loop = 1; - b->visit = -1; - } - if (debug['S']) - fprintf(stderr, "\n> Loop information:\n"); - for (n=0; nnblk; n++) { - b = fn->rpo[n]; - hd = 0; - for (a=0; anpred; a++) - if (b->pred[a]->id >= n) { - loopmark(b, b->pred[a], b->phi); - hd = 1; - } - if (hd && debug['S']) { - fprintf(stderr, "\t%-10s", b->name); - fprintf(stderr, " (% 3d ", b->nlive[0]); - fprintf(stderr, "% 3d) ", b->nlive[1]); - dumpts(b->gen, fn->tmp, stderr); - } - } - for (t=fn->tmp; t-fn->tmp < fn->ntmp; t++) { - t->cost = t-fn->tmp < Tmp0 ? 1e6 : 0; - t->nuse = 0; - t->ndef = 0; - } - for (b=fn->start; b; b=b->link) { - for (p=b->phi; p; p=p->link) { - /* todo, the cost computation - * for p->to is not great... 
*/ - tmpuse(p->to, 0, 0, fn); - for (a=0; anarg; a++) { - n = p->blk[a]->loop; - assert(b->npred==p->narg && - "wrong cfg"); - n /= b->npred; - tmpuse(p->arg[a], 1, n, fn); - } - } - n = b->loop; - for (i=b->ins; i-b->ins < b->nins; i++) { - tmpuse(i->to, 0, n, fn); - tmpuse(i->arg[0], 1, n, fn); - tmpuse(i->arg[1], 1, n, fn); - } - tmpuse(b->jmp.arg, 1, n, fn); - } - if (debug['S']) { - fprintf(stderr, "\n> Spill costs:\n"); - for (n=Tmp0; nntmp; n++) - fprintf(stderr, "\t%-10s %d\n", - fn->tmp[n].name, - fn->tmp[n].cost); - fprintf(stderr, "\n"); - } -} - -static BSet *fst; /* temps to prioritize in registers (for tcmp1) */ -static Tmp *tmp; /* current temporaries (for tcmpX) */ -static int ntmp; /* current # of temps (for limit) */ -static int locs; /* stack size used by locals */ -static int slot4; /* next slot of 4 bytes */ -static int slot8; /* ditto, 8 bytes */ -static BSet mask[2][1]; /* class masks */ - -static int -tcmp0(const void *pa, const void *pb) -{ - return tmp[*(int *)pb].cost - tmp[*(int *)pa].cost; -} - -static int -tcmp1(const void *pa, const void *pb) -{ - int c; - - c = bshas(fst, *(int *)pb) - bshas(fst, *(int *)pa); - return c ? 
c : tcmp0(pa, pb); -} - -static Ref -slot(int t) -{ - int s; - - if (t < Tmp0) - diag("spill: cannot spill register"); - s = tmp[t].slot; - if (s == -1) { - assert(NAlign == 3); - /* nice logic to pack stack slots - * on demand, there can be only - * one hole and slot4 points to it - * - * invariant: slot4 <= slot8 - */ - if (KWIDE(tmp[t].cls)) { - s = slot8; - if (slot4 == slot8) - slot4 += 2; - slot8 += 2; - } else { - s = slot4; - if (slot4 == slot8) { - slot8 += 2; - slot4 += 1; - } else - slot4 = slot8; - } - s += locs; - tmp[t].slot = s; - } - return SLOT(s); -} - -static void -limit(BSet *b, int k, BSet *f) -{ - static int *tarr, maxt; - int i, nt; - uint t; - - nt = bscount(b); - if (nt <= k) - return; - if (nt > maxt) { - free(tarr); - tarr = emalloc(nt * sizeof tarr[0]); - maxt = nt; - } - for (i=0, t=0; bsiter(b, &t); t++) { - bsclr(b, t); - tarr[i++] = t; - } - if (!f) - qsort(tarr, nt, sizeof tarr[0], tcmp0); - else { - fst = f; - qsort(tarr, nt, sizeof tarr[0], tcmp1); - } - for (i=0; iop == OCopy && isreg(i->arg[0]); -} - -static Ins * -dopm(Blk *b, Ins *i, BSet *v) -{ - int n, t; - BSet u[1]; - Ins *i1; - bits r; - - bsinit(u, ntmp); /* todo, free those */ - /* consecutive copies from - * registers need to be handled - * as one large instruction - * - * fixme: there is an assumption - * that calls are always followed - * by copy instructions here, this - * might not be true if previous - * passes change - */ - i1 = ++i; - do { - i--; - t = i->to.val; - if (!req(i->to, R)) - if (bshas(v, t)) { - bsclr(v, t); - store(i->to, tmp[t].slot); - } - bsset(v, i->arg[0].val); - } while (i != b->ins && regcpy(i-1)); - bscopy(u, v); - if (i != b->ins && (i-1)->op == OCall) { - v->t[0] &= ~retregs((i-1)->arg[1], 0); - limit2(v, NISave, NFSave, 0); - for (r=0, n=0; nt[0] |= argregs((i-1)->arg[1], 0); - } else { - limit2(v, 0, 0, 0); - r = v->t[0]; - } - sethint(v, r); - reloads(u, v); - do - emiti(*--i1); - while (i1 != i); - return i; -} - -/* spill code 
insertion - * requires spill costs, rpo, liveness - * - * Note: this will replace liveness - * information (in, out) with temporaries - * that must be in registers at block - * borders - * - * Be careful with: - * - OCopy instructions to ensure register - * constraints - */ -void -spill(Fn *fn) -{ - Blk *b, *s1, *s2, *hd, **bp; - int j, n, l, t, k, lvarg[2]; - BSet u[1], v[1], w[1]; - Ins *i; - Phi *p; - Mem *m; - bits r; - - tmp = fn->tmp; - ntmp = fn->ntmp; - bsinit(u, ntmp); - bsinit(v, ntmp); - bsinit(w, ntmp); - bsinit(mask[0], ntmp); - bsinit(mask[1], ntmp); - locs = fn->slot; - slot4 = 0; - slot8 = 0; - for (t=0; t= XMM0 && t < XMM0 + NFReg) - k = 1; - else if (t >= Tmp0) - k = KBASE(tmp[t].cls); - bsset(mask[k], t); - } - - for (bp=&fn->rpo[fn->nblk]; bp!=fn->rpo;) { - b = *--bp; - /* invariant: all bocks with bigger rpo got - * their in,out updated. */ - - /* 1. find temporaries in registers at - * the end of the block (put them in v) */ - curi = 0; - s1 = b->s1; - s2 = b->s2; - hd = 0; - if (s1 && s1->id <= n) - hd = s1; - if (s2 && s2->id <= n) - if (!hd || s2->id >= hd->id) - hd = s2; - r = 0; - bszero(v); - if (hd) { - /* back-edge */ - for (k=0; k<2; k++) { - n = k == 0 ? NIReg : NFReg; - bscopy(u, b->out); - bsinter(u, mask[k]); - bscopy(w, u); - bsinter(u, hd->gen); - bsdiff(w, hd->gen); - if ((int)bscount(u) < n) { /* fixme */ - j = bscount(w); /* live through */ - l = hd->nlive[k]; - limit(w, n - (l - j), 0); - bsunion(u, w); - } else - limit(u, n, 0); - bsunion(v, u); - } - } else if (s1) { - liveon(v, b, s1); - if (s2) { - liveon(u, b, s2); - bscopy(w, u); - bsinter(w, v); - bsunion(v, u); - } - limit2(v, 0, 0, w); - } else if (rtype(b->jmp.arg) == RACall) { - /* return */ - r = retregs(b->jmp.arg, 0); - v->t[0] |= r; - } - bscopy(b->out, v); - - /* 2. 
process the block instructions */ - curi = &insb[NIns]; - for (i=&b->ins[b->nins]; i!=b->ins;) { - i--; - if (regcpy(i)) { - i = dopm(b, i, v); - continue; - } - bszero(w); - if (!req(i->to, R)) { - assert(rtype(i->to) == RTmp); - t = i->to.val; - if (bshas(v, t)) - bsclr(v, t); - else { - /* make sure we have a reg - * for the result */ - bsset(v, t); - bsset(w, t); - } - } - j = opdesc[i->op].nmem; - for (n=0; n<2; n++) - if (rtype(i->arg[n]) == RAMem) - j--; - for (n=0; n<2; n++) - switch (rtype(i->arg[n])) { - case RAMem: - t = i->arg[n].val; - m = &fn->mem[t & AMask]; - if (rtype(m->base) == RTmp) { - bsset(v, m->base.val); - bsset(w, m->base.val); - } - if (rtype(m->index) == RTmp) { - bsset(v, m->index.val); - bsset(w, m->index.val); - } - break; - case RTmp: - t = i->arg[n].val; - lvarg[n] = bshas(v, t); - bsset(v, t); - if (j-- <= 0) - bsset(w, t); - break; - } - bscopy(u, v); - limit2(v, 0, 0, w); - for (n=0; n<2; n++) - if (rtype(i->arg[n]) == RTmp) { - t = i->arg[n].val; - if (!bshas(v, t)) { - /* do not reload if the - * the temporary was dead - */ - if (!lvarg[n]) - bsclr(u, t); - i->arg[n] = slot(t); - } - } - reloads(u, v); - if (!req(i->to, R)) { - t = i->to.val; - store(i->to, tmp[t].slot); - bsclr(v, t); - } - emiti(*i); - r = v->t[0] & (BIT(Tmp0)-1); - if (r) - sethint(v, r); - } - assert(!r || b==fn->start); - - for (p=b->phi; p; p=p->link) { - assert(rtype(p->to) == RTmp); - t = p->to.val; - if (bshas(v, t)) { - bsclr(v, t); - store(p->to, tmp[t].slot); - } else if (bshas(b->in, t)) - /* only if the phi is live */ - p->to = slot(p->to.val); - } - bscopy(b->in, v); - b->nins = &insb[NIns] - curi; - idup(&b->ins, curi, b->nins); - } - - /* align the locals to a 16 byte boundary */ - assert(NAlign == 3); - slot8 += slot8 & 3; - fn->slot += slot8; - - if (debug['S']) { - fprintf(stderr, "\n> Block information:\n"); - for (b=fn->start; b; b=b->link) { - printf("\t%-10s (% 5d) ", b->name, b->loop); - dumpts(b->out, fn->tmp, stdout); - } - 
fprintf(stderr, "\n> After spilling:\n"); - printfn(fn, stderr); - } -} diff --git a/src/ssa.c b/src/ssa.c deleted file mode 100644 index 0c163aa..0000000 --- a/src/ssa.c +++ /dev/null @@ -1,516 +0,0 @@ -#include "all.h" -#include - -static void -adduse(Tmp *tmp, int ty, Blk *b, ...) -{ - Use *u; - int n; - va_list ap; - - va_start(ap, b); - n = tmp->nuse; - vgrow(&tmp->use, ++tmp->nuse); - u = &tmp->use[n]; - u->type = ty; - u->bid = b->id; - switch (ty) { - default: - diag("ssa: adduse defaulted"); - case UPhi: - u->u.phi = va_arg(ap, Phi *); - break; - case UIns: - u->u.ins = va_arg(ap, Ins *); - break; - case UJmp: - break; - } - va_end(ap); -} - -/* fill usage, phi, and class information - */ -void -filluse(Fn *fn) -{ - Blk *b; - Phi *p; - Ins *i; - int m, t; - uint a; - Tmp *tmp; - - /* todo, is this the correct file? */ - tmp = fn->tmp; - for (t=0; tntmp; t++) { - tmp[t].ndef = 0; - tmp[t].nuse = 0; - tmp[t].phi = 0; - tmp[t].cls = 0; - if (tmp[t].use == 0) - tmp[t].use = vnew(0, sizeof(Use)); - } - for (b=fn->start; b; b=b->link) { - for (p=b->phi; p; p=p->link) { - assert(rtype(p->to) == RTmp); - t = p->to.val; - tmp[t].ndef++; - tmp[t].cls = p->cls; - tmp[t].phi = p->to.val; - for (a=0; anarg; a++) - if (rtype(p->arg[a]) == RTmp) { - t = p->arg[a].val; - adduse(&tmp[t], UPhi, b, p); - if (!tmp[t].phi) - tmp[t].phi = p->to.val; - } - } - for (i=b->ins; i-b->ins < b->nins; i++) { - if (!req(i->to, R)) { - assert(rtype(i->to) == RTmp); - t = i->to.val; - tmp[t].ndef++; - tmp[t].cls = i->cls; - } - for (m=0; m<2; m++) - if (rtype(i->arg[m]) == RTmp) { - t = i->arg[m].val; - adduse(&tmp[t], UIns, b, i); - } - } - if (rtype(b->jmp.arg) == RTmp) - adduse(&tmp[b->jmp.arg.val], UJmp, b); - } -} - -static void -addpred(Blk *bp, Blk *bc) -{ - uint i; - - if (!bc->pred) { - bc->pred = alloc(bc->npred * sizeof bc->pred[0]); - for (i=0; inpred; i++) - bc->pred[i] = 0; - } - for (i=0; bc->pred[i]; i++) - ; - bc->pred[i] = bp; -} - -/* fill predecessors information in 
blocks - */ -void -fillpreds(Fn *f) -{ - Blk *b; - - for (b=f->start; b; b=b->link) { - b->npred = 0; - b->pred = 0; - } - for (b=f->start; b; b=b->link) { - if (b->s1) - b->s1->npred++; - if (b->s2) - b->s2->npred++; - } - for (b=f->start; b; b=b->link) { - if (b->s1) - addpred(b, b->s1); - if (b->s2) - addpred(b, b->s2); - } -} - -static int -rporec(Blk *b, int x) -{ - Blk *s1, *s2; - - if (!b || b->id >= 0) - return x; - b->id = 1; - s1 = b->s1; - s2 = b->s2; - if (s1 && s2 && s1->loop > s2->loop) { - s1 = b->s2; - s2 = b->s1; - } - x = rporec(s1, x); - x = rporec(s2, x); - b->id = x; - assert(x >= 0); - return x - 1; -} - -/* fill the rpo information in blocks - */ -void -fillrpo(Fn *f) -{ - int n; - Blk *b, **p; - - for (b=f->start; b; b=b->link) - b->id = -1; - n = 1 + rporec(f->start, f->nblk-1); - f->nblk -= n; - f->rpo = alloc(f->nblk * sizeof f->rpo[0]); - for (p=&f->start; *p;) { - b = *p; - if (b->id == -1) { - *p = b->link; - /* todo, free block */ - } else { - b->id -= n; - f->rpo[b->id] = b; - p=&(*p)->link; - } - } -} - -/* for dominators computation, read - * "A Simple, Fast Dominance Algorithm" - * by K. Cooper, T. Harvey, and K. Kennedy. 
- */ - -static Blk * -inter(Blk *b1, Blk *b2) -{ - Blk *bt; - - if (b1 == 0) - return b2; - while (b1 != b2) { - if (b1->id < b2->id) { - bt = b1; - b1 = b2; - b2 = bt; - } - while (b1->id > b2->id) { - b1 = b1->idom; - assert(b1); - } - } - return b1; -} - -static void -filldom(Fn *fn) -{ - Blk *b, *d; - int ch, n; - uint p; - - for (b=fn->start; b; b=b->link) { - b->idom = 0; - b->dom = 0; - b->dlink = 0; - } - do { - ch = 0; - for (n=1; nnblk; n++) { - b = fn->rpo[n]; - d = 0; - for (p=0; pnpred; p++) - if (b->pred[p]->idom - || b->pred[p] == fn->start) - d = inter(d, b->pred[p]); - if (d != b->idom) { - ch++; - b->idom = d; - } - } - } while (ch); - for (b=fn->start; b; b=b->link) - if ((d=b->idom)) { - assert(d != b); - b->dlink = d->dom; - d->dom = b; - } -} - -static int -sdom(Blk *b1, Blk *b2) -{ - assert(b1 && b2); - if (b1 == b2) - return 0; - while (b2->id > b1->id) - b2 = b2->idom; - return b1 == b2; -} - -static int -dom(Blk *b1, Blk *b2) -{ - return b1 == b2 || sdom(b1, b2); -} - -static void -addfron(Blk *a, Blk *b) -{ - int n; - - for (n=0; nnfron; n++) - if (a->fron[n] == b) - return; - if (!a->nfron) - a->fron = vnew(++a->nfron, sizeof a->fron[0]); - else - vgrow(&a->fron, ++a->nfron); - a->fron[a->nfron-1] = b; -} - -static void -fillfron(Fn *fn) -{ - Blk *a, *b; - - for (b=fn->start; b; b=b->link) { - if (b->s1) - for (a=b; !sdom(a, b->s1); a=a->idom) - addfron(a, b->s1); - if (b->s2) - for (a=b; !sdom(a, b->s2); a=a->idom) - addfron(a, b->s2); - } -} - -static Ref -refindex(int t, Fn *fn) -{ - return newtmp(fn->tmp[t].name, fn->tmp[t].cls, fn); -} - -static void -phiins(Fn *fn) -{ - BSet u[1], defs[1]; - Blk *a, *b, **blist, **be, **bp; - Ins *i; - Phi *p; - Ref r; - int t, n, k, nt; - - bsinit(u, fn->nblk); - bsinit(defs, fn->nblk); - blist = emalloc(fn->nblk * sizeof blist[0]); - be = &blist[fn->nblk]; - nt = fn->ntmp; - for (t=Tmp0; ttmp[t].visit = 0; - if (fn->tmp[t].phi != 0) - continue; - bszero(u); - k = -1; - bp = be; - for 
(b=fn->start; b; b=b->link) { - b->visit = 0; - r = R; - for (i=b->ins; i-b->ins < b->nins; i++) { - if (!req(r, R)) { - if (req(i->arg[0], TMP(t))) - i->arg[0] = r; - if (req(i->arg[1], TMP(t))) - i->arg[1] = r; - } - if (req(i->to, TMP(t))) { - if (!bshas(b->out, t)) { - if (fn->tmp[t].ndef == 1) - r = TMP(t); - else - r = refindex(t, fn); - i->to = r; - } else { - if (!bshas(u, b->id)) { - bsset(u, b->id); - *--bp = b; - } - if (k == -1) - k = i->cls; - assert(k == i->cls); - } - } - } - if (!req(r, R) && req(b->jmp.arg, TMP(t))) - b->jmp.arg = r; - } - bscopy(defs, u); - while (bp != be) { - fn->tmp[t].visit = t; - b = *bp++; - bsclr(u, b->id); - for (n=0; nnfron; n++) { - a = b->fron[n]; - if (a->visit++ == 0) - if (bshas(a->in, t)) { - p = alloc(sizeof *p); - p->cls = k; - p->to = TMP(t); - p->link = a->phi; - a->phi = p; - if (!bshas(defs, a->id)) - if (!bshas(u, a->id)) { - bsset(u, a->id); - *--bp = a; - } - } - } - } - } - free(blist); -} - -typedef struct Name Name; -struct Name { - Ref r; - Blk *b; - Name *up; -}; - -static Name *namel; - -static Name * -nnew(Ref r, Blk *b, Name *up) -{ - Name *n; - - if (namel) { - n = namel; - namel = n->up; - } else - /* could use alloc, here - * but namel should be reset - */ - n = emalloc(sizeof *n); - n->r = r; - n->b = b; - n->up = up; - return n; -} - -static void -nfree(Name *n) -{ - n->up = namel; - namel = n; -} - -static void -rendef(Ref *r, Blk *b, Name **stk, Fn *fn) -{ - Ref r1; - int t; - - t = r->val; - if (req(*r, R) || !fn->tmp[t].visit) - return; - r1 = refindex(t, fn); - fn->tmp[r1.val].visit = t; - stk[t] = nnew(r1, b, stk[t]); - *r = r1; -} - -static Ref -getstk(int t, Blk *b, Name **stk) -{ - Name *n, *n1; - - n = stk[t]; - while (n && !dom(n->b, b)) { - n1 = n; - n = n->up; - nfree(n1); - } - stk[t] = n; - if (!n) { - /* uh, oh, warn */ - return CON_Z; - } else - return n->r; -} - -static void -renblk(Blk *b, Name **stk, Fn *fn) -{ - Phi *p; - Ins *i; - Blk *s, **ps, *succ[3]; - int t, m; - - 
for (p=b->phi; p; p=p->link) - rendef(&p->to, b, stk, fn); - for (i=b->ins; i-b->ins < b->nins; i++) { - for (m=0; m<2; m++) { - t = i->arg[m].val; - if (rtype(i->arg[m]) == RTmp) - if (fn->tmp[t].visit) - i->arg[m] = getstk(t, b, stk); - } - rendef(&i->to, b, stk, fn); - } - t = b->jmp.arg.val; - if (rtype(b->jmp.arg) == RTmp) - if (fn->tmp[t].visit) - b->jmp.arg = getstk(t, b, stk); - succ[0] = b->s1; - succ[1] = b->s2; - succ[2] = 0; - for (ps=succ; (s=*ps); ps++) - for (p=s->phi; p; p=p->link) { - t = p->to.val; - if ((t=fn->tmp[t].visit)) { - m = p->narg++; - if (m == NPred) - diag("ssa: too many phi arguments"); - p->arg[m] = getstk(t, b, stk); - p->blk[m] = b; - } - } - for (s=b->dom; s; s=s->dlink) - renblk(s, stk, fn); -} - -/* require ndef */ -void -ssa(Fn *fn) -{ - Name **stk, *n; - int d, nt; - Blk *b, *b1; - - nt = fn->ntmp; - stk = emalloc(nt * sizeof stk[0]); - d = debug['L']; - debug['L'] = 0; - filldom(fn); - if (debug['N']) { - fprintf(stderr, "\n> Dominators:\n"); - for (b1=fn->start; b1; b1=b1->link) { - if (!b1->dom) - continue; - fprintf(stderr, "%10s:", b1->name); - for (b=b1->dom; b; b=b->dlink) - fprintf(stderr, " %s", b->name); - fprintf(stderr, "\n"); - } - } - fillfron(fn); - filllive(fn); - phiins(fn); - renblk(fn->start, stk, fn); - while (nt--) - while ((n=stk[nt])) { - stk[nt] = n->up; - nfree(n); - } - debug['L'] = d; - free(stk); - if (debug['N']) { - fprintf(stderr, "\n> After SSA construction:\n"); - printfn(fn, stderr); - } -} diff --git a/src/util.c b/src/util.c deleted file mode 100644 index 65b3ff8..0000000 --- a/src/util.c +++ /dev/null @@ -1,329 +0,0 @@ -#include "all.h" - -typedef struct Bitset Bitset; -typedef struct Vec Vec; - -struct Vec { - ulong mag; - size_t esz; - ulong cap; - union { - long long ll; - long double ld; - void *ptr; - } align[]; -}; - -enum { - VMin = 2, - VMag = 0xcabba9e, - NPtr = 256, -}; - -Typ typ[NTyp]; -Ins insb[NIns], *curi; - -static void *ptr[NPtr]; -static void **pool = ptr; -static int 
nptr = 1; - -void -diag(char *s) -{ - fputs(s, stderr); - fputc('\n', stderr); - abort(); -} - -void * -emalloc(size_t n) -{ - void *p; - - p = calloc(1, n); - if (!p) - diag("emalloc: out of memory"); - return p; -} - -void * -alloc(size_t n) -{ - void **pp; - - if (n == 0) - return 0; - if (nptr >= NPtr) { - pp = emalloc(NPtr * sizeof(void *)); - pp[0] = pool; - pool = pp; - nptr = 1; - } - return pool[nptr++] = emalloc(n); -} - -void -freeall() -{ - void **pp; - - for (;;) { - for (pp = &pool[1]; pp < &pool[nptr]; pp++) - free(*pp); - pp = pool[0]; - if (!pp) - break; - free(pool); - pool = pp; - nptr = NPtr; - } - nptr = 1; -} - -Blk * -blknew() -{ - static Blk z; - Blk *b; - - b = alloc(sizeof *b); - *b = z; - return b; -} - -void -emit(int op, int k, Ref to, Ref arg0, Ref arg1) -{ - if (curi == insb) - diag("emit: too many instructions"); - *--curi = (Ins){ - .op = op, .cls = k, - .to = to, .arg = {arg0, arg1} - }; -} - -void -emiti(Ins i) -{ - emit(i.op, i.cls, i.to, i.arg[0], i.arg[1]); -} - -void -idup(Ins **pd, Ins *s, ulong n) -{ - *pd = alloc(n * sizeof(Ins)); - memcpy(*pd, s, n * sizeof(Ins)); -} - -Ins * -icpy(Ins *d, Ins *s, ulong n) -{ - memcpy(d, s, n * sizeof(Ins)); - return d + n; -} - -void * -vnew(ulong len, size_t esz) -{ - ulong cap; - Vec *v; - - for (cap=VMin; capmag = VMag; - v->cap = cap; - v->esz = esz; - return v + 1; -} - -void -vgrow(void *vp, ulong len) -{ - Vec *v; - void *v1; - - v = *(Vec **)vp - 1; - assert(v+1 && v->mag == VMag); - if (v->cap >= len) - return; - v1 = vnew(len, v->esz); - memcpy(v1, v+1, v->cap * v->esz); - *(Vec **)vp = v1; -} - -int -phicls(int t, Tmp *tmp /*, int c*/) -{ - if (tmp[t].phi) - return tmp[t].phi; - return t; -#if 0 - int t1; - - t1 = tmp[t].phi; - if (!t1) - t1 = t; - if (t != t1) { - t1 = phitmp(t1, tmp, c); - if (c) - tmp[t].phi = t1; - } - return t1; -#endif -} - -Ref -newtmp(char *prfx, int k, Fn *fn) -{ - static int n; - int t; - - t = fn->ntmp++; - vgrow(&fn->tmp, fn->ntmp); - 
sprintf(fn->tmp[t].name, "%s%d", prfx, ++n); - fn->tmp[t].cls = k; - fn->tmp[t].slot = -1; - fn->tmp[t].nuse = +1; - fn->tmp[t].ndef = +1; - return TMP(t); -} - -Ref -getcon(int64_t val, Fn *fn) -{ - int c; - - for (c=0; cncon; c++) - if (fn->con[c].type == CBits && fn->con[c].bits.i == val) - return CON(c); - fn->ncon++; - vgrow(&fn->con, fn->ncon); - fn->con[c] = (Con){.type = CBits, .bits.i = val}; - return CON(c); -} - -void -addcon(Con *c0, Con *c1) -{ - if (c0->type == CUndef) - *c0 = *c1; - else { - if (c1->type == CAddr) { - if (c0->type == CAddr) - diag("addcon: adding two addresses"); - c0->type = CAddr; - strcpy(c0->label, c1->label); - } - c0->bits.i += c1->bits.i; - } -} - -void -bsinit(BSet *bs, uint n) -{ - n = (n + NBit-1) / NBit; - bs->nt = n; - bs->t = alloc(n * sizeof bs->t[0]); -} - -uint -bscount(BSet *bs) -{ - uint i, j, n; - - n = 0; - for (i=0; int; i++) - for (j=0; jt[i] & BIT(j)) - n++; - return n; -} - -static inline uint -bsmax(BSet *bs) -{ - return bs->nt * NBit; -} - -void -bsset(BSet *bs, uint elt) -{ - assert(elt < bsmax(bs)); - bs->t[elt/NBit] |= BIT(elt%NBit); -} - -void -bsclr(BSet *bs, uint elt) -{ - assert(elt < bsmax(bs)); - bs->t[elt/NBit] &= ~BIT(elt%NBit); -} - -#define BSOP(f, op) \ - void \ - f(BSet *a, BSet *b) \ - { \ - uint i; \ - \ - assert(a->nt == b->nt); \ - for (i=0; int; i++) \ - a->t[i] op b->t[i]; \ - } - -BSOP(bscopy, =) -BSOP(bsunion, |=) -BSOP(bsinter, &=) -BSOP(bsdiff, &= ~) - -int -bsequal(BSet *a, BSet *b) -{ - uint i; - - assert(a->nt == b->nt); - for (i=0; int; i++) - if (a->t[i] != b->t[i]) - return 0; - return 1; -} - -void -bszero(BSet *bs) -{ - memset(bs->t, 0, bs->nt * sizeof bs->t[0]); -} - -/* iterates on a bitset, use as follows - * - * for (i=0; bsiter(set, &i); i++) - * use(i); - * - */ -int -bsiter(BSet *bs, uint *elt) -{ - uint i; - - for (i=*elt;; i++) { - while (i < bsmax(bs) && !bs->t[i/NBit]) - i = (i + NBit) & -NBit; - if (i >= bsmax(bs)) - return 0; - if (bshas(bs, i)) { - *elt = i; - 
return 1; - } - } -} - -void -dumpts(BSet *bs, Tmp *tmp, FILE *f) -{ - uint t; - - fprintf(f, "["); - for (t=Tmp0; bsiter(bs, &t); t++) - fprintf(f, " %s", tmp[t].name); - fprintf(f, " ]\n"); -} diff --git a/ssa.c b/ssa.c new file mode 100644 index 0000000..0c163aa --- /dev/null +++ b/ssa.c @@ -0,0 +1,516 @@ +#include "all.h" +#include + +static void +adduse(Tmp *tmp, int ty, Blk *b, ...) +{ + Use *u; + int n; + va_list ap; + + va_start(ap, b); + n = tmp->nuse; + vgrow(&tmp->use, ++tmp->nuse); + u = &tmp->use[n]; + u->type = ty; + u->bid = b->id; + switch (ty) { + default: + diag("ssa: adduse defaulted"); + case UPhi: + u->u.phi = va_arg(ap, Phi *); + break; + case UIns: + u->u.ins = va_arg(ap, Ins *); + break; + case UJmp: + break; + } + va_end(ap); +} + +/* fill usage, phi, and class information + */ +void +filluse(Fn *fn) +{ + Blk *b; + Phi *p; + Ins *i; + int m, t; + uint a; + Tmp *tmp; + + /* todo, is this the correct file? */ + tmp = fn->tmp; + for (t=0; tntmp; t++) { + tmp[t].ndef = 0; + tmp[t].nuse = 0; + tmp[t].phi = 0; + tmp[t].cls = 0; + if (tmp[t].use == 0) + tmp[t].use = vnew(0, sizeof(Use)); + } + for (b=fn->start; b; b=b->link) { + for (p=b->phi; p; p=p->link) { + assert(rtype(p->to) == RTmp); + t = p->to.val; + tmp[t].ndef++; + tmp[t].cls = p->cls; + tmp[t].phi = p->to.val; + for (a=0; anarg; a++) + if (rtype(p->arg[a]) == RTmp) { + t = p->arg[a].val; + adduse(&tmp[t], UPhi, b, p); + if (!tmp[t].phi) + tmp[t].phi = p->to.val; + } + } + for (i=b->ins; i-b->ins < b->nins; i++) { + if (!req(i->to, R)) { + assert(rtype(i->to) == RTmp); + t = i->to.val; + tmp[t].ndef++; + tmp[t].cls = i->cls; + } + for (m=0; m<2; m++) + if (rtype(i->arg[m]) == RTmp) { + t = i->arg[m].val; + adduse(&tmp[t], UIns, b, i); + } + } + if (rtype(b->jmp.arg) == RTmp) + adduse(&tmp[b->jmp.arg.val], UJmp, b); + } +} + +static void +addpred(Blk *bp, Blk *bc) +{ + uint i; + + if (!bc->pred) { + bc->pred = alloc(bc->npred * sizeof bc->pred[0]); + for (i=0; inpred; i++) + bc->pred[i] 
= 0; + } + for (i=0; bc->pred[i]; i++) + ; + bc->pred[i] = bp; +} + +/* fill predecessors information in blocks + */ +void +fillpreds(Fn *f) +{ + Blk *b; + + for (b=f->start; b; b=b->link) { + b->npred = 0; + b->pred = 0; + } + for (b=f->start; b; b=b->link) { + if (b->s1) + b->s1->npred++; + if (b->s2) + b->s2->npred++; + } + for (b=f->start; b; b=b->link) { + if (b->s1) + addpred(b, b->s1); + if (b->s2) + addpred(b, b->s2); + } +} + +static int +rporec(Blk *b, int x) +{ + Blk *s1, *s2; + + if (!b || b->id >= 0) + return x; + b->id = 1; + s1 = b->s1; + s2 = b->s2; + if (s1 && s2 && s1->loop > s2->loop) { + s1 = b->s2; + s2 = b->s1; + } + x = rporec(s1, x); + x = rporec(s2, x); + b->id = x; + assert(x >= 0); + return x - 1; +} + +/* fill the rpo information in blocks + */ +void +fillrpo(Fn *f) +{ + int n; + Blk *b, **p; + + for (b=f->start; b; b=b->link) + b->id = -1; + n = 1 + rporec(f->start, f->nblk-1); + f->nblk -= n; + f->rpo = alloc(f->nblk * sizeof f->rpo[0]); + for (p=&f->start; *p;) { + b = *p; + if (b->id == -1) { + *p = b->link; + /* todo, free block */ + } else { + b->id -= n; + f->rpo[b->id] = b; + p=&(*p)->link; + } + } +} + +/* for dominators computation, read + * "A Simple, Fast Dominance Algorithm" + * by K. Cooper, T. Harvey, and K. Kennedy. 
+ */ + +static Blk * +inter(Blk *b1, Blk *b2) +{ + Blk *bt; + + if (b1 == 0) + return b2; + while (b1 != b2) { + if (b1->id < b2->id) { + bt = b1; + b1 = b2; + b2 = bt; + } + while (b1->id > b2->id) { + b1 = b1->idom; + assert(b1); + } + } + return b1; +} + +static void +filldom(Fn *fn) +{ + Blk *b, *d; + int ch, n; + uint p; + + for (b=fn->start; b; b=b->link) { + b->idom = 0; + b->dom = 0; + b->dlink = 0; + } + do { + ch = 0; + for (n=1; nnblk; n++) { + b = fn->rpo[n]; + d = 0; + for (p=0; pnpred; p++) + if (b->pred[p]->idom + || b->pred[p] == fn->start) + d = inter(d, b->pred[p]); + if (d != b->idom) { + ch++; + b->idom = d; + } + } + } while (ch); + for (b=fn->start; b; b=b->link) + if ((d=b->idom)) { + assert(d != b); + b->dlink = d->dom; + d->dom = b; + } +} + +static int +sdom(Blk *b1, Blk *b2) +{ + assert(b1 && b2); + if (b1 == b2) + return 0; + while (b2->id > b1->id) + b2 = b2->idom; + return b1 == b2; +} + +static int +dom(Blk *b1, Blk *b2) +{ + return b1 == b2 || sdom(b1, b2); +} + +static void +addfron(Blk *a, Blk *b) +{ + int n; + + for (n=0; nnfron; n++) + if (a->fron[n] == b) + return; + if (!a->nfron) + a->fron = vnew(++a->nfron, sizeof a->fron[0]); + else + vgrow(&a->fron, ++a->nfron); + a->fron[a->nfron-1] = b; +} + +static void +fillfron(Fn *fn) +{ + Blk *a, *b; + + for (b=fn->start; b; b=b->link) { + if (b->s1) + for (a=b; !sdom(a, b->s1); a=a->idom) + addfron(a, b->s1); + if (b->s2) + for (a=b; !sdom(a, b->s2); a=a->idom) + addfron(a, b->s2); + } +} + +static Ref +refindex(int t, Fn *fn) +{ + return newtmp(fn->tmp[t].name, fn->tmp[t].cls, fn); +} + +static void +phiins(Fn *fn) +{ + BSet u[1], defs[1]; + Blk *a, *b, **blist, **be, **bp; + Ins *i; + Phi *p; + Ref r; + int t, n, k, nt; + + bsinit(u, fn->nblk); + bsinit(defs, fn->nblk); + blist = emalloc(fn->nblk * sizeof blist[0]); + be = &blist[fn->nblk]; + nt = fn->ntmp; + for (t=Tmp0; ttmp[t].visit = 0; + if (fn->tmp[t].phi != 0) + continue; + bszero(u); + k = -1; + bp = be; + for 
(b=fn->start; b; b=b->link) { + b->visit = 0; + r = R; + for (i=b->ins; i-b->ins < b->nins; i++) { + if (!req(r, R)) { + if (req(i->arg[0], TMP(t))) + i->arg[0] = r; + if (req(i->arg[1], TMP(t))) + i->arg[1] = r; + } + if (req(i->to, TMP(t))) { + if (!bshas(b->out, t)) { + if (fn->tmp[t].ndef == 1) + r = TMP(t); + else + r = refindex(t, fn); + i->to = r; + } else { + if (!bshas(u, b->id)) { + bsset(u, b->id); + *--bp = b; + } + if (k == -1) + k = i->cls; + assert(k == i->cls); + } + } + } + if (!req(r, R) && req(b->jmp.arg, TMP(t))) + b->jmp.arg = r; + } + bscopy(defs, u); + while (bp != be) { + fn->tmp[t].visit = t; + b = *bp++; + bsclr(u, b->id); + for (n=0; nnfron; n++) { + a = b->fron[n]; + if (a->visit++ == 0) + if (bshas(a->in, t)) { + p = alloc(sizeof *p); + p->cls = k; + p->to = TMP(t); + p->link = a->phi; + a->phi = p; + if (!bshas(defs, a->id)) + if (!bshas(u, a->id)) { + bsset(u, a->id); + *--bp = a; + } + } + } + } + } + free(blist); +} + +typedef struct Name Name; +struct Name { + Ref r; + Blk *b; + Name *up; +}; + +static Name *namel; + +static Name * +nnew(Ref r, Blk *b, Name *up) +{ + Name *n; + + if (namel) { + n = namel; + namel = n->up; + } else + /* could use alloc, here + * but namel should be reset + */ + n = emalloc(sizeof *n); + n->r = r; + n->b = b; + n->up = up; + return n; +} + +static void +nfree(Name *n) +{ + n->up = namel; + namel = n; +} + +static void +rendef(Ref *r, Blk *b, Name **stk, Fn *fn) +{ + Ref r1; + int t; + + t = r->val; + if (req(*r, R) || !fn->tmp[t].visit) + return; + r1 = refindex(t, fn); + fn->tmp[r1.val].visit = t; + stk[t] = nnew(r1, b, stk[t]); + *r = r1; +} + +static Ref +getstk(int t, Blk *b, Name **stk) +{ + Name *n, *n1; + + n = stk[t]; + while (n && !dom(n->b, b)) { + n1 = n; + n = n->up; + nfree(n1); + } + stk[t] = n; + if (!n) { + /* uh, oh, warn */ + return CON_Z; + } else + return n->r; +} + +static void +renblk(Blk *b, Name **stk, Fn *fn) +{ + Phi *p; + Ins *i; + Blk *s, **ps, *succ[3]; + int t, m; + + 
for (p=b->phi; p; p=p->link) + rendef(&p->to, b, stk, fn); + for (i=b->ins; i-b->ins < b->nins; i++) { + for (m=0; m<2; m++) { + t = i->arg[m].val; + if (rtype(i->arg[m]) == RTmp) + if (fn->tmp[t].visit) + i->arg[m] = getstk(t, b, stk); + } + rendef(&i->to, b, stk, fn); + } + t = b->jmp.arg.val; + if (rtype(b->jmp.arg) == RTmp) + if (fn->tmp[t].visit) + b->jmp.arg = getstk(t, b, stk); + succ[0] = b->s1; + succ[1] = b->s2; + succ[2] = 0; + for (ps=succ; (s=*ps); ps++) + for (p=s->phi; p; p=p->link) { + t = p->to.val; + if ((t=fn->tmp[t].visit)) { + m = p->narg++; + if (m == NPred) + diag("ssa: too many phi arguments"); + p->arg[m] = getstk(t, b, stk); + p->blk[m] = b; + } + } + for (s=b->dom; s; s=s->dlink) + renblk(s, stk, fn); +} + +/* require ndef */ +void +ssa(Fn *fn) +{ + Name **stk, *n; + int d, nt; + Blk *b, *b1; + + nt = fn->ntmp; + stk = emalloc(nt * sizeof stk[0]); + d = debug['L']; + debug['L'] = 0; + filldom(fn); + if (debug['N']) { + fprintf(stderr, "\n> Dominators:\n"); + for (b1=fn->start; b1; b1=b1->link) { + if (!b1->dom) + continue; + fprintf(stderr, "%10s:", b1->name); + for (b=b1->dom; b; b=b->dlink) + fprintf(stderr, " %s", b->name); + fprintf(stderr, "\n"); + } + } + fillfron(fn); + filllive(fn); + phiins(fn); + renblk(fn->start, stk, fn); + while (nt--) + while ((n=stk[nt])) { + stk[nt] = n->up; + nfree(n); + } + debug['L'] = d; + free(stk); + if (debug['N']) { + fprintf(stderr, "\n> After SSA construction:\n"); + printfn(fn, stderr); + } +} diff --git a/test/go.sh b/test/go.sh deleted file mode 100755 index d2563d4..0000000 --- a/test/go.sh +++ /dev/null @@ -1,119 +0,0 @@ -#!/bin/sh - -DIR=`cd $(dirname "$0"); pwd` -QBE=$DIR/../src/qbe - -TMP=/tmp/qbe.zzzz - -DRV=$TMP.c -ASM=$TMP.s -BIN=$TMP.bin -OUT=$TMP.out - -cleanup() { - rm -f $DRV $ASM $BIN $OUT -} - -extract() { - WHAT="$1" - FILE="$2" - - awk " - /^# >>> $WHAT/ { - p = 1 - next - } - /^# <<&2 - exit 1 - fi - - echo "$(basename $T)..." - - if ! 
$QBE -o $ASM $T - then - echo "[qbe fail]" - return 1 - fi - - extract driver $T > $DRV - extract output $T > $OUT - - if test -s $DRV - then - LNK="$DRV $ASM" - else - LNK="$ASM" - fi - - if ! cc -g -o $BIN $LNK - then - echo "[cc fail]" - return 1 - fi - - if test -s $OUT - then - $BIN a b c | diff - $OUT - RET=$? - REASON="output" - else - $BIN a b c - RET=$? - REASON="returned $RET" - fi - - if test $RET -ne 0 - then - echo "[$REASON fail]" - return 1 - fi - - printf "\033[1A\033[45C[ok]\n" -} - - -#trap cleanup TERM QUIT - -if test -z "$1" -then - echo "usage: test/go.sh {all, SSAFILE}" 2>&1 - exit 1 -fi - -case $1 in - "all") - F=0 - for T in $DIR/[!_]*.ssa - do - once $T - F=`expr $F + $?` - done - if test $F -ge 1 - then - echo - echo "$F test(s) failed!" - else - echo - echo "All is fine!" - fi - ;; - *) - once $1 - exit $? - ;; -esac diff --git a/tools/abifuzz.sh b/tools/abifuzz.sh index 57930fb..e715b3d 100755 --- a/tools/abifuzz.sh +++ b/tools/abifuzz.sh @@ -2,7 +2,7 @@ OCAMLC=${OCAMLC:-/usr/bin/ocamlc} DIR=`cd $(dirname "$0"); pwd` -QBE=$DIR/../src/qbe +QBE=$DIR/../obj/qbe failure() { echo "Failure at stage:" $1 >&2 diff --git a/tools/fptox.c b/tools/fptox.c deleted file mode 100644 index a2bc155..0000000 --- a/tools/fptox.c +++ /dev/null @@ -1,18 +0,0 @@ -#include -#include - -int -main(int ac, char *av[]) -{ - double d; - float f; - - if (ac < 2) { - usage: - fputs("usage: fptox NUMBER\n", stderr); - return 1; - } - f = d = strtod(av[1], 0); - printf("0x%08x 0x%016llx\n", *(unsigned *)&f, *(unsigned long long*)&d); - return 0; -} diff --git a/tools/pmov.c b/tools/pmov.c index 9136374..62d3921 100644 --- a/tools/pmov.c +++ b/tools/pmov.c @@ -13,7 +13,7 @@ static void assert_test(char *, int), fail(void), iexec(int *); -#include "../src/rega.c" +#include "../../rega.c" static void bsinit_(BSet *, uint); diff --git a/tools/regress.sh b/tools/regress.sh deleted file mode 100755 index 5aaea35..0000000 --- a/tools/regress.sh +++ /dev/null @@ -1,17 +0,0 @@ 
-#!/bin/sh - -for t in ../test/* -do - printf "Test $t ... " - - ./qbe $t >/tmp/out.0 2>&1 - ./qbe.1 $t >/tmp/out.1 2>&1 - - if diff /tmp/out.0 /tmp/out.1 > /dev/null - then - echo "OK" - else - echo "KO" - break - fi -done diff --git a/tools/unit.sh b/tools/unit.sh new file mode 100755 index 0000000..c4a85d2 --- /dev/null +++ b/tools/unit.sh @@ -0,0 +1,119 @@ +#!/bin/sh + +DIR=`cd $(dirname "$0"); pwd` +QBE=$DIR/../obj/qbe + +TMP=/tmp/qbe.zzzz + +DRV=$TMP.c +ASM=$TMP.s +BIN=$TMP.bin +OUT=$TMP.out + +cleanup() { + rm -f $DRV $ASM $BIN $OUT +} + +extract() { + WHAT="$1" + FILE="$2" + + awk " + /^# >>> $WHAT/ { + p = 1 + next + } + /^# <<&2 + exit 1 + fi + + echo "$(basename $T)..." + + if ! $QBE -o $ASM $T + then + echo "[qbe fail]" + return 1 + fi + + extract driver $T > $DRV + extract output $T > $OUT + + if test -s $DRV + then + LNK="$DRV $ASM" + else + LNK="$ASM" + fi + + if ! cc -g -o $BIN $LNK + then + echo "[cc fail]" + return 1 + fi + + if test -s $OUT + then + $BIN a b c | diff - $OUT + RET=$? + REASON="output" + else + $BIN a b c + RET=$? + REASON="returned $RET" + fi + + if test $RET -ne 0 + then + echo "[$REASON fail]" + return 1 + fi + + printf "\033[1A\033[45C[ok]\n" +} + + +#trap cleanup TERM QUIT + +if test -z "$1" +then + echo "usage: test/go.sh {all, SSAFILE}" 2>&1 + exit 1 +fi + +case $1 in + "all") + F=0 + for T in $DIR/../test/[!_]*.ssa + do + once $T + F=`expr $F + $?` + done + if test $F -ge 1 + then + echo + echo "$F test(s) failed!" + else + echo + echo "All is fine!" + fi + ;; + *) + once $1 + exit $? 
+ ;; +esac diff --git a/util.c b/util.c new file mode 100644 index 0000000..65b3ff8 --- /dev/null +++ b/util.c @@ -0,0 +1,329 @@ +#include "all.h" + +typedef struct Bitset Bitset; +typedef struct Vec Vec; + +struct Vec { + ulong mag; + size_t esz; + ulong cap; + union { + long long ll; + long double ld; + void *ptr; + } align[]; +}; + +enum { + VMin = 2, + VMag = 0xcabba9e, + NPtr = 256, +}; + +Typ typ[NTyp]; +Ins insb[NIns], *curi; + +static void *ptr[NPtr]; +static void **pool = ptr; +static int nptr = 1; + +void +diag(char *s) +{ + fputs(s, stderr); + fputc('\n', stderr); + abort(); +} + +void * +emalloc(size_t n) +{ + void *p; + + p = calloc(1, n); + if (!p) + diag("emalloc: out of memory"); + return p; +} + +void * +alloc(size_t n) +{ + void **pp; + + if (n == 0) + return 0; + if (nptr >= NPtr) { + pp = emalloc(NPtr * sizeof(void *)); + pp[0] = pool; + pool = pp; + nptr = 1; + } + return pool[nptr++] = emalloc(n); +} + +void +freeall() +{ + void **pp; + + for (;;) { + for (pp = &pool[1]; pp < &pool[nptr]; pp++) + free(*pp); + pp = pool[0]; + if (!pp) + break; + free(pool); + pool = pp; + nptr = NPtr; + } + nptr = 1; +} + +Blk * +blknew() +{ + static Blk z; + Blk *b; + + b = alloc(sizeof *b); + *b = z; + return b; +} + +void +emit(int op, int k, Ref to, Ref arg0, Ref arg1) +{ + if (curi == insb) + diag("emit: too many instructions"); + *--curi = (Ins){ + .op = op, .cls = k, + .to = to, .arg = {arg0, arg1} + }; +} + +void +emiti(Ins i) +{ + emit(i.op, i.cls, i.to, i.arg[0], i.arg[1]); +} + +void +idup(Ins **pd, Ins *s, ulong n) +{ + *pd = alloc(n * sizeof(Ins)); + memcpy(*pd, s, n * sizeof(Ins)); +} + +Ins * +icpy(Ins *d, Ins *s, ulong n) +{ + memcpy(d, s, n * sizeof(Ins)); + return d + n; +} + +void * +vnew(ulong len, size_t esz) +{ + ulong cap; + Vec *v; + + for (cap=VMin; capmag = VMag; + v->cap = cap; + v->esz = esz; + return v + 1; +} + +void +vgrow(void *vp, ulong len) +{ + Vec *v; + void *v1; + + v = *(Vec **)vp - 1; + assert(v+1 && v->mag == VMag); + if 
(v->cap >= len) + return; + v1 = vnew(len, v->esz); + memcpy(v1, v+1, v->cap * v->esz); + *(Vec **)vp = v1; +} + +int +phicls(int t, Tmp *tmp /*, int c*/) +{ + if (tmp[t].phi) + return tmp[t].phi; + return t; +#if 0 + int t1; + + t1 = tmp[t].phi; + if (!t1) + t1 = t; + if (t != t1) { + t1 = phitmp(t1, tmp, c); + if (c) + tmp[t].phi = t1; + } + return t1; +#endif +} + +Ref +newtmp(char *prfx, int k, Fn *fn) +{ + static int n; + int t; + + t = fn->ntmp++; + vgrow(&fn->tmp, fn->ntmp); + sprintf(fn->tmp[t].name, "%s%d", prfx, ++n); + fn->tmp[t].cls = k; + fn->tmp[t].slot = -1; + fn->tmp[t].nuse = +1; + fn->tmp[t].ndef = +1; + return TMP(t); +} + +Ref +getcon(int64_t val, Fn *fn) +{ + int c; + + for (c=0; cncon; c++) + if (fn->con[c].type == CBits && fn->con[c].bits.i == val) + return CON(c); + fn->ncon++; + vgrow(&fn->con, fn->ncon); + fn->con[c] = (Con){.type = CBits, .bits.i = val}; + return CON(c); +} + +void +addcon(Con *c0, Con *c1) +{ + if (c0->type == CUndef) + *c0 = *c1; + else { + if (c1->type == CAddr) { + if (c0->type == CAddr) + diag("addcon: adding two addresses"); + c0->type = CAddr; + strcpy(c0->label, c1->label); + } + c0->bits.i += c1->bits.i; + } +} + +void +bsinit(BSet *bs, uint n) +{ + n = (n + NBit-1) / NBit; + bs->nt = n; + bs->t = alloc(n * sizeof bs->t[0]); +} + +uint +bscount(BSet *bs) +{ + uint i, j, n; + + n = 0; + for (i=0; int; i++) + for (j=0; jt[i] & BIT(j)) + n++; + return n; +} + +static inline uint +bsmax(BSet *bs) +{ + return bs->nt * NBit; +} + +void +bsset(BSet *bs, uint elt) +{ + assert(elt < bsmax(bs)); + bs->t[elt/NBit] |= BIT(elt%NBit); +} + +void +bsclr(BSet *bs, uint elt) +{ + assert(elt < bsmax(bs)); + bs->t[elt/NBit] &= ~BIT(elt%NBit); +} + +#define BSOP(f, op) \ + void \ + f(BSet *a, BSet *b) \ + { \ + uint i; \ + \ + assert(a->nt == b->nt); \ + for (i=0; int; i++) \ + a->t[i] op b->t[i]; \ + } + +BSOP(bscopy, =) +BSOP(bsunion, |=) +BSOP(bsinter, &=) +BSOP(bsdiff, &= ~) + +int +bsequal(BSet *a, BSet *b) +{ + uint i; + + 
assert(a->nt == b->nt); + for (i=0; int; i++) + if (a->t[i] != b->t[i]) + return 0; + return 1; +} + +void +bszero(BSet *bs) +{ + memset(bs->t, 0, bs->nt * sizeof bs->t[0]); +} + +/* iterates on a bitset, use as follows + * + * for (i=0; bsiter(set, &i); i++) + * use(i); + * + */ +int +bsiter(BSet *bs, uint *elt) +{ + uint i; + + for (i=*elt;; i++) { + while (i < bsmax(bs) && !bs->t[i/NBit]) + i = (i + NBit) & -NBit; + if (i >= bsmax(bs)) + return 0; + if (bshas(bs, i)) { + *elt = i; + return 1; + } + } +} + +void +dumpts(BSet *bs, Tmp *tmp, FILE *f) +{ + uint t; + + fprintf(f, "["); + for (t=Tmp0; bsiter(bs, &t); t++) + fprintf(f, " %s", tmp[t].name); + fprintf(f, " ]\n"); +} -- cgit 1.4.1