summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/.gitignore5
-rw-r--r--src/.tag11
-rw-r--r--src/Makefile24
-rw-r--r--src/all.h563
-rw-r--r--src/copy.c159
-rw-r--r--src/emit.c669
-rw-r--r--src/isel.c1136
-rw-r--r--src/live.c174
-rw-r--r--src/main.c140
-rw-r--r--src/mem.c81
-rw-r--r--src/parse.c1099
-rw-r--r--src/rega.c598
-rw-r--r--src/spill.c507
-rw-r--r--src/ssa.c516
-rw-r--r--src/util.c329
15 files changed, 0 insertions, 6011 deletions
diff --git a/src/.gitignore b/src/.gitignore
deleted file mode 100644
index 5c8ecc2..0000000
--- a/src/.gitignore
+++ /dev/null
@@ -1,5 +0,0 @@
-qbe
-config.h
-.comfile
-*.o
-*.out
diff --git a/src/.tag b/src/.tag
deleted file mode 100644
index 5b8c210..0000000
--- a/src/.tag
+++ /dev/null
@@ -1,11 +0,0 @@
-Look slot(
-
-Get lisc.h
-Get parse.c
-Get isel.c
-Get spill.c
-Get rega.c
-Get emit.c
-
-New
-|fmt
diff --git a/src/Makefile b/src/Makefile
deleted file mode 100644
index 6adfbd3..0000000
--- a/src/Makefile
+++ /dev/null
@@ -1,24 +0,0 @@
-BIN = qbe
-OBJ = main.o util.o parse.o mem.o ssa.o copy.o live.o isel.o spill.o rega.o emit.o
-
-CFLAGS += -Wall -Wextra -std=c99 -g -pedantic
-
-$(BIN): $(OBJ)
-	$(CC) $(LDFLAGS) $(OBJ) -o $@
-
-$(OBJ): all.h config.h
-
-config.h:
-	@case `uname` in                                \
-	*Darwin*) echo "#define Defaultasm Gasmacho" ;; \
-	*Linux*)  echo "#define Defaultasm Gaself" ;;   \
-	esac > $@
-
-
-all: $(BIN)
-clean:
-	rm -f $(BIN) $(OBJ)
-check: $(BIN)
-	../test/go.sh all
-
-.PHONY: all clean check syndoc
diff --git a/src/all.h b/src/all.h
deleted file mode 100644
index 40c80f6..0000000
--- a/src/all.h
+++ /dev/null
@@ -1,563 +0,0 @@
-#include <assert.h>
-#include <inttypes.h>
-#include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#define MAKESURE(what, x) typedef char make_sure_##what[(x)?1:-1]
-
-typedef unsigned int uint;
-typedef unsigned short ushort;
-typedef unsigned long ulong;
-typedef unsigned long bits;
-
-typedef struct BSet BSet;
-typedef struct Ref Ref;
-typedef struct OpDesc OpDesc;
-typedef struct Ins Ins;
-typedef struct Phi Phi;
-typedef struct Blk Blk;
-typedef struct Use Use;
-typedef struct Tmp Tmp;
-typedef struct Con Con;
-typedef struct Addr Mem;
-typedef struct Fn Fn;
-typedef struct Typ Typ;
-typedef struct Dat Dat;
-
-enum Reg {
-	RXX,
-
-	RAX, /* caller-save */
-	RCX,
-	RDX,
-	RSI,
-	RDI,
-	R8,
-	R9,
-	R10,
-	R11,
-
-	RBX, /* callee-save */
-	R12,
-	R13,
-	R14,
-	R15,
-
-	RBP, /* reserved */
-	RSP,
-
-	XMM0, /* sse */
-	XMM1,
-	XMM2,
-	XMM3,
-	XMM4,
-	XMM5,
-	XMM6,
-	XMM7,
-	XMM8,
-	XMM9,
-	XMM10,
-	XMM11,
-	XMM12,
-	XMM13,
-	XMM14,
-	XMM15,
-
-	Tmp0, /* first non-reg temporary */
-
-	NIReg = R12 - RAX + 1,
-	NFReg = XMM14 - XMM0 + 1,
-	NISave = 9,
-	NFSave = NFReg,
-	NRSave = NISave + NFSave,
-	NRClob = 5,
-};
-
-enum {
-	NString = 32,
-	NPred   = 63,
-	NIns    = 8192,
-	NAlign  = 3,
-	NSeg    = 32,
-	NTyp    = 128,
-	NBit    = CHAR_BIT * sizeof(bits),
-};
-
-MAKESURE(NBit_is_enough, NBit >= (int)Tmp0);
-
-#define BIT(n) ((bits)1 << (n))
-
-struct BSet {
-	uint nt;
-	bits *t;
-};
-
-struct Ref {
-	uint16_t type:2;
-	uint16_t val:14;
-};
-
-enum Alt {
-	AType,
-	ACall,
-	AMem,
-
-	AShift = 12,
-	AMask = (1<<AShift) - 1
-};
-
-enum {
-	RTmp,
-	RCon,
-	RSlot,
-	RAlt,
-
-	RAType = RAlt + AType,
-	RACall = RAlt + ACall,
-	RAMem  = RAlt + AMem,
-
-	NRef = (1<<14) - 1
-};
-
-#define R        (Ref){0, 0}
-#define TMP(x)   (Ref){RTmp, x}
-#define CON(x)   (Ref){RCon, x}
-#define CON_Z    CON(0)          /* reserved zero constant */
-#define SLOT(x)  (Ref){RSlot, x}
-#define TYPE(x)  (Ref){RAlt, (x)|(AType<<AShift)}
-#define CALL(x)  (Ref){RAlt, (x)|(ACall<<AShift)}
-#define MEM(x)   (assert(x<(1<<AShift) && "too many mems"), \
-                 (Ref){RAlt, (x)|(AMem<<AShift)})
-
-static inline int req(Ref a, Ref b)
-{
-	return a.type == b.type && a.val == b.val;
-}
-
-static inline int rtype(Ref r)
-{
-	if (req(r, R))
-		return -1;
-	if (r.type == RAlt)
-		return RAlt + (r.val >> AShift);
-	return r.type;
-}
-
-static inline int isreg(Ref r)
-{
-	return rtype(r) == RTmp && r.val < Tmp0;
-}
-
-enum ICmp {
-#define ICMPS(X) \
-	X(ule)   \
-	X(ult)   \
-	X(sle)   \
-	X(slt)   \
-	X(sgt)   \
-	X(sge)   \
-	X(ugt)   \
-	X(uge)   \
-	X(eq)    \
-	X(ne) /* make sure icmpop() below works! */
-
-#define X(c) IC##c,
-	ICMPS(X)
-#undef X
-	NICmp,
-
-	ICXnp = NICmp, /* x64 specific */
-	ICXp,
-	NXICmp
-};
-
-static inline int icmpop(int c)
-{
-	return c >= ICeq ? c : ICuge - c;
-}
-
-enum FCmp {
-#define FCMPS(X) \
-	X(le)    \
-	X(lt)    \
-	X(gt)    \
-	X(ge)    \
-	X(ne)    \
-	X(eq)    \
-	X(o)     \
-	X(uo)
-
-#define X(c) FC##c,
-	FCMPS(X)
-#undef X
-	NFCmp
-};
-
-enum Class {
-	Kw,
-	Kl,
-	Ks,
-	Kd
-};
-
-#define KWIDE(k) ((k)&1)
-#define KBASE(k) ((k)>>1)
-
-enum Op {
-	OXXX,
-
-	/* public instructions */
-	OAdd,
-	OSub,
-	ODiv,
-	ORem,
-	OUDiv,
-	OURem,
-	OMul,
-	OAnd,
-	OOr,
-	OXor,
-	OSar,
-	OShr,
-	OShl,
-	OCmpw,
-	OCmpw1 = OCmpw + NICmp-1,
-	OCmpl,
-	OCmpl1 = OCmpl + NICmp-1,
-	OCmps,
-	OCmps1 = OCmps + NFCmp-1,
-	OCmpd,
-	OCmpd1 = OCmpd + NFCmp-1,
-
-	OStored,
-	OStores,
-	OStorel,
-	OStorew,
-	OStoreh,
-	OStoreb,
-#define isstore(o) (OStored <= o && o <= OStoreb)
-	OLoadsw,  /* needs to match OExt (mem.c) */
-	OLoaduw,
-	OLoadsh,
-	OLoaduh,
-	OLoadsb,
-	OLoadub,
-	OLoad,
-#define isload(o) (OLoadsw <= o && o <= OLoad)
-	OExtsw,
-	OExtuw,
-	OExtsh,
-	OExtuh,
-	OExtsb,
-	OExtub,
-#define isext(o) (OExtsw <= o && o <= OExtub)
-
-	OExts,
-	OTruncd,
-	OFtosi,
-	OSitof,
-	OCast,
-
-	OAlloc,
-	OAlloc1 = OAlloc + NAlign-1,
-
-	OCopy,
-	NPubOp,
-
-	/* function instructions */
-	OPar = NPubOp,
-	OParc,
-	OArg,
-	OArgc,
-	OCall,
-
-	/* reserved instructions */
-	ONop,
-	OAddr,
-	OSwap,
-	OSign,
-	OSAlloc,
-	OXIDiv,
-	OXDiv,
-	OXCmp,
-	OXSet,
-	OXSetnp = OXSet + ICXnp,
-	OXSetp  = OXSet + ICXp,
-	OXTest,
-	NOp
-};
-
-enum Jmp {
-	JXXX,
-	JRet0,
-	JRetw,
-	JRetl,
-	JRets,
-	JRetd,
-	JRetc,
-#define isret(j) (JRet0 <= j && j <= JRetc)
-	JJmp,
-	JJnz,
-	JXJc,
-	JXJnp = JXJc + ICXnp,
-	JXJp  = JXJc + ICXp,
-	NJmp
-};
-
-struct OpDesc {
-	char *name;
-	int nmem;
-	char argcls[2][4];
-	uint sflag:1; /* sets the zero flag */
-	uint lflag:1; /* leaves flags */
-};
-
-struct Ins {
-	ushort op:14;
-	Ref to;
-	Ref arg[2];
-	ushort cls:2;
-};
-
-struct Phi {
-	Ref to;
-	Ref arg[NPred];
-	Blk *blk[NPred];
-	uint narg;
-	int cls;
-	Phi *link;
-};
-
-struct Blk {
-	Phi *phi;
-	Ins *ins;
-	uint nins;
-	struct {
-		short type;
-		Ref arg;
-	} jmp;
-	Blk *s1;
-	Blk *s2;
-	Blk *link;
-
-	int id;
-	int visit;
-
-	Blk *idom;
-	Blk *dom, *dlink;
-	Blk **fron;
-	int nfron;
-
-	Blk **pred;
-	uint npred;
-	BSet in[1], out[1], gen[1];
-	int nlive[2];
-	int loop;
-	char name[NString];
-};
-
-struct Use {
-	enum {
-		UXXX,
-		UPhi,
-		UIns,
-		UJmp,
-	} type;
-	int bid;
-	union {
-		Ins *ins;
-		Phi *phi;
-	} u;
-};
-
-struct Tmp {
-	char name[NString];
-	Use *use;
-	uint ndef, nuse;
-	uint cost;
-	short slot;
-	short cls;
-	struct {
-		int r;
-		bits m;
-	} hint;
-	int phi;
-	int visit;
-};
-
-struct Con {
-	enum {
-		CUndef,
-		CBits,
-		CAddr,
-	} type;
-	char label[NString];
-	union {
-		int64_t i;
-		double d;
-		float s;
-	} bits;
-	char flt; /* for printing, see parse.c */
-	char local;
-};
-
-typedef struct Addr Addr;
-
-struct Addr { /* x64 addressing */
-	Con offset;
-	Ref base;
-	Ref index;
-	int scale;
-};
-
-struct Fn {
-	Blk *start;
-	Tmp *tmp;
-	Con *con;
-	Mem *mem;
-	int ntmp;
-	int ncon;
-	int nmem;
-	int nblk;
-	int retty; /* index in typ[], -1 if no aggregate return */
-	Ref retr;
-	Blk **rpo;
-	bits reg;
-	int slot;
-	char export;
-	char name[NString];
-};
-
-struct Typ {
-	char name[NString];
-	int dark;
-	uint size;
-	int align;
-
-	struct {
-		uint isflt:1;
-		uint ispad:1;
-		uint len:30;
-	} seg[NSeg+1];
-};
-
-struct Dat {
-	enum {
-		DStart,
-		DEnd,
-		DName,
-		DAlign,
-		DB,
-		DH,
-		DW,
-		DL,
-		DZ
-	} type;
-	union {
-		int64_t num;
-		double fltd;
-		float flts;
-		char *str;
-		struct {
-			char *nam;
-			int64_t off;
-		} ref;
-	} u;
-	char isref;
-	char isstr;
-	char export;
-};
-
-
-/* main.c */
-enum Asm {
-	Gasmacho,
-	Gaself,
-};
-extern char debug['Z'+1];
-
-/* util.c */
-extern Typ typ[NTyp];
-extern Ins insb[NIns], *curi;
-void diag(char *) __attribute__((noreturn));
-void *emalloc(size_t);
-void *alloc(size_t);
-void freeall(void);
-Blk *blknew(void);
-void emit(int, int, Ref, Ref, Ref);
-void emiti(Ins);
-void idup(Ins **, Ins *, ulong);
-Ins *icpy(Ins *, Ins *, ulong);
-void *vnew(ulong, size_t);
-void vgrow(void *, ulong);
-int phicls(int, Tmp *);
-Ref newtmp(char *, int, Fn *);
-Ref getcon(int64_t, Fn *);
-void addcon(Con *, Con *);
-void dumpts(BSet *, Tmp *, FILE *);
-
-void bsinit(BSet *, uint);
-void bszero(BSet *);
-uint bscount(BSet *);
-void bsset(BSet *, uint);
-void bsclr(BSet *, uint);
-void bscopy(BSet *, BSet *);
-void bsunion(BSet *, BSet *);
-void bsinter(BSet *, BSet *);
-void bsdiff(BSet *, BSet *);
-int bsequal(BSet *, BSet *);
-int bsiter(BSet *, uint *);
-
-static inline int
-bshas(BSet *bs, uint elt)
-{
-	assert(elt < bs->nt * NBit);
-	return (bs->t[elt/NBit] & BIT(elt%NBit)) != 0;
-}
-
-/* parse.c */
-extern OpDesc opdesc[NOp];
-void parse(FILE *, char *, void (Dat *), void (Fn *));
-void printfn(Fn *, FILE *);
-void printref(Ref, Fn *, FILE *);
-void err(char *, ...);
-
-/* mem.c */
-void memopt(Fn *);
-
-/* ssa.c */
-void filluse(Fn *);
-void fillpreds(Fn *);
-void fillrpo(Fn *);
-void ssa(Fn *);
-
-/* copy.c */
-void copy(Fn *);
-
-/* live.c */
-void liveon(BSet *, Blk *, Blk *);
-void filllive(Fn *);
-
-/* isel.c */
-extern int rsave[/* NRSave */];
-extern int rclob[/* NRClob */];
-bits retregs(Ref, int[2]);
-bits argregs(Ref, int[2]);
-void isel(Fn *);
-
-/* spill.c */
-void fillcost(Fn *);
-void spill(Fn *);
-
-/* rega.c */
-void rega(Fn *);
-
-/* emit.c */
-extern char *locprefix;
-extern char *symprefix;
-void emitfn(Fn *, FILE *);
-void emitdat(Dat *, FILE *);
-int stashfp(int64_t, int);
-void emitfin(FILE *);
diff --git a/src/copy.c b/src/copy.c
deleted file mode 100644
index ef2d01d..0000000
--- a/src/copy.c
+++ /dev/null
@@ -1,159 +0,0 @@
-#include "all.h"
-
-typedef struct RList RList;
-struct RList {
-	int t;
-	RList *l;
-};
-
-static Ref
-copyof(Ref r, Ref *cp)
-{
-	if (rtype(r) == RTmp)
-		return cp[r.val];
-	else
-		return r;
-}
-
-static void
-update(Ref r, Ref rcp, Ref *cp, RList **w)
-{
-	RList *l;
-
-	if (!req(cp[r.val], rcp)) {
-		cp[r.val] = rcp;
-		l = emalloc(sizeof *l);
-		l->t = r.val;
-		l->l = *w;
-		*w = l;
-	}
-}
-
-static void
-visitphi(Phi *p, Ref *cp, RList **w)
-{
-	uint a;
-	Ref r, r1;
-
-	r = R;
-	for (a=0; a<p->narg; a++) {
-		r1 = copyof(p->arg[a], cp);
-		if (req(r1, R))
-			continue;
-		if (req(r, R) || req(r, r1))
-			r = r1;
-		else {
-			r = p->to;
-			break;
-		}
-	}
-	assert(!req(r, R));
-	update(p->to, r, cp, w);
-}
-
-static void
-visitins(Ins *i, Ref *cp, RList **w)
-{
-	Ref r;
-
-	if (i->op == OCopy) {
-		r = copyof(i->arg[0], cp);
-		update(i->to, r, cp, w);
-	} else if (!req(i->to, R)) {
-		assert(rtype(i->to) == RTmp);
-		update(i->to, i->to, cp, w);
-	}
-}
-
-void
-copy(Fn *fn)
-{
-	Blk *b;
-	Ref *cp, r;
-	RList *w, *w1;
-	Use *u, *u1;
-	Ins *i;
-	Phi *p, **pp;
-	uint a;
-	int t;
-
-	w = 0;
-	cp = emalloc(fn->ntmp * sizeof cp[0]);
-	for (b=fn->start; b; b=b->link) {
-		for (p=b->phi; p; p=p->link)
-			visitphi(p, cp, &w);
-		for (i=b->ins; i-b->ins < b->nins; i++)
-			visitins(i, cp, &w);
-	}
-	while ((w1=w)) {
-		t = w->t;
-		w = w->l;
-		free(w1);
-		u = fn->tmp[t].use;
-		u1 = u + fn->tmp[t].nuse;
-		for (; u<u1; u++)
-			switch (u->type) {
-			default:
-				diag("copy: invalid use");
-			case UPhi:
-				visitphi(u->u.phi, cp, &w);
-				break;
-			case UIns:
-				visitins(u->u.ins, cp, &w);
-				break;
-			case UJmp:
-				break;
-			}
-	}
-	for (b=fn->start; b; b=b->link) {
-		for (pp=&b->phi; (p=*pp);) {
-			r = cp[p->to.val];
-			if (!req(r, p->to)) {
-				*pp = p->link;
-				continue;
-			}
-			for (a=0; a<p->narg; a++)
-				if (rtype(p->arg[a]) == RTmp) {
-					r = cp[p->arg[a].val];
-					assert(!req(r, R));
-					p->arg[a] = r;
-				}
-			pp=&p->link;
-		}
-		for (i=b->ins; i-b->ins < b->nins; i++) {
-			r = cp[i->to.val];
-			if (!req(r, i->to)) {
-				*i = (Ins){.op = ONop};
-				continue;
-			}
-			for (a=0; a<2; a++)
-				if (rtype(i->arg[a]) == RTmp) {
-					r = cp[i->arg[a].val];
-					assert(!req(r, R));
-					i->arg[a] = r;
-				}
-		}
-		if (rtype(b->jmp.arg) == RTmp) {
-			r = cp[b->jmp.arg.val];
-			assert(!req(r, R));
-			b->jmp.arg = r;
-		}
-	}
-	if (debug['C']) {
-		fprintf(stderr, "\n> Copy information:");
-		for (t=Tmp0; t<fn->ntmp; t++) {
-			if (req(cp[t], R)) {
-				fprintf(stderr, "\n%10s not seen!",
-					fn->tmp[t].name);
-			}
-			else if (!req(cp[t], TMP(t))) {
-				fprintf(stderr, "\n%10s copy of ",
-					fn->tmp[t].name);
-				printref(cp[t], fn, stderr);
-			}
-		}
-		fprintf(stderr, "\n\n> After copy elimination:\n");
-		printfn(fn, stderr);
-	}
-	free(cp);
-}
diff --git a/src/emit.c b/src/emit.c
deleted file mode 100644
index 9b2975d..0000000
--- a/src/emit.c
+++ /dev/null
@@ -1,669 +0,0 @@
-#include "all.h"
-
-char *locprefix, *symprefix;
-
-enum {
-	SLong = 0,
-	SWord = 1,
-	SShort = 2,
-	SByte = 3,
-
-	Ki = -1, /* matches Kw and Kl */
-	Ka = -2, /* matches all classes */
-};
-
-/* Instruction format strings:
- *
- * if the format string starts with -, the instruction
- * is assumed to be 3-address and is put in 2-address
- * mode using an extra mov if necessary
- *
- * if the format string starts with +, the same as the
- * above applies, but commutativity is also assumed
- *
- * %k  is used to set the class of the instruction,
- *     it'll expand to "l", "q", "ss", "sd", depending
- *     on the instruction class
- * %0  designates the first argument
- * %1  designates the second argument
- * %=  designates the result
- *
- * if %k is not used, a prefix to 0, 1, or = must be
- * added, it can be:
- *   M - memory reference
- *   L - long  (64 bits)
- *   W - word  (32 bits)
- *   H - short (16 bits)
- *   B - byte  (8 bits)
- *   S - single precision float
- *   D - double precision float
- */
-static struct {
-	short op;
-	short cls;
-	char *asm;
-} omap[] = {
-	{ OAdd,    Ka, "+add%k %1, %=" },
-	{ OSub,    Ka, "-sub%k %1, %=" },
-	{ OAnd,    Ki, "+and%k %1, %=" },
-	{ OOr,     Ki, "+or%k %1, %=" },
-	{ OXor,    Ki, "+xor%k %1, %=" },
-	{ OSar,    Ki, "-sar%k %B1, %=" },
-	{ OShr,    Ki, "-shr%k %B1, %=" },
-	{ OShl,    Ki, "-shl%k %B1, %=" },
-	{ OMul,    Ki, "+imul%k %1, %=" },
-	{ OMul,    Ks, "+mulss %1, %=" }, /* fixme */
-	{ OMul,    Kd, "+mulsd %1, %=" },
-	{ ODiv,    Ka, "-div%k %1, %=" },
-	{ OStorel, Ka, "movq %L0, %M1" },
-	{ OStorew, Ka, "movl %W0, %M1" },
-	{ OStoreh, Ka, "movw %H0, %M1" },
-	{ OStoreb, Ka, "movb %B0, %M1" },
-	{ OStores, Ka, "movss %S0, %M1" },
-	{ OStored, Ka, "movsd %D0, %M1" },
-	{ OLoad,   Ka, "mov%k %M0, %=" },
-	{ OLoadsw, Kl, "movslq %M0, %L=" },
-	{ OLoadsw, Kw, "movl %M0, %W=" },
-	{ OLoaduw, Ki, "movl %M0, %W=" },
-	{ OLoadsh, Ki, "movsw%k %M0, %=" },
-	{ OLoaduh, Ki, "movzw%k %M0, %=" },
-	{ OLoadsb, Ki, "movsb%k %M0, %=" },
-	{ OLoadub, Ki, "movzb%k %M0, %=" },
-	{ OExtsw,  Kl, "movslq %W0, %L=" },
-	{ OExtuw,  Kl, "movl %W0, %W=" },
-	{ OExtsh,  Ki, "movsw%k %H0, %=" },
-	{ OExtuh,  Ki, "movzw%k %H0, %=" },
-	{ OExtsb,  Ki, "movsb%k %B0, %=" },
-	{ OExtub,  Ki, "movzb%k %B0, %=" },
-
-	{ OExts,   Kd, "cvtss2sd %0, %=" },  /* see if factorization is possible */
-	{ OTruncd, Ks, "cvttsd2ss %0, %=" },
-	{ OFtosi,  Kw, "cvttss2si %0, %=" },
-	{ OFtosi,  Kl, "cvttsd2si %0, %=" },
-	{ OSitof,  Ks, "cvtsi2ss %W0, %=" },
-	{ OSitof,  Kd, "cvtsi2sd %L0, %=" },
-	{ OCast,   Ki, "movq %D0, %L=" },
-	{ OCast,   Ka, "movq %L0, %D=" },
-
-	{ OAddr,   Ki, "lea%k %M0, %=" },
-	{ OSwap,   Ki, "xchg%k %0, %1" },
-	{ OSign,   Kl, "cqto" },
-	{ OSign,   Kw, "cltd" },
-	{ OXDiv,   Ki, "div%k %0" },
-	{ OXIDiv,  Ki, "idiv%k %0" },
-	{ OXCmp,   Ks, "comiss %S0, %S1" },  /* fixme, Kf */
-	{ OXCmp,   Kd, "comisd %D0, %D1" },
-	{ OXCmp,   Ki, "cmp%k %0, %1" },
-	{ OXTest,  Ki, "test%k %0, %1" },
-	{ OXSet+ICeq,  Ki, "setz %B=\n\tmovzb%k %B=, %=" },
-	{ OXSet+ICsle, Ki, "setle %B=\n\tmovzb%k %B=, %=" },
-	{ OXSet+ICslt, Ki, "setl %B=\n\tmovzb%k %B=, %=" },
-	{ OXSet+ICsgt, Ki, "setg %B=\n\tmovzb%k %B=, %=" },
-	{ OXSet+ICsge, Ki, "setge %B=\n\tmovzb%k %B=, %=" },
-	{ OXSet+ICne,  Ki, "setnz %B=\n\tmovzb%k %B=, %=" },
-	{ OXSet+ICXnp, Ki, "setnp %B=\n\tmovsb%k %B=, %=" },
-	{ OXSet+ICXp,  Ki, "setp %B=\n\tmovsb%k %B=, %=" },
-	{ NOp, 0, 0 }
-};
-
-static char *rname[][4] = {
-	[RAX] = {"rax", "eax", "ax", "al"},
-	[RBX] = {"rbx", "ebx", "bx", "bl"},
-	[RCX] = {"rcx", "ecx", "cx", "cl"},
-	[RDX] = {"rdx", "edx", "dx", "dl"},
-	[RSI] = {"rsi", "esi", "si", "sil"},
-	[RDI] = {"rdi", "edi", "di", "dil"},
-	[RBP] = {"rbp", "ebp", "bp", "bpl"},
-	[RSP] = {"rsp", "esp", "sp", "spl"},
-	[R8 ] = {"r8" , "r8d", "r8w", "r8b"},
-	[R9 ] = {"r9" , "r9d", "r9w", "r9b"},
-	[R10] = {"r10", "r10d", "r10w", "r10b"},
-	[R11] = {"r11", "r11d", "r11w", "r11b"},
-	[R12] = {"r12", "r12d", "r12w", "r12b"},
-	[R13] = {"r13", "r13d", "r13w", "r13b"},
-	[R14] = {"r14", "r14d", "r14w", "r14b"},
-	[R15] = {"r15", "r15d", "r15w", "r15b"},
-};
-
-
-static int
-slot(int s, Fn *fn)
-{
-	struct { int i:14; } x;
-
-	/* sign extend s using a bitfield */
-	x.i = s;
-	assert(NAlign == 3);
-	if (x.i < 0)
-		return -4 * x.i;
-	else {
-		assert(fn->slot >= x.i);
-		return -4 * (fn->slot - x.i);
-	}
-}
-
-static void
-emitcon(Con *con, FILE *f)
-{
-	switch (con->type) {
-	default:
-		diag("emit: invalid constant");
-	case CAddr:
-		fprintf(f, "%s%s", con->local ? locprefix : symprefix, con->label);
-		if (con->bits.i)
-			fprintf(f, "%+"PRId64, con->bits.i);
-		break;
-	case CBits:
-		fprintf(f, "%"PRId64, con->bits.i);
-		break;
-	}
-}
-
-static char *
-regtoa(int reg, int sz)
-{
-	static char buf[6];
-
-	if (reg >= XMM0) {
-		sprintf(buf, "xmm%d", reg-XMM0);
-		return buf;
-	} else
-		return rname[reg][sz];
-}
-
-static Ref
-getarg(char c, Ins *i)
-{
-	switch (c) {
-	default:
-		diag("emit: 0, 1, = expected in format");
-	case '0':
-		return i->arg[0];
-	case '1':
-		return i->arg[1];
-	case '=':
-		return i->to;
-	}
-}
-
-static void emitins(Ins, Fn *, FILE *);
-
-static void
-emitcopy(Ref r1, Ref r2, int k, Fn *fn, FILE *f)
-{
-	Ins icp;
-
-	icp.op = OCopy;
-	icp.arg[0] = r2;
-	icp.to = r1;
-	icp.cls = k;
-	emitins(icp, fn, f);
-}
-
-static void
-emitf(char *s, Ins *i, Fn *fn, FILE *f)
-{
-	static char clstoa[][3] = {"l", "q", "ss", "sd"};
-	char c;
-	int sz;
-	Ref ref;
-	Mem *m;
-	Con off;
-
-	switch (*s) {
-	case '+':
-		if (req(i->arg[1], i->to)) {
-			ref = i->arg[0];
-			i->arg[0] = i->arg[1];
-			i->arg[1] = ref;
-		}
-		/* fall through */
-	case '-':
-		if (req(i->arg[1], i->to) && !req(i->arg[0], i->to))
-			diag("emit: cannot convert to 2-address");
-		emitcopy(i->to, i->arg[0], i->cls, fn, f);
-		s++;
-		break;
-	}
-
-	fputc('\t', f);
-Next:
-	while ((c = *s++) != '%')
-		if (!c) {
-			fputc('\n', f);
-			return;
-		} else
-			fputc(c, f);
-	switch ((c = *s++)) {
-	default:
-		diag("emit: invalid escape");
-	case '%':
-		fputc('%', f);
-		break;
-	case 'k':
-		fputs(clstoa[i->cls], f);
-		break;
-	case '0':
-	case '1':
-	case '=':
-		sz = KWIDE(i->cls) ? SLong : SWord;
-		s--;
-		/* fall through */
-	case 'D':
-	case 'S':
-	Ref:
-		c = *s++;
-		ref = getarg(c, i);
-		switch (rtype(ref)) {
-		default:
-			diag("emit: invalid reference");
-		case RTmp:
-			assert(isreg(ref));
-			fprintf(f, "%%%s", regtoa(ref.val, sz));
-			break;
-		case RSlot:
-			fprintf(f, "%d(%%rbp)", slot(ref.val, fn));
-			break;
-		case RAMem:
-		Mem:
-			m = &fn->mem[ref.val & AMask];
-			if (rtype(m->base) == RSlot) {
-				off.type = CBits;
-				off.bits.i = slot(m->base.val, fn);
-				addcon(&m->offset, &off);
-				m->base = TMP(RBP);
-			}
-			if (m->offset.type != CUndef)
-				emitcon(&m->offset, f);
-			fputc('(', f);
-			if (req(m->base, R))
-				fprintf(f, "%%rip");
-			else
-				fprintf(f, "%%%s", regtoa(m->base.val, SLong));
-			if (!req(m->index, R))
-				fprintf(f, ", %%%s, %d",
-					regtoa(m->index.val, SLong),
-					m->scale
-				);
-			fputc(')', f);
-			break;
-		case RCon:
-			fputc('$', f);
-			emitcon(&fn->con[ref.val], f);
-			break;
-		}
-		break;
-	case 'L':
-		sz = SLong;
-		goto Ref;
-	case 'W':
-		sz = SWord;
-		goto Ref;
-	case 'H':
-		sz = SShort;
-		goto Ref;
-	case 'B':
-		sz = SByte;
-		goto Ref;
-	case 'M':
-		c = *s++;
-		ref = getarg(c, i);
-		switch (rtype(ref)) {
-		default:
-			diag("emit: invalid memory reference");
-		case RAMem:
-			goto Mem;
-		case RSlot:
-			fprintf(f, "%d(%%rbp)", slot(ref.val, fn));
-			break;
-		case RCon:
-			emitcon(&fn->con[ref.val], f);
-			fprintf(f, "(%%rip)");
-			break;
-		case RTmp:
-			assert(isreg(ref));
-			fprintf(f, "(%%%s)", regtoa(ref.val, SLong));
-			break;
-		}
-		break;
-	}
-	goto Next;
-}
-
-static void
-emitins(Ins i, Fn *fn, FILE *f)
-{
-	Ref r;
-	int64_t val;
-	int o;
-
-	switch (i.op) {
-	default:
-	Table:
-		/* most instructions are just pulled out of
-		 * the table omap[], some special cases are
-		 * detailed below */
-		for (o=0;; o++) {
-			/* this linear search should really be a binary
-			 * search */
-			if (omap[o].op == NOp)
-				diag("emit: no entry found for instruction");
-			if (omap[o].op == i.op)
-			if (omap[o].cls == i.cls
-			|| (omap[o].cls == Ki && KBASE(i.cls) == 0)
-			|| (omap[o].cls == Ka))
-				break;
-		}
-		emitf(omap[o].asm, &i, fn, f);
-		break;
-	case ONop:
-		/* just do nothing for nops, they are inserted
-		 * by some passes */
-		break;
-	case OMul:
-		/* here, we try to use the 3-addresss form
-		 * of multiplication when possible */
-		if (rtype(i.arg[1]) == RCon) {
-			r = i.arg[0];
-			i.arg[0] = i.arg[1];
-			i.arg[1] = r;
-		}
-		if (KBASE(i.cls) == 0 /* only available for ints */
-		&& rtype(i.arg[0]) == RCon
-		&& rtype(i.arg[1]) == RTmp) {
-			emitf("imul%k %0, %1, %=", &i, fn, f);
-			break;
-		}
-		goto Table;
-	case OSub:
-		/* we have to use the negation trick to handle
-		 * some 3-address substractions */
-		if (req(i.to, i.arg[1])) {
-			emitf("neg%k %=", &i, fn, f);
-			emitf("add%k %0, %=", &i, fn, f);
-			break;
-		}
-		goto Table;
-	case OCopy:
-		/* make sure we don't emit useless copies,
-		 * also, we can use a trick to load 64-bits
-		 * registers, it's detailed in my note below
-		 * http://c9x.me/art/notes.html?09/19/2015 */
-		if (req(i.to, R) || req(i.arg[0], R))
-			break;
-		if (isreg(i.to)
-		&& rtype(i.arg[0]) == RCon
-		&& i.cls == Kl
-		&& fn->con[i.arg[0].val].type == CBits
-		&& (val = fn->con[i.arg[0].val].bits.i) >= 0
-		&& val <= UINT32_MAX) {
-			emitf("movl %W0, %W=", &i, fn, f);
-		} else if (isreg(i.to)
-		&& rtype(i.arg[0]) == RCon
-		&& fn->con[i.arg[0].val].type == CAddr) {
-			emitf("lea%k %M0, %=", &i, fn, f);
-		} else if (!req(i.arg[0], i.to))
-			emitf("mov%k %0, %=", &i, fn, f);
-		break;
-	case OCall:
-		/* calls simply have a weird syntax in AT&T
-		 * assembly... */
-		switch (rtype(i.arg[0])) {
-		default:
-			diag("emit: invalid call instruction");
-		case RCon:
-			fprintf(f, "\tcallq ");
-			emitcon(&fn->con[i.arg[0].val], f);
-			fprintf(f, "\n");
-			break;
-		case RTmp:
-			emitf("callq *%L0", &i, fn, f);
-			break;
-		}
-		break;
-	case OSAlloc:
-		/* there is no good reason why this is here
-		 * maybe we should split OSAlloc in 2 different
-		 * instructions depending on the result
-		 */
-		emitf("subq %L0, %%rsp", &i, fn, f);
-		if (!req(i.to, R))
-			emitcopy(i.to, TMP(RSP), Kl, fn, f);
-		break;
-	case OSwap:
-		if (KBASE(i.cls) == 0)
-			goto Table;
-		/* for floats, there is no swap instruction
-		 * so we use xmm15 as a temporary
-		 */
-		emitcopy(TMP(XMM0+15), i.arg[0], i.cls, fn, f);
-		emitcopy(i.arg[0], i.arg[1], i.cls, fn, f);
-		emitcopy(i.arg[1], TMP(XMM0+15), i.cls, fn, f);
-		break;
-	}
-}
-
-static int
-cneg(int cmp)
-{
-	switch (cmp) {
-	default:   diag("emit: cneg() unhandled comparison");
-	case ICule: return ICugt;
-	case ICult: return ICuge;
-	case ICsle: return ICsgt;
-	case ICslt: return ICsge;
-	case ICsgt: return ICsle;
-	case ICsge: return ICslt;
-	case ICugt: return ICule;
-	case ICuge: return ICult;
-	case ICeq:  return ICne;
-	case ICne:  return ICeq;
-	case ICXnp: return ICXp;
-	case ICXp:  return ICXnp;
-	}
-}
-
-static int
-framesz(Fn *fn)
-{
-	int i, o, f;
-
-	assert(NAlign == 3);
-	for (i=0, o=0; i<NRClob; i++)
-		o ^= 1 & (fn->reg >> rclob[i]);
-	f = fn->slot;
-	f = (f + 3) & -4;
-	return 4*f + 8*o;
-}
-
-void
-emitfn(Fn *fn, FILE *f)
-{
-	static char *ctoa[] = {
-		[ICeq]  = "z",
-		[ICule] = "be",
-		[ICult] = "b",
-		[ICsle] = "le",
-		[ICslt] = "l",
-		[ICsgt] = "g",
-		[ICsge] = "ge",
-		[ICugt] = "a",
-		[ICuge] = "ae",
-		[ICne]  = "nz",
-		[ICXnp] = "np",
-		[ICXp]  = "p"
-	};
-	Blk *b, *s;
-	Ins *i, itmp;
-	int *r, c, fs;
-
-	fprintf(f, ".text\n");
-	if (fn->export)
-		fprintf(f, ".globl %s%s\n", symprefix, fn->name);
-	fprintf(f,
-		"%s%s:\n"
-		"\tpush %%rbp\n"
-		"\tmov %%rsp, %%rbp\n",
-		symprefix, fn->name
-	);
-	fs = framesz(fn);
-	if (fs)
-		fprintf(f, "\tsub $%d, %%rsp\n", fs);
-	for (r=rclob; r-rclob < NRClob; r++)
-		if (fn->reg & BIT(*r)) {
-			itmp.arg[0] = TMP(*r);
-			emitf("pushq %L0", &itmp, fn, f);
-		}
-
-	for (b=fn->start; b; b=b->link) {
-		fprintf(f, "%s%s:\n", locprefix, b->name);
-		for (i=b->ins; i!=&b->ins[b->nins]; i++)
-			emitins(*i, fn, f);
-		switch (b->jmp.type) {
-		case JRet0:
-			for (r=&rclob[NRClob]; r>rclob;)
-				if (fn->reg & BIT(*--r)) {
-					itmp.arg[0] = TMP(*r);
-					emitf("popq %L0", &itmp, fn, f);
-				}
-			fprintf(f,
-				"\tleave\n"
-				"\tret\n"
-			);
-			break;
-		case JJmp:
-			if (b->s1 != b->link)
-				fprintf(f, "\tjmp %s%s\n", locprefix, b->s1->name);
-			break;
-		default:
-			c = b->jmp.type - JXJc;
-			if (0 <= c && c <= NXICmp) {
-				if (b->link == b->s2) {
-					s = b->s1;
-				} else if (b->link == b->s1) {
-					c = cneg(c);
-					s = b->s2;
-				} else
-					diag("emit: unhandled jump (1)");
-				fprintf(f, "\tj%s %s%s\n", ctoa[c], locprefix, s->name);
-				break;
-			}
-			diag("emit: unhandled jump (2)");
-		}
-	}
-
-}
-
-void
-emitdat(Dat *d, FILE *f)
-{
-	static int align;
-	static char *dtoa[] = {
-		[DAlign] = ".align",
-		[DB] = "\t.byte",
-		[DH] = "\t.value",
-		[DW] = "\t.long",
-		[DL] = "\t.quad"
-	};
-
-	switch (d->type) {
-	case DStart:
-		align = 0;
-		fprintf(f, ".data\n");
-		break;
-	case DEnd:
-		break;
-	case DName:
-		if (!align)
-			fprintf(f, ".align 8\n");
-		if (d->export)
-			fprintf(f, ".globl %s%s\n", symprefix, d->u.str);
-		fprintf(f, "%s%s:\n", symprefix, d->u.str);
-		break;
-	case DZ:
-		fprintf(f, "\t.fill %"PRId64",1,0\n", d->u.num);
-		break;
-	default:
-		if (d->type == DAlign)
-			align = 1;
-
-		if (d->isstr) {
-			if (d->type != DB)
-				err("strings only supported for 'b' currently");
-			fprintf(f, "\t.ascii \"%s\"\n", d->u.str);
-		}
-		else if (d->isref) {
-			fprintf(f, "%s %s%+"PRId64"\n",
-				dtoa[d->type], d->u.ref.nam,
-				d->u.ref.off);
-		}
-		else {
-			fprintf(f, "%s %"PRId64"\n",
-				dtoa[d->type], d->u.num);
-		}
-		break;
-	}
-}
-
-typedef struct FBits FBits;
-
-struct FBits {
-	int64_t bits;
-	int wide;
-	FBits *link;
-};
-
-static FBits *stash;
-
-int
-stashfp(int64_t n, int w)
-{
-	FBits **pb, *b;
-	int i;
-
-	/* does a dumb de-dup of fp constants
-	 * this should be the linker's job */
-	for (pb=&stash, i=0; (b=*pb); pb=&b->link, i++)
-		if (n == b->bits && w == b->wide)
-			return i;
-	b = emalloc(sizeof *b);
-	b->bits = n;
-	b->wide = w;
-	b->link = 0;
-	*pb = b;
-	return i;
-}
-
-void
-emitfin(FILE *f)
-{
-	FBits *b;
-	int i;
-
-	if (!stash)
-		return;
-	fprintf(f, "/* floating point constants */\n");
-	fprintf(f, ".data\n.align 8\n");
-	for (b=stash, i=0; b; b=b->link, i++)
-		if (b->wide)
-			fprintf(f,
-				"%sfp%d:\n"
-				"\t.quad %"PRId64
-				" /* %f */\n",
-				locprefix, i, b->bits,
-				*(double *)&b->bits
-			);
-	for (b=stash, i=0; b; b=b->link, i++)
-		if (!b->wide)
-			fprintf(f,
-				"%sfp%d:\n"
-				"\t.long %"PRId64
-				" /* %lf */\n",
-				locprefix, i, b->bits & 0xffffffff,
-				*(float *)&b->bits
-			);
-	while ((b=stash)) {
-		stash = b->link;
-		free(b);
-	}
-}
diff --git a/src/isel.c b/src/isel.c
deleted file mode 100644
index 2a55733..0000000
--- a/src/isel.c
+++ /dev/null
@@ -1,1136 +0,0 @@
-#include "all.h"
-#include <limits.h>
-
-/* For x86_64, do the following:
- *
- * - lower calls
- * - check that constants are used only in
- *   places allowed
- * - ensure immediates always fit in 32b
- * - explicit machine register contraints
- *   on instructions like division.
- * - implement fast locals (the streak of
- *   constant allocX in the first basic block)
- * - recognize complex addressing modes
- *
- * Invariant: the use counts that are used
- *            in sel() must be sound.  This
- *            is not so trivial, maybe the
- *            dce should be moved out...
- */
-
-typedef struct ANum ANum;
-typedef struct AClass AClass;
-typedef struct RAlloc RAlloc;
-
-struct ANum {
-	char n, l, r;
-	Ins *i;
-	Ref mem;
-};
-
-static void amatch(Addr *, Ref, ANum *, Fn *, int);
-
-static int
-fcmptoi(int fc)
-{
-	switch (fc) {
-	default:   diag("isel: fcmptoi defaulted");
-	case FCle: return ICule;
-	case FClt: return ICult;
-	case FCgt: return ICugt;
-	case FCge: return ICuge;
-	case FCne: return ICne;
-	case FCeq: return ICeq;
-	case FCo:  return ICXnp;
-	case FCuo: return ICXp;
-	}
-}
-
-static int
-iscmp(int op, int *pk, int *pc)
-{
-	int k, c;
-
-	if (OCmpw <= op && op <= OCmpw1) {
-		c = op - OCmpw;
-		k = Kw;
-	}
-	else if (OCmpl <= op && op <= OCmpl1) {
-		c = op - OCmpl;
-		k = Kl;
-	}
-	else if (OCmps <= op && op <= OCmps1) {
-		c = fcmptoi(op - OCmps);
-		k = Ks;
-	}
-	else if (OCmpd <= op && op <= OCmpd1) {
-		c = fcmptoi(op - OCmpd);
-		k = Kd;
-	}
-	else
-		return 0;
-	if (pk)
-		*pk = k;
-	if (pc)
-		*pc = c;
-	return 1;
-}
-
-static int
-noimm(Ref r, Fn *fn)
-{
-	int64_t val;
-
-	if (rtype(r) != RCon)
-		return 0;
-	switch (fn->con[r.val].type) {
-	default:
-		diag("isel: invalid constant");
-	case CAddr:
-		/* we only support the 'small'
-		 * code model of the ABI, this
-		 * means that we can always
-		 * address data with 32bits
-		 */
-		return 0;
-	case CBits:
-		val = fn->con[r.val].bits.i;
-		return (val < INT32_MIN || val > INT32_MAX);
-	}
-}
-
-static int
-rslot(Ref r, Fn *fn)
-{
-	if (rtype(r) != RTmp)
-		return -1;
-	return fn->tmp[r.val].slot;
-}
-
-static int
-argcls(Ins *i, int n)
-{
-	return opdesc[i->op].argcls[n][i->cls];
-}
-
-static void
-fixarg(Ref *r, int k, int phi, Fn *fn)
-{
-	Addr a;
-	Ref r0, r1;
-	int s, n;
-
-	r1 = r0 = *r;
-	s = rslot(r0, fn);
-	if (KBASE(k) == 1 && rtype(r0) == RCon) {
-		/* load floating points from memory
-		 * slots, they can't be used as
-		 * immediates
-		 */
-		r1 = MEM(fn->nmem);
-		vgrow(&fn->mem, ++fn->nmem);
-		memset(&a, 0, sizeof a);
-		a.offset.type = CAddr;
-		a.offset.local = 1;
-		n = stashfp(fn->con[r0.val].bits.i, KWIDE(k));
-		sprintf(a.offset.label, "fp%d", n);
-		fn->mem[fn->nmem-1] = a;
-	}
-	else if (!phi && k == Kl && noimm(r0, fn)) {
-		/* load constants that do not fit in
-		 * a 32bit signed integer into a
-		 * long temporary
-		 */
-		r1 = newtmp("isel", Kl, fn);
-		emit(OCopy, Kl, r1, r0, R);
-	}
-	else if (s != -1) {
-		/* load fast locals' addresses into
-		 * temporaries right before the
-		 * instruction
-		 */
-		r1 = newtmp("isel", Kl, fn);
-		emit(OAddr, Kl, r1, SLOT(s), R);
-	}
-	*r = r1;
-}
-
-static void
-chuse(Ref r, int du, Fn *fn)
-{
-	if (rtype(r) == RTmp)
-		fn->tmp[r.val].nuse += du;
-}
-
-static void
-seladdr(Ref *r, ANum *an, Fn *fn)
-{
-	Addr a;
-	Ref r0, r1;
-
-	r0 = *r;
-	if (rtype(r0) == RTmp) {
-		chuse(r0, -1, fn);
-		r1 = an[r0.val].mem;
-		if (req(r1, R)) {
-			amatch(&a, r0, an, fn, 1);
-			vgrow(&fn->mem, ++fn->nmem);
-			fn->mem[fn->nmem-1] = a;
-			r1 = MEM(fn->nmem-1);
-			chuse(a.base, +1, fn);
-			chuse(a.index, +1, fn);
-			if (rtype(a.base) != RTmp)
-			if (rtype(a.index) != RTmp)
-				an[r0.val].mem = r1;
-		}
-		*r = r1;
-	}
-}
-
-static void
-selcmp(Ref arg[2], int k, Fn *fn)
-{
-	Ref r;
-
-	if (rtype(arg[0]) == RCon) {
-		r = arg[1];
-		arg[1] = arg[0];
-		arg[0] = r;
-	}
-	assert(rtype(arg[0]) != RCon);
-	emit(OXCmp, k, R, arg[1], arg[0]);
-	fixarg(&curi->arg[0], k, 0, fn);
-}
-
-static void
-sel(Ins i, ANum *an, Fn *fn)
-{
-	Ref r0, r1;
-	int x, k, kc;
-	int64_t val;
-	Ins *i0;
-
-	if (rtype(i.to) == RTmp)
-	if (!isreg(i.to) && !isreg(i.arg[0]) && !isreg(i.arg[1]))
-	if (fn->tmp[i.to.val].nuse == 0) {
-		chuse(i.arg[0], -1, fn);
-		chuse(i.arg[1], -1, fn);
-		return;
-	}
-	i0 = curi;
-	k = i.cls;
-	switch (i.op) {
-	case ODiv:
-	case ORem:
-	case OUDiv:
-	case OURem:
-		if (i.op == ODiv || i.op == OUDiv)
-			r0 = TMP(RAX), r1 = TMP(RDX);
-		else
-			r0 = TMP(RDX), r1 = TMP(RAX);
-		emit(OCopy, k, i.to, r0, R);
-		emit(OCopy, k, R, r1, R);
-		if (rtype(i.arg[1]) == RCon) {
-			/* immediates not allowed for
-			 * divisions in x86
-			 */
-			r0 = newtmp("isel", k, fn);
-		} else
-			r0 = i.arg[1];
-		if (i.op == ODiv || i.op == ORem) {
-			emit(OXIDiv, k, R, r0, R);
-			emit(OSign, k, TMP(RDX), TMP(RAX), R);
-		} else {
-			emit(OXDiv, k, R, r0, R);
-			emit(OCopy, k, TMP(RDX), CON_Z, R);
-		}
-		emit(OCopy, k, TMP(RAX), i.arg[0], R);
-		if (rtype(i.arg[1]) == RCon)
-			emit(OCopy, k, r0, i.arg[1], R);
-		break;
-	case OSar:
-	case OShr:
-	case OShl:
-		if (rtype(i.arg[1]) == RCon)
-			goto Emit;
-		r0 = i.arg[1];
-		i.arg[1] = TMP(RCX);
-		emit(OCopy, Kw, R, TMP(RCX), R);
-		emiti(i);
-		emit(OCopy, Kw, TMP(RCX), r0, R);
-		break;
-	case ONop:
-		break;
-	case OStored:
-	case OStores:
-	case OStorel:
-	case OStorew:
-	case OStoreh:
-	case OStoreb:
-		if (rtype(i.arg[0]) == RCon) {
-			if (i.op == OStored)
-				i.op = OStorel;
-			if (i.op == OStores)
-				i.op = OStorew;
-		}
-		seladdr(&i.arg[1], an, fn);
-		goto Emit;
-	case_OLoad:
-		seladdr(&i.arg[0], an, fn);
-		goto Emit;
-	case OCall:
-	case OSAlloc:
-	case OCopy:
-	case OAdd:
-	case OSub:
-	case OMul:
-	case OAnd:
-	case OOr:
-	case OXor:
-	case OXTest:
-	case OFtosi:
-	case OSitof:
-	case OExts:
-	case OTruncd:
-	case OCast:
-	case_OExt:
-Emit:
-		emiti(i);
-		fixarg(&curi->arg[0], argcls(curi, 0), 0, fn);
-		fixarg(&curi->arg[1], argcls(curi, 1), 0, fn);
-		break;
-	case OAlloc:
-	case OAlloc+1:
-	case OAlloc+2: /* == OAlloc1 */
-		/* we need to make sure
-		 * the stack remains aligned
-		 * (rsp = 0) mod 16
-		 */
-		if (rtype(i.arg[0]) == RCon) {
-			assert(fn->con[i.arg[0].val].type == CBits);
-			val = fn->con[i.arg[0].val].bits.i;
-			val = (val + 15)  & ~INT64_C(15);
-			if (val < 0 || val > INT32_MAX)
-				diag("isel: alloc too large");
-			emit(OSAlloc, Kl, i.to, getcon(val, fn), R);
-		} else {
-			/* r0 = (i.arg[0] + 15) & -16 */
-			r0 = newtmp("isel", Kl, fn);
-			r1 = newtmp("isel", Kl, fn);
-			emit(OSAlloc, Kl, i.to, r0, R);
-			emit(OAnd, Kl, r0, r1, getcon(-16, fn));
-			emit(OAdd, Kl, r1, i.arg[0], getcon(15, fn));
-		}
-		break;
-	default:
-		if (isext(i.op))
-			goto case_OExt;
-		if (isload(i.op))
-			goto case_OLoad;
-		if (iscmp(i.op, &kc, &x)) {
-			if (rtype(i.arg[0]) == RCon)
-				x = icmpop(x);
-			emit(OXSet+x, k, i.to, R, R);
-			selcmp(i.arg, kc, fn);
-			break;
-		}
-		diag("isel: non-exhaustive implementation");
-	}
-
-	while (i0 > curi && --i0)
-		if (rslot(i0->arg[0], fn) != -1
-		||  rslot(i0->arg[1], fn) != -1)
-			diag("isel: usupported address argument");
-}
-
-static Ins *
-flagi(Ins *i0, Ins *i)
-{
-	while (i>i0) {
-		i--;
-		if (opdesc[i->op].sflag)
-			return i;
-		if (opdesc[i->op].lflag)
-			continue;
-		return 0;
-	}
-	return 0;
-}
-
-struct AClass {
-	int inmem;
-	int align;
-	uint size;
-	int cls[2];
-};
-
-static void
-aclass(AClass *a, Typ *t)
-{
-	int e, s, n, cls;
-	uint sz, al;
-
-	sz = t->size;
-	al = 1u << t->align;
-
-	/* the ABI requires sizes to be rounded
-	 * up to the nearest multiple of 8, moreover
-	 * it makes it easy load and store structures
-	 * in registers
-	 */
-	if (al < 8)
-		al = 8;
-	sz = (sz + al-1) & -al;
-
-	a->size = sz;
-	a->align = t->align;
-
-	if (t->dark || sz > 16) {
-		/* large or unaligned structures are
-		 * required to be passed in memory
-		 */
-		a->inmem = 1;
-		return;
-	}
-
-	a->inmem = 0;
-	for (e=0, s=0; e<2; e++) {
-		cls = -1;
-		for (n=0; n<8 && t->seg[s].len; s++) {
-			if (t->seg[s].ispad) {
-				/* don't change anything */
-			}
-			else if (t->seg[s].isflt) {
-				if (cls == -1)
-					cls = Kd;
-			}
-			else
-				cls = Kl;
-			n += t->seg[s].len;
-		}
-		assert(n <= 8);
-		a->cls[e] = cls;
-	}
-}
-
-static void
-blit(Ref rstk, uint soff, Ref rsrc, uint sz, Fn *fn)
-{
-	Ref r, r1;
-	uint boff;
-
-	/* it's an impolite blit, we might go across the end
-	 * of the source object a little bit... */
-	for (boff=0; sz>0; sz-=8, soff+=8, boff+=8) {
-		r = newtmp("abi", Kl, fn);
-		r1 = newtmp("abi", Kl, fn);
-		emit(OStorel, 0, R, r, r1);
-		emit(OAdd, Kl, r1, rstk, getcon(soff, fn));
-		r1 = newtmp("abi", Kl, fn);
-		emit(OLoad, Kl, r, r1, R);
-		emit(OAdd, Kl, r1, rsrc, getcon(boff, fn));
-		chuse(rsrc, +1, fn);
-		chuse(rstk, +1, fn);
-	}
-}
-
-static int
-retr(Ref reg[2], AClass *aret)
-{
-	static int retreg[2][2] = {{RAX, RDX}, {XMM0, XMM0+1}};
-	int n, k, ca, nr[2];
-
-	nr[0] = nr[1] = 0;
-	ca = 0;
-	for (n=0; aret->cls[n]>=0 && n<2; n++) {
-		k = KBASE(aret->cls[n]);
-		reg[n] = TMP(retreg[k][nr[k]++]);
-		ca += 1 << (2 * k);
-	}
-	return ca;
-}
-
-static void
-selret(Blk *b, Fn *fn)
-{
-	int j, k, ca;
-	Ref r, r0, reg[2];
-	AClass aret;
-
-	j = b->jmp.type;
-
-	if (!isret(j) || j == JRet0)
-		return;
-
-	r0 = b->jmp.arg;
-	b->jmp.type = JRet0;
-
-	if (j == JRetc) {
-		aclass(&aret, &typ[fn->retty]);
-		if (aret.inmem) {
-			assert(rtype(fn->retr) == RTmp);
-			emit(OCopy, Kl, TMP(RAX), fn->retr, R);
-			chuse(fn->retr, +1, fn);
-			blit(fn->retr, 0, r0, aret.size, fn);
-			ca = 1;
-		} else {
-			ca = retr(reg, &aret);
-			if (aret.size > 8) {
-				r = newtmp("abi", Kl, fn);
-				emit(OLoad, Kl, reg[1], r, R);
-				emit(OAdd, Kl, r, r0, getcon(8, fn));
-				chuse(r0, +1, fn);
-			}
-			emit(OLoad, Kl, reg[0], r0, R);
-		}
-	} else {
-		k = j - JRetw;
-		if (KBASE(k) == 0) {
-			emit(OCopy, k, TMP(RAX), r0, R);
-			ca = 1;
-		} else {
-			emit(OCopy, k, TMP(XMM0), r0, R);
-			ca = 1 << 2;
-		}
-	}
-
-	b->jmp.arg = CALL(ca);
-}
-
-static void
-seljmp(Blk *b, Fn *fn)
-{
-	Ref r;
-	int c, k;
-	Ins *fi;
-
-	if (b->jmp.type == JRet0 || b->jmp.type == JJmp)
-		return;
-	assert(b->jmp.type == JJnz);
-	r = b->jmp.arg;
-	b->jmp.arg = R;
-	assert(!req(r, R));
-	if (rtype(r) == RCon) {
-		b->jmp.type = JJmp;
-		if (req(r, CON_Z))
-			b->s1 = b->s2;
-		b->s2 = 0;
-		return;
-	}
-	fi = flagi(b->ins, &b->ins[b->nins]);
-	if (fi && req(fi->to, r)) {
-		if (iscmp(fi->op, &k, &c)) {
-			if (rtype(fi->arg[0]) == RCon)
-				c = icmpop(c);
-			b->jmp.type = JXJc + c;
-			if (fn->tmp[r.val].nuse == 1) {
-				assert(fn->tmp[r.val].ndef == 1);
-				selcmp(fi->arg, k, fn);
-				*fi = (Ins){.op = ONop};
-			}
-			return;
-		}
-		if (fi->op == OAnd && fn->tmp[r.val].nuse == 1
-		&& (rtype(fi->arg[0]) == RTmp ||
-		    rtype(fi->arg[1]) == RTmp)) {
-			fi->op = OXTest;
-			fi->to = R;
-			b->jmp.type = JXJc + ICne;
-			if (rtype(fi->arg[1]) == RCon) {
-				r = fi->arg[1];
-				fi->arg[1] = fi->arg[0];
-				fi->arg[0] = r;
-			}
-			return;
-		}
-		/* since flags are not tracked in liveness,
-		 * the result of the flag-setting instruction
-		 * has to be marked as live
-		 */
-		if (fn->tmp[r.val].nuse == 1)
-			emit(OCopy, Kw, R, r, R);
-		b->jmp.type = JXJc + ICne;
-		return;
-	}
-	selcmp((Ref[2]){r, CON_Z}, Kw, fn); /* todo, add long branch if non-zero */
-	b->jmp.type = JXJc + ICne;
-}
-
-static int
-classify(Ins *i0, Ins *i1, AClass *ac, int op, AClass *aret)
-{
-	int nint, ni, nsse, ns, n, *pn;
-	AClass *a;
-	Ins *i;
-
-	if (aret && aret->inmem)
-		nint = 5; /* hidden argument */
-	else
-		nint = 6;
-	nsse = 8;
-	for (i=i0, a=ac; i<i1; i++, a++) {
-		if (i->op == op) {
-			if (KBASE(i->cls) == 0)
-				pn = &nint;
-			else
-				pn = &nsse;
-			if (*pn > 0) {
-				--*pn;
-				a->inmem = 0;
-			} else
-				a->inmem = 2;
-			a->align = 3;
-			a->size = 8;
-			a->cls[0] = i->cls;
-		} else {
-			n = i->arg[0].val & AMask;
-			aclass(a, &typ[n]);
-			if (a->inmem)
-				continue;
-			ni = ns = 0;
-			for (n=0; n<2; n++)
-				if (KBASE(a->cls[n]) == 0)
-					ni++;
-				else
-					ns++;
-			if (nint >= ni && nsse >= ns) {
-				nint -= ni;
-				nsse -= ns;
-			} else
-				a->inmem = 1;
-		}
-	}
-
-	return ((6-nint) << 4) | ((8-nsse) << 8);
-}
-
-int rsave[] = {
-	RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX,
-	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-	XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14
-};
-int rclob[] = {RBX, R12, R13, R14, R15};
-
-MAKESURE(rsave_has_correct_size, sizeof rsave == NRSave * sizeof(int));
-MAKESURE(rclob_has_correct_size, sizeof rclob == NRClob * sizeof(int));
-
-bits
-retregs(Ref r, int p[2])
-{
-	bits b;
-	int ni, nf;
-
-	assert(rtype(r) == RACall);
-	b = 0;
-	ni = r.val & 3;
-	nf = (r.val >> 2) & 3;
-	if (ni >= 1)
-		b |= BIT(RAX);
-	if (ni >= 2)
-		b |= BIT(RDX);
-	if (nf >= 1)
-		b |= BIT(XMM0);
-	if (nf >= 2)
-		b |= BIT(XMM1);
-	if (p) {
-		p[0] = ni;
-		p[1] = nf;
-	}
-	return b;
-}
-
-bits
-argregs(Ref r, int p[2])
-{
-	bits b;
-	int j, ni, nf;
-
-	assert(rtype(r) == RACall);
-	b = 0;
-	ni = (r.val >> 4) & 15;
-	nf = (r.val >> 8) & 15;
-	for (j=0; j<ni; j++)
-		b |= BIT(rsave[j]);
-	for (j=0; j<nf; j++)
-		b |= BIT(XMM0+j);
-	if (p) {
-		p[0] = ni + 1;
-		p[1] = nf;
-	}
-	return b | BIT(RAX);
-}
-
-static Ref
-rarg(int ty, int *ni, int *ns)
-{
-	if (KBASE(ty) == 0)
-		return TMP(rsave[(*ni)++]);
-	else
-		return TMP(XMM0 + (*ns)++);
-}
-
-struct RAlloc {
-	Ins i;
-	RAlloc *link;
-};
-
-static void
-selcall(Fn *fn, Ins *i0, Ins *i1, RAlloc **rap)
-{
-	Ins *i;
-	AClass *ac, *a, aret;
-	int ca, ni, ns;
-	uint stk, off;
-	Ref r, r1, r2, reg[2], regcp[2];
-	RAlloc *ra;
-
-	ac = alloc((i1-i0) * sizeof ac[0]);
-	if (!req(i1->arg[1], R)) {
-		assert(rtype(i1->arg[1]) == RAType);
-		aclass(&aret, &typ[i1->arg[1].val & AMask]);
-		ca = classify(i0, i1, ac, OArg, &aret);
-	} else
-		ca = classify(i0, i1, ac, OArg, 0);
-
-	for (stk=0, a=&ac[i1-i0]; a>ac;)
-		if ((--a)->inmem) {
-			assert(a->align <= 4);
-			stk += a->size;
-			if (a->align == 4)
-				stk += stk & 15;
-		}
-	stk += stk & 15;
-	if (stk) {
-		r = getcon(-(int64_t)stk, fn);
-		emit(OSAlloc, Kl, R, r, R);
-	}
-
-	if (!req(i1->arg[1], R)) {
-		if (aret.inmem) {
-			/* get the return location from eax
-			 * it saves one callee-save reg */
-			r1 = newtmp("abi", Kl, fn);
-			emit(OCopy, Kl, i1->to, TMP(RAX), R);
-			ca += 1;
-		} else {
-			if (aret.size > 8) {
-				r = newtmp("abi", Kl, fn);
-				regcp[1] = newtmp("abi", aret.cls[1], fn);
-				emit(OStorel, 0, R, regcp[1], r);
-				emit(OAdd, Kl, r, i1->to, getcon(8, fn));
-				chuse(i1->to, +1, fn);
-				ca += 1 << (2 * KBASE(aret.cls[1]));
-			}
-			regcp[0] = newtmp("abi", aret.cls[0], fn);
-			emit(OStorel, 0, R, regcp[0], i1->to);
-			ca += 1 << (2 * KBASE(aret.cls[0]));
-			retr(reg, &aret);
-			if (aret.size > 8)
-				emit(OCopy, aret.cls[1], regcp[1], reg[1], R);
-			emit(OCopy, aret.cls[0], regcp[0], reg[0], R);
-			r1 = i1->to;
-		}
-		/* allocate return pad */
-		ra = alloc(sizeof *ra);
-		assert(NAlign == 3);
-		aret.align -= 2;
-		if (aret.align < 0)
-			aret.align = 0;
-		ra->i.op = OAlloc + aret.align;
-		ra->i.cls = Kl;
-		ra->i.to = r1;
-		ra->i.arg[0] = getcon(aret.size, fn);
-		ra->link = (*rap);
-		*rap = ra;
-	} else {
-		ra = 0;
-		if (KBASE(i1->cls) == 0) {
-			emit(OCopy, i1->cls, i1->to, TMP(RAX), R);
-			ca += 1;
-		} else {
-			emit(OCopy, i1->cls, i1->to, TMP(XMM0), R);
-			ca += 1 << 2;
-		}
-	}
-	emit(OCall, i1->cls, R, i1->arg[0], CALL(ca));
-	emit(OCopy, Kw, TMP(RAX), getcon((ca >> 8) & 15, fn), R);
-
-	ni = ns = 0;
-	if (ra && aret.inmem)
-		emit(OCopy, Kl, rarg(Kl, &ni, &ns), ra->i.to, R); /* pass hidden argument */
-	for (i=i0, a=ac; i<i1; i++, a++) {
-		if (a->inmem)
-			continue;
-		r1 = rarg(a->cls[0], &ni, &ns);
-		if (i->op == OArgc) {
-			if (a->size > 8) {
-				r2 = rarg(a->cls[1], &ni, &ns);
-				r = newtmp("abi", Kl, fn);
-				emit(OLoad, a->cls[1], r2, r, R);
-				emit(OAdd, Kl, r, i->arg[1], getcon(8, fn));
-				chuse(i->arg[1], +1, fn);
-			}
-			emit(OLoad, a->cls[0], r1, i->arg[1], R);
-		} else
-			emit(OCopy, i->cls, r1, i->arg[0], R);
-	}
-
-	if (!stk)
-		return;
-
-	r = newtmp("abi", Kl, fn);
-	chuse(r, -1, fn);
-	for (i=i0, a=ac, off=0; i<i1; i++, a++) {
-		if (!a->inmem)
-			continue;
-		if (i->op == OArgc) {
-			if (a->align == 4)
-				off += off & 15;
-			blit(r, off, i->arg[1], a->size, fn);
-		} else {
-			r1 = newtmp("abi", Kl, fn);
-			emit(OStorel, 0, R, i->arg[0], r1);
-			emit(OAdd, Kl, r1, r, getcon(off, fn));
-			chuse(r, +1, fn);
-		}
-		off += a->size;
-	}
-	emit(OSAlloc, Kl, r, getcon(stk, fn), R);
-}
-
-static void
-selpar(Fn *fn, Ins *i0, Ins *i1)
-{
-	AClass *ac, *a, aret;
-	Ins *i;
-	int ni, ns, s, al;
-	Ref r, r1;
-
-	ac = alloc((i1-i0) * sizeof ac[0]);
-	curi = insb;
-	ni = ns = 0;
-
-	if (fn->retty >= 0) {
-		aclass(&aret, &typ[fn->retty]);
-		if (aret.inmem) {
-			r = newtmp("abi", Kl, fn);
-			*curi++ = (Ins){OCopy, r, {rarg(Kl, &ni, &ns)}, Kl};
-			fn->retr = r;
-		}
-		classify(i0, i1, ac, OPar, &aret);
-	} else
-		classify(i0, i1, ac, OPar, 0);
-
-	assert(NAlign == 3);
-
-	s = 4;
-	for (i=i0, a=ac; i<i1; i++, a++) {
-		switch (a->inmem) {
-		case 1:
-			assert(a->align <= 4);
-			if (a->align == 4)
-				s = (s+3) & -4;
-			fn->tmp[i->to.val].slot = -s; /* HACK! */
-			s += a->size / 4;
-			continue;
-		case 2:
-			*curi++ = (Ins){OLoad, i->to, {SLOT(-s)}, i->cls};
-			s += 2;
-			continue;
-		}
-		r1 = rarg(a->cls[0], &ni, &ns);
-		if (i->op == OParc) {
-			r = newtmp("abi", Kl, fn);
-			*curi++ = (Ins){OCopy, r, {r1}, Kl};
-			a->cls[0] = r.val;
-			if (a->size > 8) {
-				r1 = rarg(a->cls[1], &ni, &ns);
-				r = newtmp("abi", Kl, fn);
-				*curi++ = (Ins){OCopy, r, {r1}, Kl};
-				a->cls[1] = r.val;
-			}
-		} else
-			*curi++ = (Ins){OCopy, i->to, {r1}, i->cls};
-	}
-	for (i=i0, a=ac; i<i1; i++, a++) {
-		if (i->op != OParc || a->inmem)
-			continue;
-		assert(NAlign == 3);
-		for (al=0; a->align >> (al+2); al++)
-			;
-		r = TMP(a->cls[0]);
-		r1 = i->to;
-		*curi++ = (Ins){OAlloc+al, r1, {getcon(a->size, fn)}, Kl};
-		*curi++ = (Ins){OStorel, R, {r, r1}, 0};
-		if (a->size > 8) {
-			r = newtmp("abi", Kl, fn);
-			*curi++ = (Ins){OAdd, r, {r1, getcon(8, fn)}, Kl};
-			r1 = TMP(a->cls[1]);
-			*curi++ = (Ins){OStorel, R, {r1, r}, 0};
-		}
-	}
-}
-
-static int
-aref(Ref r, ANum *ai)
-{
-	switch (rtype(r)) {
-	default:
-		diag("isel: aref defaulted");
-	case RCon:
-		return 2;
-	case RTmp:
-		return ai[r.val].n;
-	}
-}
-
-static int
-ascale(Ref r, Con *con)
-{
-	int64_t n;
-
-	if (rtype(r) != RCon)
-		return 0;
-	if (con[r.val].type != CBits)
-		return 0;
-	n = con[r.val].bits.i;
-	return n == 1 || n == 2 || n == 4 || n == 8;
-}
-
-static void
-anumber(ANum *ai, Blk *b, Con *con)
-{
-	/* This should be made obsolete by a proper
-	 * reassoc pass.
-	 *
-	 * Rules:
-	 *
-	 *   RTmp(_) -> 0    tmp
-	 *   ( RTmp(_) -> 1    slot )
-	 *   RCon(_) -> 2    con
-	 *   0 * 2   -> 3    s * i (when constant is 1,2,4,8)
-	 */
-	static char add[10][10] = {
-		[2] [2] = 2,              /* folding */
-		[2] [5] = 5, [5] [2] = 5,
-		[2] [6] = 6, [6] [2] = 6,
-		[2] [7] = 7, [7] [2] = 7,
-		[0] [0] = 4,              /* 4: b + s * i */
-		[0] [3] = 4, [3] [0] = 4,
-		[2] [3] = 5, [3] [2] = 5, /* 5: o + s * i */
-		[0] [2] = 6, [2] [0] = 6, /* 6: o + b */
-		[2] [4] = 7, [4] [2] = 7, /* 7: o + b + s * i */
-		[0] [5] = 7, [5] [0] = 7,
-		[6] [3] = 7, [3] [6] = 7,
-
-	};
-	int a, a1, a2, n1, n2, t1, t2;
-	Ins *i;
-
-	for (i=b->ins; i-b->ins < b->nins; i++) {
-		if (rtype(i->to) == RTmp)
-			ai[i->to.val].i = i;
-		if (i->op != OAdd && i->op != OMul)
-			continue;
-		a1 = aref(i->arg[0], ai);
-		a2 = aref(i->arg[1], ai);
-		t1 = a1 != 1 && a1 != 2;
-		t2 = a2 != 1 && a2 != 2;
-		if (i->op == OAdd) {
-			a = add[n1 = a1][n2 = a2];
-			if (t1 && a < add[0][a2])
-				a = add[n1 = 0][n2 = a2];
-			if (t2 && a < add[a1][0])
-				a = add[n1 = a1][n2 = 0];
-			if (t1 && t2 && a < add[0][0])
-				a = add[n1 = 0][n2 = 0];
-		} else {
-			n1 = n2 = a = 0;
-			if (ascale(i->arg[0], con) && t2)
-				a = 3, n1 = 2, n2 = 0;
-			if (t1 && ascale(i->arg[1], con))
-				a = 3, n1 = 0, n2 = 2;
-		}
-		ai[i->to.val].n = a;
-		ai[i->to.val].l = n1;
-		ai[i->to.val].r = n2;
-	}
-}
-
-static void
-amatch(Addr *a, Ref r, ANum *ai, Fn *fn, int top)
-{
-	Ins *i;
-	int nl, nr, t, s;
-	Ref al, ar;
-
-	if (top)
-		memset(a, 0, sizeof *a);
-	if (rtype(r) == RCon) {
-		addcon(&a->offset, &fn->con[r.val]);
-		return;
-	}
-	assert(rtype(r) == RTmp);
-	i = ai[r.val].i;
-	nl = ai[r.val].l;
-	nr = ai[r.val].r;
-	if (i) {
-		if (nl > nr) {
-			al = i->arg[1];
-			ar = i->arg[0];
-			t = nl, nl = nr, nr = t;
-		} else {
-			al = i->arg[0];
-			ar = i->arg[1];
-		}
-	}
-	switch (ai[r.val].n) {
-	default:
-		diag("isel: amatch defaulted");
-	case 3: /* s * i */
-		if (!top) {
-			a->index = al;
-			a->scale = fn->con[ar.val].bits.i;
-		} else
-			a->base = r;
-		break;
-	case 4: /* b + s * i */
-		switch (nr) {
-		case 0:
-			if (fn->tmp[ar.val].slot != -1) {
-				al = i->arg[1];
-				ar = i->arg[0];
-			}
-			a->index = ar;
-			a->scale = 1;
-			break;
-		case 3:
-			amatch(a, ar, ai, fn, 0);
-			break;
-		}
-		r = al;
-	case 0:
-		s = fn->tmp[r.val].slot;
-		if (s != -1)
-			r = SLOT(s);
-		a->base = r;
-		break;
-	case 2: /* constants */
-	case 5: /* o + s * i */
-	case 6: /* o + b */
-	case 7: /* o + b + s * i */
-		amatch(a, ar, ai, fn, 0);
-		amatch(a, al, ai, fn, 0);
-		break;
-	}
-}
-
-/* instruction selection
- * requires use counts (as given by parsing)
- */
-void
-isel(Fn *fn)
-{
-	Blk *b, **sb;
-	Ins *i, *i0, *ip;
-	Phi *p;
-	uint a;
-	int n, al;
-	int64_t sz;
-	ANum *ainfo;
-	RAlloc *ral;
-
-	for (n=0; n<fn->ntmp; n++)
-		fn->tmp[n].slot = -1;
-	fn->slot = 0;
-
-	/* lower arguments */
-	for (b=fn->start, i=b->ins; i-b->ins < b->nins; i++)
-		if (i->op != OPar && i->op != OParc)
-			break;
-	selpar(fn, b->ins, i);
-	n = b->nins - (i - b->ins) + (curi - insb);
-	i0 = alloc(n * sizeof(Ins));
-	ip = icpy(ip = i0, insb, curi - insb);
-	ip = icpy(ip, i, &b->ins[b->nins] - i);
-	b->nins = n;
-	b->ins = i0;
-
-	/* lower function calls and returns */
-	ral = 0;
-	b = fn->start;
-	do {
-		if (!(b = b->link))
-			b = fn->start; /* do it last */
-		curi = &insb[NIns];
-		selret(b, fn);
-		for (i=&b->ins[b->nins]; i!=b->ins;) {
-			if ((--i)->op == OCall) {
-				for (i0=i; i0>b->ins; i0--)
-					if ((i0-1)->op != OArg)
-					if ((i0-1)->op != OArgc)
-						break;
-				selcall(fn, i0, i, &ral);
-				i = i0;
-				continue;
-			}
-			assert(i->op != OArg && i->op != OArgc);
-			emiti(*i);
-		}
-		if (b == fn->start)
-			for (; ral; ral=ral->link)
-				emiti(ral->i);
-		b->nins = &insb[NIns] - curi;
-		idup(&b->ins, curi, b->nins);
-	} while (b != fn->start);
-
-	if (debug['A']) {
-		fprintf(stderr, "\n> After call lowering:\n");
-		printfn(fn, stderr);
-	}
-
-	/* assign slots to fast allocs */
-	b = fn->start;
-	assert(NAlign == 3 && "change n=4 and sz /= 4 below");
-	for (al=OAlloc, n=4; al<=OAlloc1; al++, n*=2)
-		for (i=b->ins; i-b->ins < b->nins; i++)
-			if (i->op == al) {
-				if (rtype(i->arg[0]) != RCon)
-					break;
-				sz = fn->con[i->arg[0].val].bits.i;
-				if (sz < 0 || sz >= INT_MAX-3)
-					diag("isel: invalid alloc size");
-				sz = (sz + n-1) & -n;
-				sz /= 4;
-				fn->tmp[i->to.val].slot = fn->slot;
-				fn->slot += sz;
-				*i = (Ins){.op = ONop};
-			}
-
-	/* process basic blocks */
-	n = fn->ntmp;
-	ainfo = emalloc(n * sizeof ainfo[0]);
-	for (b=fn->start; b; b=b->link) {
-		curi = &insb[NIns];
-		for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++)
-			for (p=(*sb)->phi; p; p=p->link) {
-				for (a=0; p->blk[a] != b; a++)
-					assert(a+1 < p->narg);
-				fixarg(&p->arg[a], p->cls, 1, fn);
-			}
-		memset(ainfo, 0, n * sizeof ainfo[0]);
-		anumber(ainfo, b, fn->con);
-		seljmp(b, fn);
-		for (i=&b->ins[b->nins]; i!=b->ins;)
-			sel(*--i, ainfo, fn);
-		b->nins = &insb[NIns] - curi;
-		idup(&b->ins, curi, b->nins);
-	}
-	free(ainfo);
-
-	if (debug['I']) {
-		fprintf(stderr, "\n> After instruction selection:\n");
-		printfn(fn, stderr);
-	}
-}
diff --git a/src/live.c b/src/live.c
deleted file mode 100644
index 44806e1..0000000
--- a/src/live.c
+++ /dev/null
@@ -1,174 +0,0 @@
-#include "all.h"
-
-void
-liveon(BSet *v, Blk *b, Blk *s)
-{
-	Phi *p;
-	uint a;
-
-	bscopy(v, s->in);
-	for (p=s->phi; p; p=p->link) {
-		bsclr(v, p->to.val);
-		for (a=0; a<p->narg; a++)
-			if (p->blk[a] == b)
-			if (rtype(p->arg[a]) == RTmp)
-				bsset(v, p->arg[a].val);
-	}
-}
-
-static int
-phitmp(int t, Tmp *tmp)
-{
-	int tp;
-
-	tp = tmp[t].phi;
-	return tp ? tp : t;
-}
-
-static void
-phifix(int t1, short *phi, Tmp *tmp)
-{
-	int t, t2;
-
-	/* detect temporaries arguments
-	 * of the same phi node that
-	 * interfere and separate them
-	 */
-	t = phitmp(t1, tmp);
-	t2 = phi[t];
-	if (t2 && t2 != t1) {
-		if (t != t1) {
-			tmp[t1].phi = t1;
-			t = t1;
-		} else {
-			tmp[t2].phi = t2;
-			phi[t2] = t2;
-		}
-	}
-	phi[t] = t1;
-}
-
-static void
-bset(Ref r, Blk *b, int *nlv, short *phi, Tmp *tmp)
-{
-
-	if (rtype(r) != RTmp)
-		return;
-	bsset(b->gen, r.val);
-	phifix(r.val, phi, tmp);
-	if (!bshas(b->in, r.val)) {
-		nlv[KBASE(tmp[r.val].cls)]++;
-		bsset(b->in, r.val);
-	}
-}
-
-/* liveness analysis
- * requires rpo computation
- */
-void
-filllive(Fn *f)
-{
-	Blk *b;
-	Ins *i;
-	int k, t, m[2], n, chg, nlv[2];
-	short *phi;
-	BSet u[1], v[1];
-	Mem *ma;
-
-	bsinit(u, f->ntmp);
-	bsinit(v, f->ntmp);
-	phi = emalloc(f->ntmp * sizeof phi[0]);
-	for (b=f->start; b; b=b->link) {
-		bsinit(b->in, f->ntmp);
-		bsinit(b->out, f->ntmp);
-		bsinit(b->gen, f->ntmp);
-	}
-	chg = 1;
-Again:
-	for (n=f->nblk-1; n>=0; n--) {
-		b = f->rpo[n];
-
-		bscopy(u, b->out);
-		if (b->s1) {
-			liveon(v, b, b->s1);
-			bsunion(b->out, v);
-		}
-		if (b->s2) {
-			liveon(v, b, b->s2);
-			bsunion(b->out, v);
-		}
-		chg |= !bsequal(b->out, u);
-
-		memset(phi, 0, f->ntmp * sizeof phi[0]);
-		memset(nlv, 0, sizeof nlv);
-		bscopy(b->in, b->out);
-		for (t=0; t<f->ntmp; t++)
-			if (bshas(b->in, t)) {
-				phifix(t, phi, f->tmp);
-				nlv[KBASE(f->tmp[t].cls)]++;
-			}
-		if (rtype(b->jmp.arg) == RACall) {
-			assert(bscount(b->in) == 0 && nlv[0] == 0 && nlv[1] == 0);
-			b->in->t[0] |= retregs(b->jmp.arg, nlv);
-		} else
-			bset(b->jmp.arg, b, nlv, phi, f->tmp);
-		for (k=0; k<2; k++)
-			b->nlive[k] = nlv[k];
-		for (i=&b->ins[b->nins]; i!=b->ins;) {
-			if ((--i)->op == OCall && rtype(i->arg[1]) == RACall) {
-				b->in->t[0] &= ~retregs(i->arg[1], m);
-				for (k=0; k<2; k++)
-					nlv[k] -= m[k];
-				if (nlv[0] + NISave > b->nlive[0])
-					b->nlive[0] = nlv[0] + NISave;
-				if (nlv[1] + NFSave > b->nlive[1])
-					b->nlive[1] = nlv[1] + NFSave;
-				b->in->t[0] |= argregs(i->arg[1], m);
-				for (k=0; k<2; k++)
-					nlv[k] += m[k];
-			}
-			if (!req(i->to, R)) {
-				assert(rtype(i->to) == RTmp);
-				t = i->to.val;
-				if (bshas(b->in, i->to.val))
-					nlv[KBASE(f->tmp[t].cls)]--;
-				bsset(b->gen, t);
-				bsclr(b->in, t);
-				phi[phitmp(t, f->tmp)] = 0;
-			}
-			for (k=0; k<2; k++)
-				switch (rtype(i->arg[k])) {
-				case RAMem:
-					ma = &f->mem[i->arg[k].val & AMask];
-					bset(ma->base, b, nlv, phi, f->tmp);
-					bset(ma->index, b, nlv, phi, f->tmp);
-					break;
-				default:
-					bset(i->arg[k], b, nlv, phi, f->tmp);
-					break;
-				}
-			for (k=0; k<2; k++)
-				if (nlv[k] > b->nlive[k])
-					b->nlive[k] = nlv[k];
-		}
-	}
-	if (chg) {
-		chg = 0;
-		goto Again;
-	}
-	free(phi);
-
-	if (debug['L']) {
-		fprintf(stderr, "\n> Liveness analysis:\n");
-		for (b=f->start; b; b=b->link) {
-			fprintf(stderr, "\t%-10sin:   ", b->name);
-			dumpts(b->in, f->tmp, stderr);
-			fprintf(stderr, "\t          out:  ");
-			dumpts(b->out, f->tmp, stderr);
-			fprintf(stderr, "\t          gen:  ");
-			dumpts(b->gen, f->tmp, stderr);
-			fprintf(stderr, "\t          live: ");
-			fprintf(stderr, "%d %d\n", b->nlive[0], b->nlive[1]);
-		}
-	}
-}
diff --git a/src/main.c b/src/main.c
deleted file mode 100644
index c1664be..0000000
--- a/src/main.c
+++ /dev/null
@@ -1,140 +0,0 @@
-#include "all.h"
-#include "config.h"
-#include <ctype.h>
-#include <getopt.h>
-
-char debug['Z'+1] = {
-	['P'] = 0, /* parsing */
-	['A'] = 0, /* abi lowering */
-	['I'] = 0, /* instruction selection */
-	['L'] = 0, /* liveness */
-	['M'] = 0, /* memory optimization */
-	['N'] = 0, /* ssa construction */
-	['C'] = 0, /* copy elimination */
-	['S'] = 0, /* spilling */
-	['R'] = 0, /* reg. allocation */
-};
-
-static FILE *outf;
-static int dbg;
-
-static void
-data(Dat *d)
-{
-	if (dbg)
-		return;
-	if (d->type == DEnd) {
-		fputs("/* end data */\n\n", outf);
-		freeall();
-	}
-	emitdat(d, outf);
-}
-
-static void
-func(Fn *fn)
-{
-	int n;
-
-	if (dbg)
-		fprintf(stderr, "**** Function %s ****", fn->name);
-	if (debug['P']) {
-		fprintf(stderr, "\n> After parsing:\n");
-		printfn(fn, stderr);
-	}
-	fillrpo(fn);
-	fillpreds(fn);
-	filluse(fn);
-	memopt(fn);
-	ssa(fn);
-	filluse(fn);
-	copy(fn);
-	filluse(fn);
-	isel(fn);
-	filllive(fn);
-	fillcost(fn);
-	spill(fn);
-	rega(fn);
-	fillrpo(fn);
-	assert(fn->rpo[0] == fn->start);
-	for (n=0;; n++)
-		if (n == fn->nblk-1) {
-			fn->rpo[n]->link = 0;
-			break;
-		} else
-			fn->rpo[n]->link = fn->rpo[n+1];
-	if (!dbg) {
-		emitfn(fn, outf);
-		fprintf(outf, "/* end function %s */\n\n", fn->name);
-	} else
-		fprintf(stderr, "\n");
-	freeall();
-}
-
-int
-main(int ac, char *av[])
-{
-	FILE *inf;
-	char *f;
-	int c, asm;
-
-	asm = Defaultasm;
-	outf = stdout;
-	while ((c = getopt(ac, av, "d:o:G:")) != -1)
-		switch (c) {
-		case 'd':
-			for (; *optarg; optarg++)
-				if (isalpha(*optarg)) {
-					debug[toupper(*optarg)] = 1;
-					dbg = 1;
-				}
-			break;
-		case 'o':
-			if (strcmp(optarg, "-") != 0)
-				outf = fopen(optarg, "w");
-			break;
-		case 'G':
-			if (strcmp(optarg, "e") == 0)
-				asm = Gaself;
-			else if (strcmp(optarg, "m") == 0)
-				asm = Gasmacho;
-			else {
-				fprintf(stderr, "unknown gas flavor '%s'\n", optarg);
-				exit(1);
-			}
-			break;
-		default:
-			fprintf(stderr, "usage: %s [-d <flags>] [-o out] {file.ssa, -}\n", av[0]);
-			exit(1);
-		}
-
-	switch (asm) {
-	case Gaself:
-		locprefix = ".L";
-		symprefix = "";
-		break;
-	case Gasmacho:
-		locprefix = "L";
-		symprefix = "_";
-		break;
-	}
-
-	do {
-		f = av[optind];
-		if (!f || strcmp(f, "-") == 0) {
-			inf = stdin;
-			f = "-";
-		} else {
-			inf = fopen(f, "r");
-			if (!inf) {
-				fprintf(stderr, "cannot open '%s'\n", f);
-				exit(1);
-			}
-		}
-		parse(inf, f, data, func);
-	} while (++optind < ac);
-
-	if (!dbg)
-		emitfin(outf);
-
-	exit(0);
-}
diff --git a/src/mem.c b/src/mem.c
deleted file mode 100644
index bda43d7..0000000
--- a/src/mem.c
+++ /dev/null
@@ -1,81 +0,0 @@
-#include "all.h"
-
-/* Memory optimization:
- *
- * - replace alloced slots used only in
- *   load/store operations
- *   Assumption: all the accesses have the
- *   same size (this could be wrong...)
- */
-
-/* require use, maintains use counts */
-void
-memopt(Fn *fn)
-{
-	Blk *b;
-	Ins *i, *l;
-	Tmp *t;
-	Use *u, *ue;
-	int a;
-
-	b = fn->start;
-	for (i=b->ins; i-b->ins < b->nins; i++) {
-		if (OAlloc > i->op || i->op > OAlloc1)
-			continue;
-		assert(NAlign == 3);
-		assert(rtype(i->to) == RTmp);
-		t = &fn->tmp[i->to.val];
-		for (u=t->use; u != &t->use[t->nuse]; u++) {
-			if (u->type != UIns)
-				goto NextIns;
-			l = u->u.ins;
-			if (!isload(l->op)
-			&& (!isstore(l->op) || req(i->to, l->arg[0])))
-				goto NextIns;
-		}
-		/* get rid of the alloc and replace uses */
-		*i = (Ins){.op = ONop};
-		t->ndef--;
-		ue = &t->use[t->nuse];
-		for (u=t->use; u!=ue; u++) {
-			l = u->u.ins;
-			if (isstore(l->op)) {
-				if (l->op == OStores)
-					l->cls = Kd;
-				else if (l->op == OStored)
-					l->cls = Kd;
-				else if (l->op == OStorel)
-					l->cls = Kl;
-				else
-					l->cls = Kw;
-				l->op = OCopy;
-				l->to = l->arg[1];
-				l->arg[1] = R;
-				t->nuse--;
-				t->ndef++;
-			} else
-				/* try to turn loads into copies so we
-				 * can eliminate them later */
-				switch(l->op) {
-				case OLoad:
-					l->op = OCopy;
-					break;
-				case OLoadsw:
-				case OLoaduw:
-					l->cls = Kw;
-					l->op = OCopy;
-					break;
-				default:
-					/* keep l->cls */
-					a = l->op - OLoadsw;
-					l->op = OExtsw + a;
-					break;
-				}
-		}
-	NextIns:;
-	}
-	if (debug['M']) {
-		fprintf(stderr, "\n> After memory optimization:\n");
-		printfn(fn, stderr);
-	}
-}
diff --git a/src/parse.c b/src/parse.c
deleted file mode 100644
index 2590971..0000000
--- a/src/parse.c
+++ /dev/null
@@ -1,1099 +0,0 @@
-#include "all.h"
-#include <ctype.h>
-#include <stdarg.h>
-
-enum {
-	Kx = -1, /* Invalid operand */
-	Km = Kl, /* Memory pointer (for x64) */
-};
-
-OpDesc opdesc[NOp] = {
-#define A(a,b,c,d) {[Kw]=K##a, [Kl]=K##b, [Ks]=K##c, [Kd]=K##d}
-
-	/*            NAME       NM      ARGCLS0     ARGCLS1  SF LF */
-	[OAdd]    = { "add",      2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 },
-	[OSub]    = { "sub",      2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 },
-	[ODiv]    = { "div",      2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 },
-	[ORem]    = { "rem",      2, {A(w,l,x,x), A(w,l,x,x)}, 0, 0 },
-	[OUDiv]   = { "udiv",     2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 },
-	[OURem]   = { "urem",     2, {A(w,l,x,x), A(w,l,x,x)}, 0, 0 },
-	[OMul]    = { "mul",      2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 },
-	[OAnd]    = { "and",      2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 },
-	[OOr]     = { "or",       2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 },
-	[OXor]    = { "xor",      2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 },
-	[OSar]    = { "sar",      1, {A(w,l,x,x), A(w,w,x,x)}, 1, 0 },
-	[OShr]    = { "shr",      1, {A(w,l,x,x), A(w,w,x,x)}, 1, 0 },
-	[OShl]    = { "shl",      1, {A(w,l,x,x), A(w,w,x,x)}, 1, 0 },
-	[OStored] = { "stored",   0, {A(d,d,d,d), A(m,m,m,m)}, 0, 1 },
-	[OStores] = { "stores",   0, {A(s,s,s,s), A(m,m,m,m)}, 0, 1 },
-	[OStorel] = { "storel",   0, {A(l,l,l,l), A(m,m,m,m)}, 0, 1 },
-	[OStorew] = { "storew",   0, {A(w,w,w,w), A(m,m,m,m)}, 0, 1 },
-	[OStoreh] = { "storeh",   0, {A(w,w,w,w), A(m,m,m,m)}, 0, 1 },
-	[OStoreb] = { "storeb",   0, {A(w,w,w,w), A(m,m,m,m)}, 0, 1 },
-	[OLoad]   = { "load",     0, {A(m,m,m,m), A(x,x,x,x)}, 0, 1 },
-	[OLoadsw] = { "loadsw",   0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 },
-	[OLoaduw] = { "loaduw",   0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 },
-	[OLoadsh] = { "loadsh",   0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 },
-	[OLoaduh] = { "loaduh",   0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 },
-	[OLoadsb] = { "loadsb",   0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 },
-	[OLoadub] = { "loadub",   0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 },
-	[OExtsw]  = { "extsw",    0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 },
-	[OExtuw]  = { "extuw",    0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 },
-	[OExtsh]  = { "extsh",    0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 },
-	[OExtuh]  = { "extuh",    0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 },
-	[OExtsb]  = { "extsb",    0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 },
-	[OExtub]  = { "extub",    0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 },
-	[OExts]   = { "exts",     0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 },
-	[OTruncd] = { "truncd",   0, {A(d,d,d,d), A(x,x,x,x)}, 0, 1 },
-	[OFtosi]  = { "ftosi",    0, {A(s,d,x,x), A(x,x,x,x)}, 0, 1 },
-	[OSitof]  = { "sitof",    0, {A(x,x,w,l), A(x,x,x,x)}, 0, 1 },
-	[OCast]   = { "cast",     0, {A(s,d,w,l), A(x,x,x,x)}, 0, 1 },
-	[OCopy]   = { "copy",     1, {A(w,l,s,d), A(x,x,x,x)}, 0, 1 },
-	[ONop]    = { "nop",      0, {A(x,x,x,x), A(x,x,x,x)}, 0, 1 },
-	[OSwap]   = { "swap",     2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 },
-	[OSign]   = { "sign",     0, {A(w,l,x,x), A(x,x,x,x)}, 0, 0 },
-	[OSAlloc] = { "salloc",   0, {A(x,l,x,x), A(x,x,x,x)}, 0, 0 },
-	[OXDiv]   = { "xdiv",     1, {A(w,l,x,x), A(x,x,x,x)}, 0, 0 },
-	[OXCmp]   = { "xcmp",     1, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 },
-	[OXTest]  = { "xtest",    1, {A(w,l,x,x), A(w,l,x,x)}, 1, 0 },
-	[OAddr]   = { "addr",     0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 },
-	[OPar]    = { "parn",     0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 },
-	[OParc]   = { "parc",     0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 },
-	[OArg]    = { "arg",      0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 },
-	[OArgc]   = { "argc",     0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 },
-	[OCall]   = { "call",     0, {A(m,m,m,m), A(x,x,x,x)}, 0, 0 },
-	[OXSetnp] = { "xsetnp",   0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 },
-	[OXSetp]  = { "xsetp",    0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 },
-	[OAlloc]   = { "alloc4",  1, {A(l,l,l,l), A(x,x,x,x)}, 0, 0 },
-	[OAlloc+1] = { "alloc8",  1, {A(l,l,l,l), A(x,x,x,x)}, 0, 0 },
-	[OAlloc+2] = { "alloc16", 1, {A(l,l,l,l), A(x,x,x,x)}, 0, 0 },
-#define X(c) \
-	[OCmpw+IC##c] = { "c"    #c "w", 0, {A(w,w,x,x), A(w,w,x,x)}, 1, 0 }, \
-	[OCmpl+IC##c] = { "c"    #c "l", 0, {A(l,l,x,x), A(l,l,x,x)}, 1, 0 }, \
-	[OXSet+IC##c] = { "xset" #c,     0, {A(x,x,x,x), A(x,x,x,x)}, 0, 1 },
-	ICMPS(X)
-#undef X
-#define X(c) \
-	[OCmps+FC##c] = { "c"    #c "s", 0, {A(s,s,x,x), A(s,s,x,x)}, 1, 0 }, \
-	[OCmpd+FC##c] = { "c"    #c "d", 0, {A(d,d,x,x), A(d,d,x,x)}, 1, 0 },
-	FCMPS(X)
-#undef X
-
-};
-#undef A
-
-typedef enum {
-	PXXX,
-	PLbl,
-	PPhi,
-	PIns,
-	PEnd,
-} PState;
-
-enum {
-	TXXX = NPubOp,
-	TCall,
-	TPhi,
-	TJmp,
-	TJnz,
-	TRet,
-	TExport,
-	TFunc,
-	TType,
-	TData,
-	TAlign,
-	TL,
-	TW,
-	TH,
-	TB,
-	TD,
-	TS,
-	TZ,
-
-	TInt,
-	TFlts,
-	TFltd,
-	TTmp,
-	TLbl,
-	TGlo,
-	TTyp,
-	TStr,
-
-	TPlus,
-	TEq,
-	TComma,
-	TLParen,
-	TRParen,
-	TLBrace,
-	TRBrace,
-	TNL,
-	TEOF,
-};
-
-
-static FILE *inf;
-static char *inpath;
-static int thead;
-static struct {
-	char chr;
-	double fltd;
-	float flts;
-	int64_t num;
-	char *str;
-} tokval;
-static int lnum;
-
-static Tmp *tmp;
-static Con *con;
-static int ntmp;
-static int ncon;
-static Phi **plink;
-static Blk **bmap;
-static Blk *curb;
-static Blk **blink;
-static int nblk;
-static int rcls;
-static int ntyp;
-
-
-void
-err(char *s, ...)
-{
-	char buf[100], *p, *end;
-	va_list ap;
-
-	p = buf;
-	end = buf + sizeof(buf);
-
-	va_start(ap, s);
-	p += snprintf(p, end - p, "%s:%d: ", inpath, lnum);
-	p += vsnprintf(p, end - p, s, ap);
-	va_end(ap);
-
-	diag(buf);
-}
-
-static int
-lex()
-{
-	static struct {
-		char *str;
-		int tok;
-	} tmap[] = {
-		{ "call", TCall },
-		{ "phi", TPhi },
-		{ "jmp", TJmp },
-		{ "jnz", TJnz },
-		{ "ret", TRet },
-		{ "export", TExport },
-		{ "function", TFunc },
-		{ "type", TType },
-		{ "data", TData },
-		{ "align", TAlign },
-		{ "l", TL },
-		{ "w", TW },
-		{ "h", TH },
-		{ "b", TB },
-		{ "d", TD },
-		{ "s", TS },
-		{ "z", TZ },
-		{ "loadw", OLoad }, /* for convenience */
-		{ "loadl", OLoad },
-		{ "loads", OLoad },
-		{ "loadd", OLoad },
-		{ "alloc1", OAlloc },
-		{ "alloc2", OAlloc },
-		{ 0, TXXX }
-	};
-	static char tok[NString];
-	int c, i;
-	int t;
-
-	do
-		c = fgetc(inf);
-	while (isblank(c));
-	t = TXXX;
-	tokval.chr = c;
-	switch (c) {
-	case EOF:
-		return TEOF;
-	case ',':
-		return TComma;
-	case '(':
-		return TLParen;
-	case ')':
-		return TRParen;
-	case '{':
-		return TLBrace;
-	case '}':
-		return TRBrace;
-	case '=':
-		return TEq;
-	case '+':
-		return TPlus;
-	case 's':
-		if (fscanf(inf, "_%f", &tokval.flts) != 1)
-			break;
-		return TFlts;
-	case 'd':
-		if (fscanf(inf, "_%lf", &tokval.fltd) != 1)
-			break;
-		return TFltd;
-	case '%':
-		t = TTmp;
-		goto Alpha;
-	case '@':
-		t = TLbl;
-		goto Alpha;
-	case '$':
-		t = TGlo;
-		goto Alpha;
-	case ':':
-		t = TTyp;
-		goto Alpha;
-	case '#':
-		while (fgetc(inf) != '\n')
-			;
-	case '\n':
-		lnum++;
-		return TNL;
-	}
-	if (isdigit(c) || c == '-' || c == '+') {
-		ungetc(c, inf);
-		if (fscanf(inf, "%"SCNd64, &tokval.num) != 1)
-			err("invalid integer literal");
-		return TInt;
-	}
-	if (c == '"') {
-		tokval.str = vnew(0, 1);
-		for (i=0;; i++) {
-			c = fgetc(inf);
-			vgrow(&tokval.str, i+1);
-			if (c == '"')
-			if (!i || tokval.str[i-1] != '\\') {
-				tokval.str[i] = 0;
-				return TStr;
-			}
-			tokval.str[i] = c;
-		}
-	}
-	if (0)
-Alpha:		c = fgetc(inf);
-	if (!isalpha(c) && c != '.' && c != '_')
-		err("lexing failure: invalid character %c (%d)", c, c);
-	i = 0;
-	do {
-		if (i >= NString-1)
-			err("identifier too long");
-		tok[i++] = c;
-		c = fgetc(inf);
-	} while (isalpha(c) || c == '$' || c == '.' || c == '_' || isdigit(c));
-	tok[i] = 0;
-	ungetc(c, inf);
-	tokval.str = tok;
-	if (t != TXXX) {
-		return t;
-	}
-	for (i=0; i<NPubOp; i++)
-		if (opdesc[i].name)
-		if (strcmp(tok, opdesc[i].name) == 0)
-			return i;
-	for (i=0; tmap[i].str; i++)
-		if (strcmp(tok, tmap[i].str) == 0)
-			return tmap[i].tok;
-	err("unknown keyword %s", tokval.str);
-	return TXXX;
-}
-
-static int
-peek()
-{
-	if (thead == TXXX)
-		thead = lex();
-	return thead;
-}
-
-static int
-next()
-{
-	int t;
-
-	t = peek();
-	thead = TXXX;
-	return t;
-}
-
-static int
-nextnl()
-{
-	int t;
-
-	while ((t = next()) == TNL)
-		;
-	return t;
-}
-
-static void
-expect(int t)
-{
-	static char *ttoa[] = {
-		[TLbl] = "label",
-		[TComma] = ",",
-		[TEq] = "=",
-		[TNL] = "newline",
-		[TLParen] = "(",
-		[TRParen] = ")",
-		[TLBrace] = "{",
-		[TRBrace] = "}",
-		[TEOF] = 0,
-	};
-	char buf[128], *s1, *s2;
-	int t1;
-
-	t1 = next();
-	if (t == t1)
-		return;
-	s1 = ttoa[t] ? ttoa[t] : "??";
-	s2 = ttoa[t1] ? ttoa[t1] : "??";
-	sprintf(buf, "%s expected, got %s instead", s1, s2);
-	err(buf);
-}
-
-static Ref
-tmpref(char *v)
-{
-	int t;
-
-	for (t=Tmp0; t<ntmp; t++)
-		if (strcmp(v, tmp[t].name) == 0)
-			return TMP(t);
-	vgrow(&tmp, ++ntmp);
-	strcpy(tmp[t].name, v);
-	return TMP(t);
-}
-
-static Ref
-parseref()
-{
-	Con c;
-	int i;
-
-	memset(&c, 0, sizeof c);
-	switch (next()) {
-	case TTmp:
-		return tmpref(tokval.str);
-	case TInt:
-		c.type = CBits;
-		c.bits.i = tokval.num;
-		goto Look;
-	case TFlts:
-		c.type = CBits;
-		c.bits.s = tokval.flts;
-		c.flt = 1;
-		goto Look;
-	case TFltd:
-		c.type = CBits;
-		c.bits.d = tokval.fltd;
-		c.flt = 2;
-		goto Look;
-	case TGlo:
-		c.type = CAddr;
-		strcpy(c.label, tokval.str);
-	Look:
-		for (i=0; i<ncon; i++)
-			if (con[i].type == c.type
-			&& con[i].bits.i == c.bits.i
-			&& strcmp(con[i].label, c.label) == 0)
-				return CON(i);
-		vgrow(&con, ++ncon);
-		con[i] = c;
-		return CON(i);
-	default:
-		return R;
-	}
-}
-
-static int
-parsecls(int *tyn)
-{
-	int i;
-
-	switch (next()) {
-	default:
-		err("invalid class specifier");
-	case TTyp:
-		for (i=0; i<ntyp; i++)
-			if (strcmp(tokval.str, typ[i].name) == 0) {
-				*tyn = i;
-				return 4;
-			}
-		err("undefined type");
-	case TW:
-		return Kw;
-	case TL:
-		return Kl;
-	case TS:
-		return Ks;
-	case TD:
-		return Kd;
-	}
-}
-
-static void
-parserefl(int arg)
-{
-	int k, t, ty;
-	Ref r;
-
-	expect(TLParen);
-	if (peek() == TRParen) {
-		next();
-		return;
-	}
-	for (;;) {
-		if (curi - insb >= NIns)
-			err("too many instructions (1)");
-		k = parsecls(&ty);
-		r = parseref();
-		if (req(r, R))
-			err("invalid reference argument");
-		if (!arg && rtype(r) != RTmp)
-			err("invalid function parameter");
-		if (k == 4)
-			if (arg)
-				*curi = (Ins){OArgc, R, {TYPE(ty), r}, Kl};
-			else
-				*curi = (Ins){OParc, r, {TYPE(ty)}, Kl};
-		else
-			if (arg)
-				*curi = (Ins){OArg, R, {r}, k};
-			else
-				*curi = (Ins){OPar, r, {R}, k};
-		curi++;
-		t = next();
-		if (t == TRParen)
-			break;
-		if (t != TComma)
-			err(", or ) expected");
-	}
-}
-
-static Blk *
-findblk(char *name)
-{
-	int i;
-
-	for (i=0; i<nblk; i++)
-		if (strcmp(bmap[i]->name, name) == 0)
-			return bmap[i];
-	vgrow(&bmap, ++nblk);
-	bmap[i] = blknew();
-	strcpy(bmap[i]->name, name);
-	return bmap[i];
-}
-
-static void
-closeblk()
-{
-	curb->nins = curi - insb;
-	idup(&curb->ins, insb, curb->nins);
-	blink = &curb->link;
-	curi = insb;
-}
-
-static PState
-parseline(PState ps)
-{
-	Ref arg[NPred] = {R};
-	Blk *blk[NPred];
-	Phi *phi;
-	Ref r;
-	Blk *b;
-	int t, op, i, k, ty;
-
-	t = nextnl();
-	if (ps == PLbl && t != TLbl && t != TRBrace)
-		err("label or } expected");
-	switch (t) {
-	default:
-		if (isstore(t)) {
-			/* operations without result */
-			r = R;
-			k = 0;
-			op = t;
-			goto DoOp;
-		}
-		err("label, instruction or jump expected");
-	case TRBrace:
-		return PEnd;
-	case TTmp:
-		break;
-	case TLbl:
-		b = findblk(tokval.str);
-		if (b->jmp.type != JXXX)
-			err("multiple definitions of block");
-		if (curb && curb->jmp.type == JXXX) {
-			closeblk();
-			curb->jmp.type = JJmp;
-			curb->s1 = b;
-		}
-		*blink = b;
-		curb = b;
-		plink = &curb->phi;
-		expect(TNL);
-		return PPhi;
-	case TRet:
-		curb->jmp.type = (int[]){
-			JRetw, JRetl,
-			JRets, JRetd,
-			JRetc, JRet0
-		}[rcls];
-		if (rcls < 5) {
-			r = parseref();
-			if (req(r, R))
-				err("return value expected");
-			curb->jmp.arg = r;
-		}
-		goto Close;
-	case TJmp:
-		curb->jmp.type = JJmp;
-		goto Jump;
-	case TJnz:
-		curb->jmp.type = JJnz;
-		r = parseref();
-		if (req(r, R))
-			err("invalid argument for jnz jump");
-		curb->jmp.arg = r;
-		expect(TComma);
-	Jump:
-		expect(TLbl);
-		curb->s1 = findblk(tokval.str);
-		if (curb->jmp.type != JJmp) {
-			expect(TComma);
-			expect(TLbl);
-			curb->s2 = findblk(tokval.str);
-		}
-	Close:
-		expect(TNL);
-		closeblk();
-		return PLbl;
-	}
-	r = tmpref(tokval.str);
-	expect(TEq);
-	k = parsecls(&ty);
-	op = next();
-DoOp:
-	if (op == TPhi) {
-		if (ps != PPhi)
-			err("unexpected phi instruction");
-		op = -1;
-	}
-	if (op == TCall) {
-		arg[0] = parseref();
-		parserefl(1);
-		expect(TNL);
-		op = OCall;
-		if (k == 4) {
-			k = Kl;
-			arg[1] = TYPE(ty);
-		} else
-			arg[1] = R;
-		goto Ins;
-	}
-	if (k == 4)
-		err("size class must be w, l, s, or d");
-	if (op >= NPubOp)
-		err("invalid instruction");
-	i = 0;
-	if (peek() != TNL)
-		for (;;) {
-			if (i == NPred)
-				err("too many arguments");
-			if (op == -1) {
-				expect(TLbl);
-				blk[i] = findblk(tokval.str);
-			}
-			arg[i] = parseref();
-			if (req(arg[i], R))
-				err("invalid instruction argument");
-			i++;
-			t = peek();
-			if (t == TNL)
-				break;
-			if (t != TComma)
-				err(", or end of line expected");
-			next();
-		}
-	next();
-	if (op != -1) {
-	Ins:
-		if (curi - insb >= NIns)
-			err("too many instructions (2)");
-		curi->op = op;
-		curi->cls = k;
-		curi->to = r;
-		curi->arg[0] = arg[0];
-		curi->arg[1] = arg[1];
-		curi++;
-		return PIns;
-	} else {
-		phi = alloc(sizeof *phi);
-		phi->to = r;
-		phi->cls = k;
-		memcpy(phi->arg, arg, i * sizeof arg[0]);
-		memcpy(phi->blk, blk, i * sizeof blk[0]);
-		phi->narg = i;
-		*plink = phi;
-		plink = &phi->link;
-		return PPhi;
-	}
-}
-
-static Fn *
-parsefn(int export)
-{
-	PState ps;
-	Fn *fn;
-
-	ntmp = Tmp0;
-	ncon = 1; /* first constant must be 0 */
-	curb = 0;
-	nblk = 0;
-	curi = insb;
-	tmp = vnew(ntmp, sizeof tmp[0]);
-	con = vnew(ncon, sizeof con[0]);
-	bmap = vnew(nblk, sizeof bmap[0]);
-	con[0].type = CBits;
-	fn = alloc(sizeof *fn);
-	fn->export = export;
-	blink = &fn->start;
-	fn->retty = -1;
-	if (peek() != TGlo)
-		rcls = parsecls(&fn->retty);
-	else
-		rcls = 5;
-	if (next() != TGlo)
-		err("function name expected");
-	strcpy(fn->name, tokval.str);
-	parserefl(0);
-	if (nextnl() != TLBrace)
-		err("function body must start with {");
-	ps = PLbl;
-	do
-		ps = parseline(ps);
-	while (ps != PEnd);
-	if (!curb)
-		err("empty file");
-	if (curb->jmp.type == JXXX)
-		err("last block misses jump");
-	fn->tmp = tmp;
-	fn->con = con;
-	fn->mem = vnew(0, sizeof fn->mem[0]);
-	fn->ntmp = ntmp;
-	fn->ncon = ncon;
-	fn->nmem = 0;
-	fn->nblk = nblk;
-	fn->rpo = 0;
-	return fn;
-}
-
-static void
-parsetyp()
-{
-	Typ *ty;
-	int t, n, sz, al, s, a, c, flt;
-
-	if (ntyp >= NTyp)
-		err("too many type definitions");
-	ty = &typ[ntyp++];
-	ty->align = -1;
-	if (nextnl() != TTyp ||  nextnl() != TEq)
-		err("type name, then = expected");
-	strcpy(ty->name, tokval.str);
-	t = nextnl();
-	if (t == TAlign) {
-		if (nextnl() != TInt)
-			err("alignment expected");
-		for (al=0; tokval.num /= 2; al++)
-			;
-		ty->align = al;
-		t = nextnl();
-	}
-	if (t != TLBrace)
-		err("type body must start with {");
-	t = nextnl();
-	if (t == TInt) {
-		ty->dark = 1;
-		ty->size = tokval.num;
-		if (ty->align == -1)
-			err("dark types need alignment");
-		t = nextnl();
-	} else {
-		ty->dark = 0;
-		n = -1;
-		sz = 0;
-		al = 0;
-		for (;;) {
-			flt = 0;
-			switch (t) {
-			default: err("invalid size specifier %c", tokval.chr);
-			case TD: flt = 1;
-			case TL: s = 8; a = 3; break;
-			case TS: flt = 1;
-			case TW: s = 4; a = 2; break;
-			case TH: s = 2; a = 1; break;
-			case TB: s = 1; a = 0; break;
-			}
-			if (a > al)
-				al = a;
-			if ((a = sz & (s-1))) {
-				a = s - a;
-				if (++n < NSeg) {
-					/* padding segment */
-					ty->seg[n].ispad = 1;
-					ty->seg[n].len = a;
-				}
-			}
-			t = nextnl();
-			if (t == TInt) {
-				c = tokval.num;
-				t = nextnl();
-			} else
-				c = 1;
-			while (c-- > 0) {
-				if (++n < NSeg) {
-					ty->seg[n].isflt = flt;
-					ty->seg[n].ispad = 0;
-					ty->seg[n].len = s;
-				}
-				sz += a + s;
-			}
-			if (t != TComma)
-				break;
-			t = nextnl();
-		}
-		if (++n >= NSeg)
-			ty->dark = 1;
-		else
-			ty->seg[n].len = 0;
-		if (ty->align == -1)
-			ty->align = al;
-		else
-			al = ty->align;
-		a = (1 << al) - 1;
-		ty->size = (sz + a) & ~a;
-	}
-	if (t != TRBrace)
-		err("expected closing }");
-}
-
-static void
-parsedatref(Dat *d)
-{
-	int t;
-
-	d->isref = 1;
-	d->u.ref.nam = tokval.str;
-	d->u.ref.off = 0;
-	t = peek();
-	if (t == TPlus) {
-		next();
-		if (next() != TInt)
-			err("invalid token after offset in ref");
-		d->u.ref.off = tokval.num;
-	}
-}
-
-static void
-parsedatstr(Dat *d)
-{
-	d->isstr = 1;
-	d->u.str = tokval.str;
-}
-
-static void
-parsedat(void cb(Dat *), int export)
-{
-	char s[NString];
-	int t;
-	Dat d;
-
-	d.type = DStart;
-	d.isstr = 0;
-	d.isref = 0;
-	d.export = export;
-	cb(&d);
-	if (nextnl() != TGlo || nextnl() != TEq)
-		err("data name, then = expected");
-	strcpy(s, tokval.str);
-	t = nextnl();
-	if (t == TAlign) {
-		if (nextnl() != TInt)
-			err("alignment expected");
-		d.type = DAlign;
-		d.u.num = tokval.num;
-		cb(&d);
-		t = nextnl();
-	}
-	d.type = DName;
-	d.u.str = s;
-	cb(&d);
-
-	if (t != TLBrace)
-		err("expected data contents in { .. }");
-	for (;;) {
-		switch (nextnl()) {
-		default: err("invalid size specifier %c in data", tokval.chr);
-		case TRBrace: goto Done;
-		case TL: d.type = DL; break;
-		case TW: d.type = DW; break;
-		case TH: d.type = DH; break;
-		case TB: d.type = DB; break;
-		case TS: d.type = DW; break;
-		case TD: d.type = DL; break;
-		case TZ: d.type = DZ; break;
-		}
-		t = nextnl();
-		do {
-			d.isref = 0;
-			d.isstr = 0;
-			memset(&d.u, 0, sizeof d.u);
-			if (t == TFlts)
-				d.u.flts = tokval.flts;
-			else if (t == TFltd)
-				d.u.fltd = tokval.fltd;
-			else if (t == TInt)
-				d.u.num = tokval.num;
-			else if (t == TGlo)
-				parsedatref(&d);
-			else if (t == TStr)
-				parsedatstr(&d);
-			else
-				err("constant literal expected");
-			cb(&d);
-			t = nextnl();
-		} while (t == TInt || t == TFlts || t == TFltd);
-		if (t == TRBrace)
-			break;
-		if (t != TComma)
-			err(", or } expected");
-	}
-Done:
-	d.type = DEnd;
-	cb(&d);
-}
-
-void
-parse(FILE *f, char *path, void data(Dat *), void func(Fn *))
-{
-	int t, export;
-
-	inf = f;
-	inpath = path;
-	lnum = 1;
-	thead = TXXX;
-	ntyp = 0;
-	for (;;) {
-		export = 0;
-		switch (nextnl()) {
-		default:
-			err("top-level definition expected");
-		case TExport:
-			export = 1;
-			t = nextnl();
-			if (t == TFunc) {
-		case TFunc:
-				func(parsefn(export));
-				break;
-			}
-			else if (t == TData) {
-		case TData:
-				parsedat(data, export);
-				break;
-			}
-			else
-				err("export can only qualify data and function");
-		case TType:
-			parsetyp();
-			break;
-		case TEOF:
-			return;
-		}
-	}
-}
-
-static void
-printcon(Con *c, FILE *f)
-{
-	switch (c->type) {
-	case CUndef:
-		break;
-	case CAddr:
-		fprintf(f, "$%s", c->label);
-		if (c->bits.i)
-			fprintf(f, "%+"PRIi64, c->bits.i);
-		break;
-	case CBits:
-		if (c->flt == 1)
-			fprintf(f, "s_%f", c->bits.s);
-		else if (c->flt == 2)
-			fprintf(f, "d_%lf", c->bits.d);
-		else
-			fprintf(f, "%"PRIi64, c->bits.i);
-		break;
-	}
-}
-
-void
-printref(Ref r, Fn *fn, FILE *f)
-{
-	int i;
-	Mem *m;
-
-	switch (rtype(r)) {
-	case RTmp:
-		if (r.val < Tmp0)
-			fprintf(f, "R%d", r.val);
-		else
-			fprintf(f, "%%%s", fn->tmp[r.val].name);
-		break;
-	case RCon:
-		printcon(&fn->con[r.val], f);
-		break;
-	case RSlot:
-		fprintf(f, "S%d", r.val);
-		break;
-	case RACall:
-		fprintf(f, "%03x", r.val & AMask);
-		break;
-	case RAType:
-		fprintf(f, ":%s", typ[r.val & AMask].name);
-		break;
-	case RAMem:
-		i = 0;
-		m = &fn->mem[r.val & AMask];
-		fputc('[', f);
-		if (m->offset.type != CUndef) {
-			printcon(&m->offset, f);
-			i = 1;
-		}
-		if (!req(m->base, R)) {
-			if (i)
-				fprintf(f, " + ");
-			printref(m->base, fn, f);
-			i = 1;
-		}
-		if (!req(m->index, R)) {
-			if (i)
-				fprintf(f, " + ");
-			fprintf(f, "%d * ", m->scale);
-			printref(m->index, fn, f);
-		}
-		fputc(']', f);
-		break;
-	}
-}
-
-void
-printfn(Fn *fn, FILE *f)
-{
-	static char *jtoa[NJmp] = {
-		[JRet0]     = "ret",
-		[JRetw]     = "retw",
-		[JRetl]     = "retl",
-		[JRetc]     = "retc",
-		[JRets]     = "rets",
-		[JRetd]     = "retd",
-		[JJnz]      = "jnz",
-		[JXJnp]     = "xjnp",
-		[JXJp]      = "xjp",
-	#define X(c) [JXJc+IC##c] = "xj" #c,
-		ICMPS(X)
-	#undef X
-	};
-	static char prcls[NOp] = {
-		[OArg] = 1,
-		[OSwap] = 1,
-		[OXCmp] = 1,
-		[OXTest] = 1,
-		[OXDiv] = 1,
-		[OXIDiv] = 1,
-	};
-	static char ktoc[] = "wlsd";
-	Blk *b;
-	Phi *p;
-	Ins *i;
-	uint n;
-
-	if (fn->export)
-		fprintf(f, "export ");
-	fprintf(f, "function $%s() {\n", fn->name);
-	for (b=fn->start; b; b=b->link) {
-		fprintf(f, "@%s\n", b->name);
-		for (p=b->phi; p; p=p->link) {
-			fprintf(f, "\t");
-			printref(p->to, fn, f);
-			fprintf(f, " =%c phi ", ktoc[p->cls]);
-			assert(p->narg);
-			for (n=0;; n++) {
-				fprintf(f, "@%s ", p->blk[n]->name);
-				printref(p->arg[n], fn, f);
-				if (n == p->narg-1) {
-					fprintf(f, "\n");
-					break;
-				} else
-					fprintf(f, ", ");
-			}
-		}
-		for (i=b->ins; i-b->ins < b->nins; i++) {
-			fprintf(f, "\t");
-			if (!req(i->to, R)) {
-				printref(i->to, fn, f);
-				fprintf(f, " =%c ", ktoc[i->cls]);
-			}
-			assert(opdesc[i->op].name);
-			fprintf(f, "%s", opdesc[i->op].name);
-			if (req(i->to, R) && prcls[i->op])
-				fputc(ktoc[i->cls], f);
-			if (!req(i->arg[0], R)) {
-				fprintf(f, " ");
-				printref(i->arg[0], fn, f);
-			}
-			if (!req(i->arg[1], R)) {
-				fprintf(f, ", ");
-				printref(i->arg[1], fn, f);
-			}
-			fprintf(f, "\n");
-		}
-		switch (b->jmp.type) {
-		case JRet0:
-		case JRetw:
-		case JRetl:
-		case JRets:
-		case JRetd:
-		case JRetc:
-			fprintf(f, "\t%s", jtoa[b->jmp.type]);
-			if (b->jmp.type != JRet0 || !req(b->jmp.arg, R)) {
-				fprintf(f, " ");
-				printref(b->jmp.arg, fn, f);
-			}
-			if (b->jmp.type == JRetc)
-				fprintf(f, ", :%s", typ[fn->retty].name);
-			fprintf(f, "\n");
-			break;
-		case JJmp:
-			if (b->s1 != b->link)
-				fprintf(f, "\tjmp @%s\n", b->s1->name);
-			break;
-		default:
-			fprintf(f, "\t%s ", jtoa[b->jmp.type]);
-			if (b->jmp.type == JJnz) {
-				printref(b->jmp.arg, fn, f);
-				fprintf(f, ", ");
-			}
-			fprintf(f, "@%s, @%s\n", b->s1->name, b->s2->name);
-			break;
-		}
-	}
-	fprintf(f, "}\n");
-}
diff --git a/src/rega.c b/src/rega.c
deleted file mode 100644
index 7f8edcf..0000000
--- a/src/rega.c
+++ /dev/null
@@ -1,598 +0,0 @@
-#include "all.h"
-
-#ifdef TEST_PMOV
-	#undef assert
-	#define assert(x) assert_test(#x, x)
-#endif
-
-typedef struct RMap RMap;
-
-struct RMap {
-	int t[NIReg+NFReg];
-	int r[NIReg+NFReg];
-	BSet b[1];
-	int n;
-};
-
-static bits regu;      /* registers used */
-static Tmp *tmp;       /* function temporaries */
-static Mem *mem;       /* function mem references */
-static struct {
-	Ref src, dst;
-	int cls;
-} *pm;                 /* parallel move constructed */
-static int cpm, npm;   /* capacity and size of pm */
-
-static int *
-hint(int t)
-{
-	return &tmp[phicls(t, tmp)].hint.r;
-}
-
-static void
-sethint(int t, int r)
-{
-	bits m;
-
-	m = tmp[phicls(t, tmp)].hint.m;
-	if (*hint(t) == -1)
-	if (!(BIT(r) & m))
-		*hint(t) = r;
-}
-
-static void
-rcopy(RMap *ma, RMap *mb)
-{
-	memcpy(ma->t, mb->t, sizeof ma->t);
-	memcpy(ma->r, mb->r, sizeof ma->r);
-	bscopy(ma->b, mb->b);
-	ma->n = mb->n;
-}
-
-static int
-rfind(RMap *m, int t)
-{
-	int i;
-
-	for (i=0; i<m->n; i++)
-		if (m->t[i] == t)
-			return m->r[i];
-	return -1;
-}
-
-static Ref
-rref(RMap *m, int t)
-{
-	int r, s;
-
-	r = rfind(m, t);
-	if (r == -1) {
-		s = tmp[t].slot;
-		assert(s != -1 && "should have spilled");
-		return SLOT(s);
-	} else
-		return TMP(r);
-}
-
-static void
-radd(RMap *m, int t, int r)
-{
-	assert((t >= Tmp0 || t == r) && "invalid temporary");
-	assert(((RAX <= r && r < RAX + NIReg) || (XMM0 <= r && r < XMM0 + NFReg)) && "invalid register");
-	assert(!bshas(m->b, t) && "temporary has mapping");
-	assert(!bshas(m->b, r) && "register already allocated");
-	assert(m->n <= NIReg+NFReg && "too many mappings");
-	bsset(m->b, t);
-	bsset(m->b, r);
-	m->t[m->n] = t;
-	m->r[m->n] = r;
-	m->n++;
-	regu |= BIT(r);
-}
-
-static Ref
-ralloc(RMap *m, int t)
-{
-	bits regs;
-	int r, r0, r1;
-
-	if (t < Tmp0) {
-		assert(bshas(m->b, t));
-		return TMP(t);
-	}
-	if (bshas(m->b, t)) {
-		r = rfind(m, t);
-		assert(r != -1);
-		return TMP(r);
-	}
-	r = *hint(t);
-	if (r == -1 || bshas(m->b, r)) {
-		regs = tmp[phicls(t, tmp)].hint.m;
-		regs |= m->b->t[0];
-		switch (KBASE(tmp[t].cls)) {
-		case 0:
-			r0 = RAX;
-			r1 = RAX + NIReg;
-			break;
-		case 1:
-			r0 = XMM0;
-			r1 = XMM0 + NFReg;
-			break;
-		}
-		for (r=r0; r<r1; r++)
-			if (!(regs & BIT(r)))
-				goto Found;
-		for (r=r0; r<r1; r++)
-			if (!bshas(m->b, r))
-				goto Found;
-		diag("rega: no more regs");
-	}
-Found:
-	radd(m, t, r);
-	sethint(t, r);
-	return TMP(r);
-}
-
-static int
-rfree(RMap *m, int t)
-{
-	int i, r;
-
-	if (!bshas(m->b, t))
-		return -1;
-	for (i=0; m->t[i] != t; i++)
-		assert(i+1 < m->n);
-	r = m->r[i];
-	bsclr(m->b, t);
-	bsclr(m->b, r);
-	m->n--;
-	memmove(&m->t[i], &m->t[i+1], (m->n-i) * sizeof m->t[0]);
-	memmove(&m->r[i], &m->r[i+1], (m->n-i) * sizeof m->r[0]);
-	return r;
-}
-
-static void
-mdump(RMap *m)
-{
-	int i;
-
-	for (i=0; i<m->n; i++)
-		fprintf(stderr, " (%s, R%d)",
-			tmp[m->t[i]].name,
-			m->r[i]);
-	fprintf(stderr, "\n");
-}
-
-static void
-pmadd(Ref src, Ref dst, int k)
-{
-	if (npm == cpm) {
-		cpm = cpm * 2 + 16;
-		pm = realloc(pm, cpm * sizeof pm[0]);
-		if (!pm)
-			diag("pmadd: out of memory");
-	}
-	pm[npm].src = src;
-	pm[npm].dst = dst;
-	pm[npm].cls = k;
-	npm++;
-}
-
-enum PMStat { ToMove, Moving, Moved };
-
-static Ref
-pmrec(enum PMStat *status, int i, int *k)
-{
-	Ref swp, swp1;
-	int j, k1;
-
-	/* note, this routine might emit
-	 * too many large instructions:
-	 *
-	 *                  , x -- x
-	 *      x -- x -- x        |
-	 *                  ` x -- x
-	 *
-	 * if only the first move is wide
-	 * the whole cycle will be wide,
-	 * this is safe but not necessary
-	 */
-
-	if (req(pm[i].src, pm[i].dst))
-		return R;
-	status[i] = Moving;
-	assert(KBASE(*k) == KBASE(pm[i].cls));
-	assert((Kw|1) == Kl && (Ks|1) == Kd);
-	*k |= KWIDE(pm[i].cls); /* see above */
-	swp = R;
-	for (j=0; j<npm; j++) {
-		if (req(pm[j].src, pm[i].dst))
-			switch (status[j]) {
-			case ToMove:
-				k1 = *k;
-				swp1 = pmrec(status, j, &k1);
-				if (!req(swp1, R)) {
-					assert(req(swp, R));
-					swp = swp1;
-					*k = k1;
-				}
-				break;
-			case Moving:
-				assert(req(swp, R));
-				swp = pm[i].dst;
-				break;
-			case Moved:
-				break;
-			}
-	}
-	status[i] = Moved;
-	if (req(swp, R)) {
-		*curi++ = (Ins){OCopy, pm[i].dst, {pm[i].src}, pm[i].cls};
-		return R;
-	} else if (!req(swp, pm[i].src)) {
-		*curi++ = (Ins){OSwap, R, {pm[i].src, pm[i].dst}, *k};
-		return swp;
-	} else
-		return R;
-
-}
-
-static void
-pmgen()
-{
-	int i, k;
-	enum PMStat *status;
-
-	status = alloc(npm * sizeof status[0]);
-	assert(!npm || status[npm-1] == ToMove);
-	curi = insb;
-	for (i=0; i<npm; i++)
-		if (status[i] == ToMove) {
-			k = pm[i].cls;
-			pmrec(status, i, &k);
-		}
-}
-
-static void
-move(int r, Ref to, RMap *m)
-{
-	int n, t, r1;
-
-	r1 = req(to, R) ? -1 : rfree(m, to.val);
-	if (bshas(m->b, r) && r1 != r) {
-		/* r is used and not by to */
-		for (n=0; m->r[n] != r; n++)
-			assert(n+1 < m->n);
-		t = m->t[n];
-		rfree(m, t);
-		bsset(m->b, r);
-		ralloc(m, t);
-		bsclr(m->b, r);
-	}
-	t = req(to, R) ? r : to.val;
-	radd(m, t, r);
-}
-
-static int
-regcpy(Ins *i)
-{
-	return i->op == OCopy && isreg(i->arg[0]);
-}
-
-static Ins *
-dopm(Blk *b, Ins *i, RMap *m)
-{
-	RMap m0;
-	int n, r, r1, t, s;
-	Ins *i0, *i1, *ip, *ir;
-	bits def;
-
-	m0 = *m;
-	i1 = ++i;
-	do {
-		i--;
-		move(i->arg[0].val, i->to, m);
-	} while (i != b->ins && regcpy(i-1));
-	assert(m0.n <= m->n);
-	if (i != b->ins && (i-1)->op == OCall) {
-		def = retregs((i-1)->arg[1], 0);
-		for (r=0; r<NRSave; r++)
-			if (!(BIT(rsave[r]) & def))
-				move(rsave[r], R, m);
-	}
-	for (npm=0, n=0; n<m->n; n++) {
-		t = m->t[n];
-		s = tmp[t].slot;
-		r1 = m->r[n];
-		r = rfind(&m0, t);
-		if (r != -1)
-			pmadd(TMP(r1), TMP(r), tmp[t].cls);
-		else if (s != -1)
-			pmadd(TMP(r1), SLOT(s), tmp[t].cls);
-	}
-	for (ip=i; ip<i1; ip++) {
-		if (!req(ip->to, R))
-			rfree(m, ip->to.val);
-		r = ip->arg[0].val;
-		if (rfind(m, r) == -1)
-			radd(m, r, r);
-	}
-	pmgen();
-#ifdef TEST_PMOV
-	return 0;
-#endif
-	n = b->nins - (i1 - i) + (curi - insb);
-	i0 = alloc(n * sizeof(Ins));
-	ip = icpy(ip = i0, b->ins, i - b->ins);
-	ip = icpy(ir = ip, insb, curi - insb);
-	ip = icpy(ip, i1, &b->ins[b->nins] - i1);
-	b->nins = n;
-	b->ins = i0;
-	return ir;
-}
-
-static int
-prio(Ref r1, Ref r2)
-{
-	/* trivial heuristic to begin with,
-	 * later we can use the distance to
-	 * the definition instruction
-	 */
-	(void) r2;
-	return *hint(r1.val) != -1;
-}
-
-static void
-insert(Ref *r, Ref **rs, int p)
-{
-	int i;
-
-	rs[i = p] = r;
-	while (i-- > 0 && prio(*r, *rs[i])) {
-		rs[i+1] = rs[i];
-		rs[i] = r;
-	}
-}
-
-static void
-doblk(Blk *b, RMap *cur)
-{
-	int x, r, nr;
-	bits rs;
-	Ins *i;
-	Mem *m;
-	Ref *ra[4];
-
-	if (rtype(b->jmp.arg) == RTmp)
-		b->jmp.arg = ralloc(cur, b->jmp.arg.val);
-	else if (rtype(b->jmp.arg) == RACall) {
-		/* add return registers */
-		rs = retregs(b->jmp.arg, 0);
-		for (r=0; rs; rs/=2, r++)
-			if (rs & 1)
-				radd(cur, r, r);
-	}
-	for (i=&b->ins[b->nins]; i!=b->ins;) {
-		switch ((--i)->op) {
-		case OCall:
-			rs = argregs(i->arg[1], 0);
-			for (r=0; r<NRSave; r++)
-				if (!(BIT(rsave[r]) & rs))
-					rfree(cur, rsave[r]);
-			break;
-		case OCopy:
-			if (isreg(i->arg[0])) {
-				i = dopm(b, i, cur);
-				continue;
-			}
-			if (isreg(i->to))
-			if (rtype(i->arg[0]) == RTmp)
-				sethint(i->arg[0].val, i->to.val);
-			/* fall through */
-		default:
-			if (!req(i->to, R)) {
-				assert(rtype(i->to) == RTmp);
-				r = rfree(cur, i->to.val);
-				if (r == -1 && !isreg(i->to)) {
-					*i = (Ins){.op = ONop};
-					continue;
-				}
-				if (i->to.val >= Tmp0)
-					i->to = TMP(r);
-			}
-			break;
-		}
-		for (x=0, nr=0; x<2; x++)
-			switch (rtype(i->arg[x])) {
-			case RAMem:
-				m = &mem[i->arg[x].val & AMask];
-				if (rtype(m->base) == RTmp)
-					insert(&m->base, ra, nr++);
-				if (rtype(m->index) == RTmp)
-					insert(&m->index, ra, nr++);
-				break;
-			case RTmp:
-				insert(&i->arg[x], ra, nr++);
-				break;
-			}
-		for (r=0; r<nr; r++)
-			*ra[r] = ralloc(cur, ra[r]->val);
-	}
-}
-
-/* register allocation
- * depends on rpo, phi, cost, (and obviously spill)
- */
-void
-rega(Fn *fn)
-{
-	int j, n, t, r, r1, x, rl[Tmp0];
-	Blk *b, *b1, *s, ***ps, *blist;
-	RMap *end, *beg, cur, old;
-	Ins *i;
-	Phi *p;
-	uint u;
-	Ref src, dst;
-
-	/* 1. setup */
-	regu = 0;
-	tmp = fn->tmp;
-	mem = fn->mem;
-	end = alloc(fn->nblk * sizeof end[0]);
-	beg = alloc(fn->nblk * sizeof beg[0]);
-	for (n=0; n<fn->nblk; n++) {
-		bsinit(end[n].b, fn->ntmp);
-		bsinit(beg[n].b, fn->ntmp);
-	}
-	bsinit(cur.b, fn->ntmp);
-	bsinit(old.b, fn->ntmp);
-
-	for (t=Tmp0; t<fn->ntmp; t++)
-		*hint(t) = -1;
-	for (b=fn->start, i=b->ins; i-b->ins < b->nins; i++)
-		if (i->op != OCopy || !isreg(i->arg[0]))
-			break;
-		else {
-			assert(rtype(i->to) == RTmp);
-			sethint(i->to.val, i->arg[0].val);
-		}
-
-	/* 2. assign registers following post-order */
-	for (n=fn->nblk-1; n>=0; n--) {
-		b = fn->rpo[n];
-		cur.n = 0;
-		bszero(cur.b);
-		for (x=0; x<2; x++)
-			for (t=Tmp0; t<fn->ntmp; t++) {
-				assert(bshas(b->out, t) ||
-					!bshas(cur.b, t));
-				if (bshas(b->out, t))
-				if (!bshas(cur.b, t))
-				if (x || (r=*hint(t)) != -1)
-				if (x || !bshas(cur.b, r))
-					ralloc(&cur, t);
-			}
-		rcopy(&end[n], &cur);
-		doblk(b, &cur);
-		bscopy(b->in, cur.b);
-		for (p=b->phi; p; p=p->link)
-			if (rtype(p->to) == RTmp) {
-				bsclr(b->in, p->to.val);
-				/* heuristic 0:
-				 * if the phi destination has an
-				 * argument from a frequent block
-				 * that was already allocated to
-				 * 'r', use 'r' as the new hint
-				 */
-				memset(rl, 0, sizeof rl);
-				for (u=0; u<p->narg; u++) {
-					t = p->arg[u].val;
-					b1 = p->blk[u];
-					if (rtype(p->arg[u]) == RTmp)
-					if ((r=rfind(&end[b1->id], t)) != -1)
-						rl[r] += b1->loop;
-				}
-				for (x=0, j=0; j<Tmp0; j++)
-					if (rl[j] > rl[x])
-						x = j;
-				if (rl[x] >= b->loop)
-					*hint(p->to.val) = x;
-			}
-		if (b->npred > 1) {
-			/* heuristic 1:
-			 * attempt to satisfy hints
-			 * when it's simple and we have
-			 * multiple predecessors
-			 */
-			rcopy(&old, &cur);
-			curi = &insb[NIns];
-			for (j=0; j<old.n; j++) {
-				t = old.t[j];
-				r = *hint(t);
-				r1 = rfind(&cur, t);
-				if (r != -1 && r != r1)
-				if (!bshas(cur.b, r)) {
-					rfree(&cur, t);
-					radd(&cur, t, r);
-					x = tmp[t].cls;
-					emit(OCopy, x, TMP(r1), TMP(r), R);
-				}
-			}
-			if ((j = &insb[NIns] - curi)) {
-				b->nins += j;
-				i = alloc(b->nins * sizeof(Ins));
-				icpy(icpy(i, curi, j), b->ins, b->nins-j);
-				b->ins = i;
-			}
-		}
-		rcopy(&beg[n], &cur);
-	}
-	if (debug['R'])  {
-		fprintf(stderr, "\n> Register mappings:\n");
-		for (n=0; n<fn->nblk; n++) {
-			b = fn->rpo[n];
-			fprintf(stderr, "\t%-10s beg", b->name);
-			mdump(&beg[n]);
-			fprintf(stderr, "\t           end");
-			mdump(&end[n]);
-		}
-		fprintf(stderr, "\n");
-	}
-
-	/* 3. compose glue code */
-	blist = 0;
-	for (b=fn->start;; b=b->link) {
-		ps = (Blk**[3]){&b->s1, &b->s2, (Blk*[1]){0}};
-		for (; (s=**ps); ps++) {
-			npm = 0;
-			for (p=s->phi; p; p=p->link) {
-				dst = p->to;
-				assert(rtype(dst)==RSlot || rtype(dst)==RTmp);
-				if (rtype(dst) == RTmp) {
-					r = rfind(&beg[s->id], dst.val);
-					if (r == -1)
-						continue;
-					dst = TMP(r);
-				}
-				for (u=0; p->blk[u]!=b; u++)
-					assert(u+1 < p->narg);
-				src = p->arg[u];
-				if (rtype(src) == RTmp)
-					src = rref(&end[b->id], src.val);
-				pmadd(src, dst, p->cls);
-			}
-			for (t=Tmp0; t<fn->ntmp; t++)
-				if (bshas(s->in, t)) {
-					src = rref(&end[b->id], t);
-					dst = rref(&beg[s->id], t);
-					pmadd(src, dst, tmp[t].cls);
-				}
-			pmgen();
-			if (curi == insb)
-				continue;
-			b1 = blknew();
-			b1->loop = (b->loop+s->loop) / 2;
-			b1->link = blist;
-			blist = b1;
-			fn->nblk++;
-			sprintf(b1->name, "%s_%s", b->name, s->name);
-			b1->nins = curi - insb;
-			idup(&b1->ins, insb, b1->nins);
-			b1->jmp.type = JJmp;
-			b1->s1 = s;
-			**ps = b1;
-		}
-		if (!b->link) {
-			b->link = blist;
-			break;
-		}
-	}
-	for (b=fn->start; b; b=b->link)
-		b->phi = 0;
-	fn->reg = regu;
-
-	if (debug['R']) {
-		fprintf(stderr, "\n> After register allocation:\n");
-		printfn(fn, stderr);
-	}
-}
diff --git a/src/spill.c b/src/spill.c
deleted file mode 100644
index 72f8106..0000000
--- a/src/spill.c
+++ /dev/null
@@ -1,507 +0,0 @@
-#include "all.h"
-
-static void
-loopmark(Blk *hd, Blk *b, Phi *p)
-{
-	int k, head;
-	uint n, a;
-
-	head = hd->id;
-	if (b->id < head)
-		return;
-	for (; p; p=p->link)
-		for (a=0; a<p->narg; a++)
-			if (p->blk[a] == b)
-			if (rtype(p->arg[a]) == RTmp)
-				bsset(hd->gen, p->arg[a].val);
-	if (b->visit == head)
-		return;
-	b->visit = head;
-	b->loop *= 10;
-	/* aggregate looping information at
-	 * loop headers */
-	bsunion(hd->gen, b->gen);
-	for (k=0; k<2; k++)
-		if (b->nlive[k] > hd->nlive[k])
-			hd->nlive[k] = b->nlive[k];
-	for (n=0; n<b->npred; n++)
-		loopmark(hd, b->pred[n], b->phi);
-}
-
-static void
-tmpuse(Ref r, int use, int loop, Fn *fn)
-{
-	Mem *m;
-	Tmp *t;
-
-	if (rtype(r) == RAMem) {
-		m = &fn->mem[r.val & AMask];
-		tmpuse(m->base, 1, loop, fn);
-		tmpuse(m->index, 1, loop, fn);
-	}
-	else if (rtype(r) == RTmp && r.val >= Tmp0) {
-		t = &fn->tmp[r.val];
-		t->nuse += use;
-		t->ndef += !use;
-		t->cost += loop;
-	}
-}
-
-/* evaluate spill costs of temporaries,
- * this also fills usage information
- * requires rpo, preds
- */
-void
-fillcost(Fn *fn)
-{
-	int n, hd;
-	uint a;
-	Blk *b;
-	Ins *i;
-	Tmp *t;
-	Phi *p;
-
-	for (b=fn->start; b; b=b->link) {
-		b->loop = 1;
-		b->visit = -1;
-	}
-	if (debug['S'])
-		fprintf(stderr, "\n> Loop information:\n");
-	for (n=0; n<fn->nblk; n++) {
-		b = fn->rpo[n];
-		hd = 0;
-		for (a=0; a<b->npred; a++)
-			if (b->pred[a]->id >= n) {
-				loopmark(b, b->pred[a], b->phi);
-				hd = 1;
-			}
-		if (hd && debug['S']) {
-			fprintf(stderr, "\t%-10s", b->name);
-			fprintf(stderr, " (% 3d ", b->nlive[0]);
-			fprintf(stderr, "% 3d) ", b->nlive[1]);
-			dumpts(b->gen, fn->tmp, stderr);
-		}
-	}
-	for (t=fn->tmp; t-fn->tmp < fn->ntmp; t++) {
-		t->cost = t-fn->tmp < Tmp0 ? 1e6 : 0;
-		t->nuse = 0;
-		t->ndef = 0;
-	}
-	for (b=fn->start; b; b=b->link) {
-		for (p=b->phi; p; p=p->link) {
-			/* todo, the cost computation
-			 * for p->to is not great... */
-			tmpuse(p->to, 0, 0, fn);
-			for (a=0; a<p->narg; a++) {
-				n = p->blk[a]->loop;
-				assert(b->npred==p->narg &&
-					"wrong cfg");
-				n /= b->npred;
-				tmpuse(p->arg[a], 1, n, fn);
-			}
-		}
-		n = b->loop;
-		for (i=b->ins; i-b->ins < b->nins; i++) {
-			tmpuse(i->to, 0, n, fn);
-			tmpuse(i->arg[0], 1, n, fn);
-			tmpuse(i->arg[1], 1, n, fn);
-		}
-		tmpuse(b->jmp.arg, 1, n, fn);
-	}
-	if (debug['S']) {
-		fprintf(stderr, "\n> Spill costs:\n");
-		for (n=Tmp0; n<fn->ntmp; n++)
-			fprintf(stderr, "\t%-10s %d\n",
-				fn->tmp[n].name,
-				fn->tmp[n].cost);
-		fprintf(stderr, "\n");
-	}
-}
-
-static BSet *fst; /* temps to prioritize in registers (for tcmp1) */
-static Tmp *tmp;  /* current temporaries (for tcmpX) */
-static int ntmp;  /* current # of temps (for limit) */
-static int locs;  /* stack size used by locals */
-static int slot4; /* next slot of 4 bytes */
-static int slot8; /* ditto, 8 bytes */
-static BSet mask[2][1]; /* class masks */
-
-static int
-tcmp0(const void *pa, const void *pb)
-{
-	return tmp[*(int *)pb].cost - tmp[*(int *)pa].cost;
-}
-
-static int
-tcmp1(const void *pa, const void *pb)
-{
-	int c;
-
-	c = bshas(fst, *(int *)pb) - bshas(fst, *(int *)pa);
-	return c ? c : tcmp0(pa, pb);
-}
-
-static Ref
-slot(int t)
-{
-	int s;
-
-	if (t < Tmp0)
-		diag("spill: cannot spill register");
-	s = tmp[t].slot;
-	if (s == -1) {
-		assert(NAlign == 3);
-		/* nice logic to pack stack slots
-		 * on demand, there can be only
-		 * one hole and slot4 points to it
-		 *
-		 * invariant: slot4 <= slot8
-		 */
-		if (KWIDE(tmp[t].cls)) {
-			s = slot8;
-			if (slot4 == slot8)
-				slot4 += 2;
-			slot8 += 2;
-		} else {
-			s = slot4;
-			if (slot4 == slot8) {
-				slot8 += 2;
-				slot4 += 1;
-			} else
-				slot4 = slot8;
-		}
-		s += locs;
-		tmp[t].slot = s;
-	}
-	return SLOT(s);
-}
-
-static void
-limit(BSet *b, int k, BSet *f)
-{
-	static int *tarr, maxt;
-	int i, nt;
-	uint t;
-
-	nt = bscount(b);
-	if (nt <= k)
-		return;
-	if (nt > maxt) {
-		free(tarr);
-		tarr = emalloc(nt * sizeof tarr[0]);
-		maxt = nt;
-	}
-	for (i=0, t=0; bsiter(b, &t); t++) {
-		bsclr(b, t);
-		tarr[i++] = t;
-	}
-	if (!f)
-		qsort(tarr, nt, sizeof tarr[0], tcmp0);
-	else {
-		fst = f;
-		qsort(tarr, nt, sizeof tarr[0], tcmp1);
-	}
-	for (i=0; i<k && i<nt; i++)
-		bsset(b, tarr[i]);
-	for (; i<nt; i++)
-		slot(tarr[i]);
-}
-
-static void
-limit2(BSet *b1, int k1, int k2, BSet *fst)
-{
-	BSet b2[1];
-
-	bsinit(b2, ntmp); /* todo, free those */
-	bscopy(b2, b1);
-	bsinter(b1, mask[0]);
-	bsinter(b2, mask[1]);
-	limit(b1, NIReg - k1, fst);
-	limit(b2, NFReg - k2, fst);
-	bsunion(b1, b2);
-}
-
-static void
-sethint(BSet *u, bits r)
-{
-	uint t;
-
-	for (t=Tmp0; bsiter(u, &t); t++)
-		tmp[phicls(t, tmp)].hint.m |= r;
-}
-
-static void
-reloads(BSet *u, BSet *v)
-{
-	uint t;
-
-	for (t=Tmp0; bsiter(u, &t); t++)
-		if (!bshas(v, t))
-			emit(OLoad, tmp[t].cls, TMP(t), slot(t), R);
-}
-
-static void
-store(Ref r, int s)
-{
-	static int kstore[] = {
-		[Kw] = OStorew, [Kl] = OStorel,
-		[Ks] = OStores, [Kd] = OStored,
-	};
-
-	if (s != -1)
-		emit(kstore[tmp[r.val].cls], 0, R, r, SLOT(s));
-}
-
-static int
-regcpy(Ins *i)
-{
-	return i->op == OCopy && isreg(i->arg[0]);
-}
-
-static Ins *
-dopm(Blk *b, Ins *i, BSet *v)
-{
-	int n, t;
-	BSet u[1];
-	Ins *i1;
-	bits r;
-
-	bsinit(u, ntmp); /* todo, free those */
-	/* consecutive copies from
-	 * registers need to be handled
-	 * as one large instruction
-	 *
-	 * fixme: there is an assumption
-	 * that calls are always followed
-	 * by copy instructions here, this
-	 * might not be true if previous
-	 * passes change
-	 */
-	i1 = ++i;
-	do {
-		i--;
-		t = i->to.val;
-		if (!req(i->to, R))
-		if (bshas(v, t)) {
-			bsclr(v, t);
-			store(i->to, tmp[t].slot);
-		}
-		bsset(v, i->arg[0].val);
-	} while (i != b->ins && regcpy(i-1));
-	bscopy(u, v);
-	if (i != b->ins && (i-1)->op == OCall) {
-		v->t[0] &= ~retregs((i-1)->arg[1], 0);
-		limit2(v, NISave, NFSave, 0);
-		for (r=0, n=0; n<NRSave; n++)
-			r |= BIT(rsave[n]);
-		v->t[0] |= argregs((i-1)->arg[1], 0);
-	} else {
-		limit2(v, 0, 0, 0);
-		r = v->t[0];
-	}
-	sethint(v, r);
-	reloads(u, v);
-	do
-		emiti(*--i1);
-	while (i1 != i);
-	return i;
-}
-
-/* spill code insertion
- * requires spill costs, rpo, liveness
- *
- * Note: this will replace liveness
- * information (in, out) with temporaries
- * that must be in registers at block
- * borders
- *
- * Be careful with:
- * - OCopy instructions to ensure register
- *   constraints
- */
-void
-spill(Fn *fn)
-{
-	Blk *b, *s1, *s2, *hd, **bp;
-	int j, n, l, t, k, lvarg[2];
-	BSet u[1], v[1], w[1];
-	Ins *i;
-	Phi *p;
-	Mem *m;
-	bits r;
-
-	tmp = fn->tmp;
-	ntmp = fn->ntmp;
-	bsinit(u, ntmp);
-	bsinit(v, ntmp);
-	bsinit(w, ntmp);
-	bsinit(mask[0], ntmp);
-	bsinit(mask[1], ntmp);
-	locs = fn->slot;
-	slot4 = 0;
-	slot8 = 0;
-	for (t=0; t<ntmp; t++) {
-		k = 0;
-		if (t >= XMM0 && t < XMM0 + NFReg)
-			k = 1;
-		else if (t >= Tmp0)
-			k = KBASE(tmp[t].cls);
-		bsset(mask[k], t);
-	}
-
-	for (bp=&fn->rpo[fn->nblk]; bp!=fn->rpo;) {
-		b = *--bp;
-		/* invariant: all bocks with bigger rpo got
-		 * their in,out updated. */
-
-		/* 1. find temporaries in registers at
-		 * the end of the block (put them in v) */
-		curi = 0;
-		s1 = b->s1;
-		s2 = b->s2;
-		hd = 0;
-		if (s1 && s1->id <= n)
-			hd = s1;
-		if (s2 && s2->id <= n)
-		if (!hd || s2->id >= hd->id)
-			hd = s2;
-		r = 0;
-		bszero(v);
-		if (hd) {
-			/* back-edge */
-			for (k=0; k<2; k++) {
-				n = k == 0 ? NIReg : NFReg;
-				bscopy(u, b->out);
-				bsinter(u, mask[k]);
-				bscopy(w, u);
-				bsinter(u, hd->gen);
-				bsdiff(w, hd->gen);
-				if ((int)bscount(u) < n) { /* fixme */
-					j = bscount(w);   /* live through */
-					l = hd->nlive[k];
-					limit(w, n - (l - j), 0);
-					bsunion(u, w);
-				} else
-					limit(u, n, 0);
-				bsunion(v, u);
-			}
-		} else if (s1) {
-			liveon(v, b, s1);
-			if (s2) {
-				liveon(u, b, s2);
-				bscopy(w, u);
-				bsinter(w, v);
-				bsunion(v, u);
-			}
-			limit2(v, 0, 0, w);
-		} else if (rtype(b->jmp.arg) == RACall) {
-			/* return */
-			r = retregs(b->jmp.arg, 0);
-			v->t[0] |= r;
-		}
-		bscopy(b->out, v);
-
-		/* 2. process the block instructions */
-		curi = &insb[NIns];
-		for (i=&b->ins[b->nins]; i!=b->ins;) {
-			i--;
-			if (regcpy(i)) {
-				i = dopm(b, i, v);
-				continue;
-			}
-			bszero(w);
-			if (!req(i->to, R)) {
-				assert(rtype(i->to) == RTmp);
-				t = i->to.val;
-				if (bshas(v, t))
-					bsclr(v, t);
-				else {
-					/* make sure we have a reg
-					 * for the result */
-					bsset(v, t);
-					bsset(w, t);
-				}
-			}
-			j = opdesc[i->op].nmem;
-			for (n=0; n<2; n++)
-				if (rtype(i->arg[n]) == RAMem)
-					j--;
-			for (n=0; n<2; n++)
-				switch (rtype(i->arg[n])) {
-				case RAMem:
-					t = i->arg[n].val;
-					m = &fn->mem[t & AMask];
-					if (rtype(m->base) == RTmp) {
-						bsset(v, m->base.val);
-						bsset(w, m->base.val);
-					}
-					if (rtype(m->index) == RTmp) {
-						bsset(v, m->index.val);
-						bsset(w, m->index.val);
-					}
-					break;
-				case RTmp:
-					t = i->arg[n].val;
-					lvarg[n] = bshas(v, t);
-					bsset(v, t);
-					if (j-- <= 0)
-						bsset(w, t);
-					break;
-				}
-			bscopy(u, v);
-			limit2(v, 0, 0, w);
-			for (n=0; n<2; n++)
-				if (rtype(i->arg[n]) == RTmp) {
-					t = i->arg[n].val;
-					if (!bshas(v, t)) {
-						/* do not reload if the
-						 * the temporary was dead
-						 */
-						if (!lvarg[n])
-							bsclr(u, t);
-						i->arg[n] = slot(t);
-					}
-				}
-			reloads(u, v);
-			if (!req(i->to, R)) {
-				t = i->to.val;
-				store(i->to, tmp[t].slot);
-				bsclr(v, t);
-			}
-			emiti(*i);
-			r = v->t[0] & (BIT(Tmp0)-1);
-			if (r)
-				sethint(v, r);
-		}
-		assert(!r || b==fn->start);
-
-		for (p=b->phi; p; p=p->link) {
-			assert(rtype(p->to) == RTmp);
-			t = p->to.val;
-			if (bshas(v, t)) {
-				bsclr(v, t);
-				store(p->to, tmp[t].slot);
-			} else if (bshas(b->in, t))
-				/* only if the phi is live */
-				p->to = slot(p->to.val);
-		}
-		bscopy(b->in, v);
-		b->nins = &insb[NIns] - curi;
-		idup(&b->ins, curi, b->nins);
-	}
-
-	/* align the locals to a 16 byte boundary */
-	assert(NAlign == 3);
-	slot8 += slot8 & 3;
-	fn->slot += slot8;
-
-	if (debug['S']) {
-		fprintf(stderr, "\n> Block information:\n");
-		for (b=fn->start; b; b=b->link) {
-			printf("\t%-10s (% 5d) ", b->name, b->loop);
-			dumpts(b->out, fn->tmp, stdout);
-		}
-		fprintf(stderr, "\n> After spilling:\n");
-		printfn(fn, stderr);
-	}
-}
diff --git a/src/ssa.c b/src/ssa.c
deleted file mode 100644
index 0c163aa..0000000
--- a/src/ssa.c
+++ /dev/null
@@ -1,516 +0,0 @@
-#include "all.h"
-#include <stdarg.h>
-
-static void
-adduse(Tmp *tmp, int ty, Blk *b, ...)
-{
-	Use *u;
-	int n;
-	va_list ap;
-
-	va_start(ap, b);
-	n = tmp->nuse;
-	vgrow(&tmp->use, ++tmp->nuse);
-	u = &tmp->use[n];
-	u->type = ty;
-	u->bid = b->id;
-	switch (ty) {
-	default:
-		diag("ssa: adduse defaulted");
-	case UPhi:
-		u->u.phi = va_arg(ap, Phi *);
-		break;
-	case UIns:
-		u->u.ins = va_arg(ap, Ins *);
-		break;
-	case UJmp:
-		break;
-	}
-	va_end(ap);
-}
-
-/* fill usage, phi, and class information
- */
-void
-filluse(Fn *fn)
-{
-	Blk *b;
-	Phi *p;
-	Ins *i;
-	int m, t;
-	uint a;
-	Tmp *tmp;
-
-	/* todo, is this the correct file? */
-	tmp = fn->tmp;
-	for (t=0; t<fn->ntmp; t++) {
-		tmp[t].ndef = 0;
-		tmp[t].nuse = 0;
-		tmp[t].phi = 0;
-		tmp[t].cls = 0;
-		if (tmp[t].use == 0)
-			tmp[t].use = vnew(0, sizeof(Use));
-	}
-	for (b=fn->start; b; b=b->link) {
-		for (p=b->phi; p; p=p->link) {
-			assert(rtype(p->to) == RTmp);
-			t = p->to.val;
-			tmp[t].ndef++;
-			tmp[t].cls = p->cls;
-			tmp[t].phi = p->to.val;
-			for (a=0; a<p->narg; a++)
-				if (rtype(p->arg[a]) == RTmp) {
-					t = p->arg[a].val;
-					adduse(&tmp[t], UPhi, b, p);
-					if (!tmp[t].phi)
-						tmp[t].phi = p->to.val;
-				}
-		}
-		for (i=b->ins; i-b->ins < b->nins; i++) {
-			if (!req(i->to, R)) {
-				assert(rtype(i->to) == RTmp);
-				t = i->to.val;
-				tmp[t].ndef++;
-				tmp[t].cls = i->cls;
-			}
-			for (m=0; m<2; m++)
-				if (rtype(i->arg[m]) == RTmp) {
-					t = i->arg[m].val;
-					adduse(&tmp[t], UIns, b, i);
-				}
-		}
-		if (rtype(b->jmp.arg) == RTmp)
-			adduse(&tmp[b->jmp.arg.val], UJmp, b);
-	}
-}
-
-static void
-addpred(Blk *bp, Blk *bc)
-{
-	uint i;
-
-	if (!bc->pred) {
-		bc->pred = alloc(bc->npred * sizeof bc->pred[0]);
-		for (i=0; i<bc->npred; i++)
-			bc->pred[i] = 0;
-	}
-	for (i=0; bc->pred[i]; i++)
-		;
-	bc->pred[i] = bp;
-}
-
-/* fill predecessors information in blocks
- */
-void
-fillpreds(Fn *f)
-{
-	Blk *b;
-
-	for (b=f->start; b; b=b->link) {
-		b->npred = 0;
-		b->pred = 0;
-	}
-	for (b=f->start; b; b=b->link) {
-		if (b->s1)
-			b->s1->npred++;
-		if (b->s2)
-			b->s2->npred++;
-	}
-	for (b=f->start; b; b=b->link) {
-		if (b->s1)
-			addpred(b, b->s1);
-		if (b->s2)
-			addpred(b, b->s2);
-	}
-}
-
-static int
-rporec(Blk *b, int x)
-{
-	Blk *s1, *s2;
-
-	if (!b || b->id >= 0)
-		return x;
-	b->id = 1;
-	s1 = b->s1;
-	s2 = b->s2;
-	if (s1 && s2 && s1->loop > s2->loop) {
-		s1 = b->s2;
-		s2 = b->s1;
-	}
-	x = rporec(s1, x);
-	x = rporec(s2, x);
-	b->id = x;
-	assert(x >= 0);
-	return x - 1;
-}
-
-/* fill the rpo information in blocks
- */
-void
-fillrpo(Fn *f)
-{
-	int n;
-	Blk *b, **p;
-
-	for (b=f->start; b; b=b->link)
-		b->id = -1;
-	n = 1 + rporec(f->start, f->nblk-1);
-	f->nblk -= n;
-	f->rpo = alloc(f->nblk * sizeof f->rpo[0]);
-	for (p=&f->start; *p;) {
-		b = *p;
-		if (b->id == -1) {
-			*p = b->link;
-			/* todo, free block */
-		} else {
-			b->id -= n;
-			f->rpo[b->id] = b;
-			p=&(*p)->link;
-		}
-	}
-}
-
-/* for dominators computation, read
- * "A Simple, Fast Dominance Algorithm"
- * by K. Cooper, T. Harvey, and K. Kennedy.
- */
-
-static Blk *
-inter(Blk *b1, Blk *b2)
-{
-	Blk *bt;
-
-	if (b1 == 0)
-		return b2;
-	while (b1 != b2) {
-		if (b1->id < b2->id) {
-			bt = b1;
-			b1 = b2;
-			b2 = bt;
-		}
-		while (b1->id > b2->id) {
-			b1 = b1->idom;
-			assert(b1);
-		}
-	}
-	return b1;
-}
-
-static void
-filldom(Fn *fn)
-{
-	Blk *b, *d;
-	int ch, n;
-	uint p;
-
-	for (b=fn->start; b; b=b->link) {
-		b->idom = 0;
-		b->dom = 0;
-		b->dlink = 0;
-	}
-	do {
-		ch = 0;
-		for (n=1; n<fn->nblk; n++) {
-			b = fn->rpo[n];
-			d = 0;
-			for (p=0; p<b->npred; p++)
-				if (b->pred[p]->idom
-				||  b->pred[p] == fn->start)
-					d = inter(d, b->pred[p]);
-			if (d != b->idom) {
-				ch++;
-				b->idom = d;
-			}
-		}
-	} while (ch);
-	for (b=fn->start; b; b=b->link)
-		if ((d=b->idom)) {
-			assert(d != b);
-			b->dlink = d->dom;
-			d->dom = b;
-		}
-}
-
-static int
-sdom(Blk *b1, Blk *b2)
-{
-	assert(b1 && b2);
-	if (b1 == b2)
-		return 0;
-	while (b2->id > b1->id)
-		b2 = b2->idom;
-	return b1 == b2;
-}
-
-static int
-dom(Blk *b1, Blk *b2)
-{
-	return b1 == b2 || sdom(b1, b2);
-}
-
-static void
-addfron(Blk *a, Blk *b)
-{
-	int n;
-
-	for (n=0; n<a->nfron; n++)
-		if (a->fron[n] == b)
-			return;
-	if (!a->nfron)
-		a->fron = vnew(++a->nfron, sizeof a->fron[0]);
-	else
-		vgrow(&a->fron, ++a->nfron);
-	a->fron[a->nfron-1] = b;
-}
-
-static void
-fillfron(Fn *fn)
-{
-	Blk *a, *b;
-
-	for (b=fn->start; b; b=b->link) {
-		if (b->s1)
-			for (a=b; !sdom(a, b->s1); a=a->idom)
-				addfron(a, b->s1);
-		if (b->s2)
-			for (a=b; !sdom(a, b->s2); a=a->idom)
-				addfron(a, b->s2);
-	}
-}
-
-static Ref
-refindex(int t, Fn *fn)
-{
-	return newtmp(fn->tmp[t].name, fn->tmp[t].cls, fn);
-}
-
-static void
-phiins(Fn *fn)
-{
-	BSet u[1], defs[1];
-	Blk *a, *b, **blist, **be, **bp;
-	Ins *i;
-	Phi *p;
-	Ref r;
-	int t, n, k, nt;
-
-	bsinit(u, fn->nblk);
-	bsinit(defs, fn->nblk);
-	blist = emalloc(fn->nblk * sizeof blist[0]);
-	be = &blist[fn->nblk];
-	nt = fn->ntmp;
-	for (t=Tmp0; t<nt; t++) {
-		fn->tmp[t].visit = 0;
-		if (fn->tmp[t].phi != 0)
-			continue;
-		bszero(u);
-		k = -1;
-		bp = be;
-		for (b=fn->start; b; b=b->link) {
-			b->visit = 0;
-			r = R;
-			for (i=b->ins; i-b->ins < b->nins; i++) {
-				if (!req(r, R)) {
-					if (req(i->arg[0], TMP(t)))
-						i->arg[0] = r;
-					if (req(i->arg[1], TMP(t)))
-						i->arg[1] = r;
-				}
-				if (req(i->to, TMP(t))) {
-					if (!bshas(b->out, t)) {
-						if (fn->tmp[t].ndef == 1)
-							r = TMP(t);
-						else
-							r = refindex(t, fn);
-						i->to = r;
-					} else {
-						if (!bshas(u, b->id)) {
-							bsset(u, b->id);
-							*--bp = b;
-						}
-						if (k == -1)
-							k = i->cls;
-						assert(k == i->cls);
-					}
-				}
-			}
-			if (!req(r, R) && req(b->jmp.arg, TMP(t)))
-				b->jmp.arg = r;
-		}
-		bscopy(defs, u);
-		while (bp != be) {
-			fn->tmp[t].visit = t;
-			b = *bp++;
-			bsclr(u, b->id);
-			for (n=0; n<b->nfron; n++) {
-				a = b->fron[n];
-				if (a->visit++ == 0)
-				if (bshas(a->in, t)) {
-					p = alloc(sizeof *p);
-					p->cls = k;
-					p->to = TMP(t);
-					p->link = a->phi;
-					a->phi = p;
-					if (!bshas(defs, a->id))
-					if (!bshas(u, a->id)) {
-						bsset(u, a->id);
-						*--bp = a;
-					}
-				}
-			}
-		}
-	}
-	free(blist);
-}
-
-typedef struct Name Name;
-struct Name {
-	Ref r;
-	Blk *b;
-	Name *up;
-};
-
-static Name *namel;
-
-static Name *
-nnew(Ref r, Blk *b, Name *up)
-{
-	Name *n;
-
-	if (namel) {
-		n = namel;
-		namel = n->up;
-	} else
-		/* could use alloc, here
-		 * but namel should be reset
-		 */
-		n = emalloc(sizeof *n);
-	n->r = r;
-	n->b = b;
-	n->up = up;
-	return n;
-}
-
-static void
-nfree(Name *n)
-{
-	n->up = namel;
-	namel = n;
-}
-
-static void
-rendef(Ref *r, Blk *b, Name **stk, Fn *fn)
-{
-	Ref r1;
-	int t;
-
-	t = r->val;
-	if (req(*r, R) || !fn->tmp[t].visit)
-		return;
-	r1 = refindex(t, fn);
-	fn->tmp[r1.val].visit = t;
-	stk[t] = nnew(r1, b, stk[t]);
-	*r = r1;
-}
-
-static Ref
-getstk(int t, Blk *b, Name **stk)
-{
-	Name *n, *n1;
-
-	n = stk[t];
-	while (n && !dom(n->b, b)) {
-		n1 = n;
-		n = n->up;
-		nfree(n1);
-	}
-	stk[t] = n;
-	if (!n) {
-		/* uh, oh, warn */
-		return CON_Z;
-	} else
-		return n->r;
-}
-
-static void
-renblk(Blk *b, Name **stk, Fn *fn)
-{
-	Phi *p;
-	Ins *i;
-	Blk *s, **ps, *succ[3];
-	int t, m;
-
-	for (p=b->phi; p; p=p->link)
-		rendef(&p->to, b, stk, fn);
-	for (i=b->ins; i-b->ins < b->nins; i++) {
-		for (m=0; m<2; m++) {
-			t = i->arg[m].val;
-			if (rtype(i->arg[m]) == RTmp)
-			if (fn->tmp[t].visit)
-				i->arg[m] = getstk(t, b, stk);
-		}
-		rendef(&i->to, b, stk, fn);
-	}
-	t = b->jmp.arg.val;
-	if (rtype(b->jmp.arg) == RTmp)
-	if (fn->tmp[t].visit)
-		b->jmp.arg = getstk(t, b, stk);
-	succ[0] = b->s1;
-	succ[1] = b->s2;
-	succ[2] = 0;
-	for (ps=succ; (s=*ps); ps++)
-		for (p=s->phi; p; p=p->link) {
-			t = p->to.val;
-			if ((t=fn->tmp[t].visit)) {
-				m = p->narg++;
-				if (m == NPred)
-					diag("ssa: too many phi arguments");
-				p->arg[m] = getstk(t, b, stk);
-				p->blk[m] = b;
-			}
-		}
-	for (s=b->dom; s; s=s->dlink)
-		renblk(s, stk, fn);
-}
-
-/* require ndef */
-void
-ssa(Fn *fn)
-{
-	Name **stk, *n;
-	int d, nt;
-	Blk *b, *b1;
-
-	nt = fn->ntmp;
-	stk = emalloc(nt * sizeof stk[0]);
-	d = debug['L'];
-	debug['L'] = 0;
-	filldom(fn);
-	if (debug['N']) {
-		fprintf(stderr, "\n> Dominators:\n");
-		for (b1=fn->start; b1; b1=b1->link) {
-			if (!b1->dom)
-				continue;
-			fprintf(stderr, "%10s:", b1->name);
-			for (b=b1->dom; b; b=b->dlink)
-				fprintf(stderr, " %s", b->name);
-			fprintf(stderr, "\n");
-		}
-	}
-	fillfron(fn);
-	filllive(fn);
-	phiins(fn);
-	renblk(fn->start, stk, fn);
-	while (nt--)
-		while ((n=stk[nt])) {
-			stk[nt] = n->up;
-			nfree(n);
-		}
-	debug['L'] = d;
-	free(stk);
-	if (debug['N']) {
-		fprintf(stderr, "\n> After SSA construction:\n");
-		printfn(fn, stderr);
-	}
-}
diff --git a/src/util.c b/src/util.c
deleted file mode 100644
index 65b3ff8..0000000
--- a/src/util.c
+++ /dev/null
@@ -1,329 +0,0 @@
-#include "all.h"
-
-typedef struct Bitset Bitset;
-typedef struct Vec Vec;
-
-struct Vec {
-	ulong mag;
-	size_t esz;
-	ulong cap;
-	union {
-		long long ll;
-		long double ld;
-		void *ptr;
-	} align[];
-};
-
-enum {
-	VMin = 2,
-	VMag = 0xcabba9e,
-	NPtr = 256,
-};
-
-Typ typ[NTyp];
-Ins insb[NIns], *curi;
-
-static void *ptr[NPtr];
-static void **pool = ptr;
-static int nptr = 1;
-
-void
-diag(char *s)
-{
-	fputs(s, stderr);
-	fputc('\n', stderr);
-	abort();
-}
-
-void *
-emalloc(size_t n)
-{
-	void *p;
-
-	p = calloc(1, n);
-	if (!p)
-		diag("emalloc: out of memory");
-	return p;
-}
-
-void *
-alloc(size_t n)
-{
-	void **pp;
-
-	if (n == 0)
-		return 0;
-	if (nptr >= NPtr) {
-		pp = emalloc(NPtr * sizeof(void *));
-		pp[0] = pool;
-		pool = pp;
-		nptr = 1;
-	}
-	return pool[nptr++] = emalloc(n);
-}
-
-void
-freeall()
-{
-	void **pp;
-
-	for (;;) {
-		for (pp = &pool[1]; pp < &pool[nptr]; pp++)
-			free(*pp);
-		pp = pool[0];
-		if (!pp)
-			break;
-		free(pool);
-		pool = pp;
-		nptr = NPtr;
-	}
-	nptr = 1;
-}
-
-Blk *
-blknew()
-{
-	static Blk z;
-	Blk *b;
-
-	b = alloc(sizeof *b);
-	*b = z;
-	return b;
-}
-
-void
-emit(int op, int k, Ref to, Ref arg0, Ref arg1)
-{
-	if (curi == insb)
-		diag("emit: too many instructions");
-	*--curi = (Ins){
-		.op = op, .cls = k,
-		.to = to, .arg = {arg0, arg1}
-	};
-}
-
-void
-emiti(Ins i)
-{
-	emit(i.op, i.cls, i.to, i.arg[0], i.arg[1]);
-}
-
-void
-idup(Ins **pd, Ins *s, ulong n)
-{
-	*pd = alloc(n * sizeof(Ins));
-	memcpy(*pd, s, n * sizeof(Ins));
-}
-
-Ins *
-icpy(Ins *d, Ins *s, ulong n)
-{
-	memcpy(d, s, n * sizeof(Ins));
-	return d + n;
-}
-
-void *
-vnew(ulong len, size_t esz)
-{
-	ulong cap;
-	Vec *v;
-
-	for (cap=VMin; cap<len; cap*=2)
-		;
-	v = alloc(cap * esz + sizeof(Vec));
-	v->mag = VMag;
-	v->cap = cap;
-	v->esz = esz;
-	return v + 1;
-}
-
-void
-vgrow(void *vp, ulong len)
-{
-	Vec *v;
-	void *v1;
-
-	v = *(Vec **)vp - 1;
-	assert(v+1 && v->mag == VMag);
-	if (v->cap >= len)
-		return;
-	v1 = vnew(len, v->esz);
-	memcpy(v1, v+1, v->cap * v->esz);
-	*(Vec **)vp = v1;
-}
-
-int
-phicls(int t, Tmp *tmp /*, int c*/)
-{
-	if (tmp[t].phi)
-		return tmp[t].phi;
-	return t;
-#if 0
-	int t1;
-
-	t1 = tmp[t].phi;
-	if (!t1)
-		t1 = t;
-	if (t != t1) {
-		t1 = phitmp(t1, tmp, c);
-		if (c)
-			tmp[t].phi = t1;
-	}
-	return t1;
-#endif
-}
-
-Ref
-newtmp(char *prfx, int k,  Fn *fn)
-{
-	static int n;
-	int t;
-
-	t = fn->ntmp++;
-	vgrow(&fn->tmp, fn->ntmp);
-	sprintf(fn->tmp[t].name, "%s%d", prfx, ++n);
-	fn->tmp[t].cls = k;
-	fn->tmp[t].slot = -1;
-	fn->tmp[t].nuse = +1;
-	fn->tmp[t].ndef = +1;
-	return TMP(t);
-}
-
-Ref
-getcon(int64_t val, Fn *fn)
-{
-	int c;
-
-	for (c=0; c<fn->ncon; c++)
-		if (fn->con[c].type == CBits && fn->con[c].bits.i == val)
-			return CON(c);
-	fn->ncon++;
-	vgrow(&fn->con, fn->ncon);
-	fn->con[c] = (Con){.type = CBits, .bits.i = val};
-	return CON(c);
-}
-
-void
-addcon(Con *c0, Con *c1)
-{
-	if (c0->type == CUndef)
-		*c0 = *c1;
-	else {
-		if (c1->type == CAddr) {
-			if (c0->type == CAddr)
-				diag("addcon: adding two addresses");
-			c0->type = CAddr;
-			strcpy(c0->label, c1->label);
-		}
-		c0->bits.i += c1->bits.i;
-	}
-}
-
-void
-bsinit(BSet *bs, uint n)
-{
-	n = (n + NBit-1) / NBit;
-	bs->nt = n;
-	bs->t = alloc(n * sizeof bs->t[0]);
-}
-
-uint
-bscount(BSet *bs)
-{
-	uint i, j, n;
-
-	n = 0;
-	for (i=0; i<bs->nt; i++)
-		for (j=0; j<NBit; j++)
-			if (bs->t[i] & BIT(j))
-				n++;
-	return n;
-}
-
-static inline uint
-bsmax(BSet *bs)
-{
-	return bs->nt * NBit;
-}
-
-void
-bsset(BSet *bs, uint elt)
-{
-	assert(elt < bsmax(bs));
-	bs->t[elt/NBit] |= BIT(elt%NBit);
-}
-
-void
-bsclr(BSet *bs, uint elt)
-{
-	assert(elt < bsmax(bs));
-	bs->t[elt/NBit] &= ~BIT(elt%NBit);
-}
-
-#define BSOP(f, op)                           \
-	void                                  \
-	f(BSet *a, BSet *b)                   \
-	{                                     \
-		uint i;                       \
-		                              \
-		assert(a->nt == b->nt);       \
-		for (i=0; i<a->nt; i++)       \
-			a->t[i] op b->t[i];   \
-	}
-
-BSOP(bscopy, =)
-BSOP(bsunion, |=)
-BSOP(bsinter, &=)
-BSOP(bsdiff, &= ~)
-
-int
-bsequal(BSet *a, BSet *b)
-{
-	uint i;
-
-	assert(a->nt == b->nt);
-	for (i=0; i<a->nt; i++)
-		if (a->t[i] != b->t[i])
-			return 0;
-	return 1;
-}
-
-void
-bszero(BSet *bs)
-{
-	memset(bs->t, 0, bs->nt * sizeof bs->t[0]);
-}
-
-/* iterates on a bitset, use as follows
- *
- * 	for (i=0; bsiter(set, &i); i++)
- * 		use(i);
- *
- */
-int
-bsiter(BSet *bs, uint *elt)
-{
-	uint i;
-
-	for (i=*elt;; i++) {
-		while (i < bsmax(bs) && !bs->t[i/NBit])
-			i = (i + NBit) & -NBit;
-		if (i >= bsmax(bs))
-			return 0;
-		if (bshas(bs, i)) {
-			*elt = i;
-			return 1;
-		}
-	}
-}
-
-void
-dumpts(BSet *bs, Tmp *tmp, FILE *f)
-{
-	uint t;
-
-	fprintf(f, "[");
-	for (t=Tmp0; bsiter(bs, &t); t++)
-		fprintf(f, " %s", tmp[t].name);
-	fprintf(f, " ]\n");
-}