summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--lisc/Makefile2
-rw-r--r--lisc/isel.c37
-rw-r--r--lisc/lisc.h49
-rw-r--r--lisc/live.c16
-rw-r--r--lisc/main.c22
-rw-r--r--lisc/parse.c113
-rw-r--r--lisc/rega.c159
-rw-r--r--lisc/spill.c151
-rw-r--r--lisc/ssa.c48
9 files changed, 324 insertions, 273 deletions
diff --git a/lisc/Makefile b/lisc/Makefile
index 26369f2..afaa538 100644
--- a/lisc/Makefile
+++ b/lisc/Makefile
@@ -1,5 +1,5 @@
 BIN = lisc
-OBJ = main.o parse.o ssa.o live.o # isel.o spill.o rega.o emit.o
+OBJ = parse.o ssa.o live.o isel.o spill.o rega.o main.o # emit.o main.o
 
 CFLAGS = -Wall -Wextra -std=c11 -g -pedantic
 
diff --git a/lisc/isel.c b/lisc/isel.c
index f933c92..4fad703 100644
--- a/lisc/isel.c
+++ b/lisc/isel.c
@@ -25,17 +25,18 @@ emit(short op, Ref to, Ref arg0, Ref arg1)
 }
 
 static int
-newsym(int type, Fn *fn)
+newtmp(int type, Fn *fn)
 {
-	int s;
+	static int n;
+	int t;
 
-	s = fn->nsym++;
-	fn->sym = realloc(fn->sym, fn->nsym * sizeof(Sym));
-	if (!fn->sym)
+	t = fn->ntmp++;
+	fn->tmp = realloc(fn->tmp, fn->ntmp * sizeof fn->tmp[0]);
+	if (!fn->tmp)
 		diag("isel: out of memory");
-	fn->sym[s] = (Sym){.type = type};
-	sprintf(fn->sym[s].name, "isel%d", s);
-	return s;
+	fn->tmp[t] = (Tmp){.type = type};
+	sprintf(fn->tmp[t].name, "isel%d", ++n);
+	return t;
 }
 
 static void
@@ -46,27 +47,27 @@ sel(Ins *i, Fn *fn)
 
 	switch (i->op) {
 	case ODiv:
-		r0 = SYM(RAX);
-		r1 = SYM(RDX);
+		r0 = REG(RAX);
+		r1 = REG(RDX);
 	if (0) {
 	case ORem:
-		r0 = SYM(RDX);
-		r1 = SYM(RAX);
+		r0 = REG(RDX);
+		r1 = REG(RAX);
 	}
 		emit(OCopy, i->to, r0, R);
 		emit(OCopy, R, r1, R);
-		if (rtype(i->arg[1]) == RCons) {
+		if (rtype(i->arg[1]) == RCon) {
 			/* immediates not allowed for
 			 * divisions in x86
 			 */
-			t = newsym(fn->sym[i->to.val].type, fn);
-			r0 = SYM(t);
+			t = newtmp(fn->tmp[i->to.val].type, fn);
+			r0 = TMP(t);
 		} else
 			r0 = i->arg[1];
 		emit(OXDiv, R, r0, R);
-		emit(OSign, SYM(RDX), SYM(RAX), R);
-		emit(OCopy, SYM(RAX), i->arg[0], R);
-		if (rtype(i->arg[1]) == RCons)
+		emit(OSign, REG(RDX), REG(RAX), R);
+		emit(OCopy, REG(RAX), i->arg[0], R);
+		if (rtype(i->arg[1]) == RCon)
 			emit(OCopy, r0, i->arg[1], R);
 		break;
 	case OAdd:
diff --git a/lisc/lisc.h b/lisc/lisc.h
index 30feac4..dc60794 100644
--- a/lisc/lisc.h
+++ b/lisc/lisc.h
@@ -12,8 +12,8 @@ typedef struct OpDesc OpDesc;
 typedef struct Ins Ins;
 typedef struct Phi Phi;
 typedef struct Blk Blk;
-typedef struct Sym Sym;
-typedef struct Cons Cons;
+typedef struct Tmp Tmp;
+typedef struct Con Con;
 typedef struct Fn Fn;
 
 typedef enum { U, F, T } B3;
@@ -40,10 +40,28 @@ enum {
 	RBP, /* reserved */
 	RSP,
 
+	EAX, /* 32bits */
+	ECX,
+	EDX,
+	ESI,
+	EDI,
+	R8D,
+	R9D,
+	R10D,
+	R11D,
+
+	EBX,
+	R12D,
+	R13D,
+	R14D,
+	R15D,
+
 	// NReg = R15 - RAX + 1
 	NReg = 3 /* for test purposes */
 };
 
+#define LTW(r) (r + (EAX-RAX))
+
 enum {
 	NString = 32,
 	NPred   = 15,
@@ -68,16 +86,16 @@ struct Ref {
 };
 
 enum {
-	RSym,
-	RCons,
+	RTmp,
+	RCon,
 	RSlot,
 	RReg,
 	NRef = (1<<14) - 1
 };
 
 #define R        (Ref){0, 0}
-#define SYM(x)   (Ref){RSym, x}
-#define CONS(x)  (Ref){RCons, x}
+#define TMP(x)   (Ref){RTmp, x}
+#define CON(x)   (Ref){RCon, x}
 #define SLOT(x)  (Ref){RSlot, x}
 #define REG(x)   (Ref){RReg, x}
 
@@ -153,11 +171,11 @@ struct Blk {
 	char name[NString];
 };
 
-struct Sym {
+struct Tmp {
 	enum {
-		SUndef,
-		SWord,
-		SLong,
+		TUndef,
+		TWord,
+		TLong,
 	} type;
 	char name[NString];
 	uint ndef, nuse;
@@ -166,7 +184,7 @@ struct Sym {
 	int hint;
 };
 
-struct Cons {
+struct Con {
 	enum {
 		CUndef,
 		CNum,
@@ -178,9 +196,10 @@ struct Cons {
 
 struct Fn {
 	Blk *start;
-	Sym *sym;
-	Cons *cons;
-	int nsym;
+	Tmp *tmp;
+	Con *con;
+	int ntmp;
+	int ncon;
 	int nblk;
 	Blk **rpo;
 	uint nspill;
@@ -189,7 +208,7 @@ struct Fn {
 
 /* main.c */
 extern char debug['Z'+1];
-void dumpss(Bits *, Sym *, FILE *);
+void dumpts(Bits *, Tmp *, FILE *);
 
 /* parse.c */
 extern OpDesc opdesc[];
diff --git a/lisc/live.c b/lisc/live.c
index d1c31a2..aec7701 100644
--- a/lisc/live.c
+++ b/lisc/live.c
@@ -6,7 +6,7 @@ bset(Ref r, Blk *b, Bits *rb, int *nlv)
 	Bits *bs;
 
 	switch (rtype(r)) {
-	case RSym:
+	case RTmp:
 		bs = &b->in;
 		BSET(b->gen, r.val);
 		break;
@@ -14,7 +14,6 @@ bset(Ref r, Blk *b, Bits *rb, int *nlv)
 		bs = rb;
 		break;
 	default:
-		diag("live: unhandled reference");
 		return;
 	}
 	if (!BGET(*bs, r.val)) {
@@ -36,7 +35,7 @@ filllive(Fn *f)
 	uint a;
 	Bits tb, rb;
 
-	assert(f->nsym <= NBit*BITS);
+	assert(f->ntmp <= NBit*BITS);
 	for (b=f->start; b; b=b->link) {
 		b->in = (Bits){{0}};
 		b->out = (Bits){{0}};
@@ -64,15 +63,18 @@ Again:
 		for (i=&b->ins[b->nins]; i!=b->ins;) {
 			i--;
 			switch (rtype(i->to)) {
-			case RSym:
+			default:
+				diag("live: unhandled destination");
+			case RTmp:
 				nlv -= BGET(b->in, i->to.val);
 				BCLR(b->in, i->to.val);
 				break;
 			case RReg:
 				nlv -= BGET(rb, i->to.val);
 				BCLR(rb, i->to.val);
-			default:
-				diag("live: unhandled destination");
+				break;
+			case -1:
+				break;
 			}
 			bset(i->arg[0], b, &rb, &nlv);
 			bset(i->arg[1], b, &rb, &nlv);
@@ -83,7 +85,7 @@ Again:
 		for (p=b->phi; p; p=p->link) {
 			BCLR(b->in, p->to.val);
 			for (a=0; a<p->narg; a++)
-				if (rtype(p->arg[a]) == RSym)
+				if (rtype(p->arg[a]) == RTmp)
 					BSET(p->blk[a]->out, p->arg[a].val);
 		}
 	}
diff --git a/lisc/main.c b/lisc/main.c
index f66041a..5eecd9b 100644
--- a/lisc/main.c
+++ b/lisc/main.c
@@ -7,14 +7,14 @@ char debug['Z'+1] = {
 };
 
 void
-dumpss(Bits *b, Sym *s, FILE *f)
+dumpts(Bits *b, Tmp *tmp, FILE *f)
 {
 	int t;
 
 	fprintf(f, "[");
 	for (t=0; t<BITS*NBit; t++)
 		if (BGET(*b, t))
-			fprintf(f, " %s", s[t].name);
+			fprintf(f, " %s", tmp[t].name);
 	fprintf(f, " ]\n");
 }
 
@@ -33,13 +33,13 @@ main(int ac, char *av[])
 
 	switch (opt) {
 	case 'f': {
-		int s, nsym;
+		int t, ntmp;
 
 		fprintf(stderr, "[Testing SSA Reconstruction:");
 		fillpreds(fn);
-		for (nsym=fn->nsym, s=0; s<nsym; s++) {
-			fprintf(stderr, " %s", fn->sym[s].name);
-			ssafix(fn, s);
+		for (ntmp=fn->ntmp, t=0; t<ntmp; t++) {
+			fprintf(stderr, " %s", fn->tmp[t].name);
+			ssafix(fn, t);
 		}
 		fprintf(stderr, "]\n");
 		break;
@@ -68,9 +68,9 @@ main(int ac, char *av[])
 		for (b=fn->start; b; b=b->link) {
 			printf("> Block %s\n", b->name);
 			printf("\t in:   ");
-			dumpss(&b->in, fn->sym, stdout);
+			dumpts(&b->in, fn->tmp, stdout);
 			printf("\tout:   ");
-			dumpss(&b->out, fn->sym, stdout);
+			dumpts(&b->out, fn->tmp, stdout);
 			printf("\tnlive: %d\n", b->nlive);
 		}
 		pr = 0;
@@ -96,7 +96,7 @@ main(int ac, char *av[])
 		for (b=fn->start; b; b=b->link) {
 			printf("\t%-10s (% 5d) ",
 				b->name, b->loop);
-			dumpss(&b->out, fn->sym, stdout);
+			dumpts(&b->out, fn->tmp, stdout);
 		}
 		printf("\n");
 		break;
@@ -132,7 +132,7 @@ main(int ac, char *av[])
 				break;
 			} else
 				fn->rpo[n]->link = fn->rpo[n+1];
-		emitfn(fn, stdout);
+		// emitfn(fn, stdout);
 		pr = 0;
 		break;
 	}
@@ -142,5 +142,5 @@ main(int ac, char *av[])
 
 	if (pr)
 		printfn(fn, stdout);
-	return 0;
+	exit(0);
 }
diff --git a/lisc/parse.c b/lisc/parse.c
index e1a1174..fe82cd8 100644
--- a/lisc/parse.c
+++ b/lisc/parse.c
@@ -4,8 +4,8 @@
 #include <ctype.h>
 
 enum {
-	NSym = 256,
-	NCons = 256,
+	NTmp = 256,
+	NCon = 256,
 };
 
 Ins insb[NIns], *curi;
@@ -48,7 +48,7 @@ typedef enum {
 	TL,
 
 	TNum,
-	TVar,
+	TTmp,
 	TLbl,
 	TAddr,
 	TEq,
@@ -68,10 +68,10 @@ static struct {
 } tokval;
 static int lnum;
 
-static Sym sym[NSym];
-static Cons cons[NCons];
-static int nsym;
-static int ncons;
+static Tmp tmp[NTmp];
+static Con con[NCon];
+static int ntmp;
+static int ncon;
 static Phi **plink;
 static Blk *bmap[NBlk+1];
 static Blk *curb;
@@ -150,7 +150,7 @@ lex()
 	case '=':
 		return TEq;
 	case '%':
-		t = TVar;
+		t = TTmp;
 		c = fgetc(inf);
 		goto Alpha;
 	case '@':
@@ -242,45 +242,45 @@ blocka()
 }
 
 static Ref
-varref(char *v)
+tmpref(char *v)
 {
-	int s;
-
-	for (s=0; s<nsym; s++)
-		if (strcmp(v, sym[s].name) == 0)
-			return SYM(s);
-	if (nsym++ >= NSym)
-		err("too many symbols");
-	strcpy(sym[s].name, v);
-	return SYM(s);
+	int t;
+
+	for (t=0; t<ntmp; t++)
+		if (strcmp(v, tmp[t].name) == 0)
+			return TMP(t);
+	if (ntmp++ >= NTmp)
+		err("too many temporaries");
+	strcpy(tmp[t].name, v);
+	return TMP(t);
 }
 
 static Ref
 parseref()
 {
-	Cons c;
+	Con c;
 	int i;
 
 	switch (next()) {
-	case TVar:
-		return varref(tokval.str);
+	case TTmp:
+		return tmpref(tokval.str);
 	case TNum:
-		c = (Cons){.type = CNum, .val = tokval.num};
+		c = (Con){.type = CNum, .val = tokval.num};
 		strcpy(c.label, "");
 	if (0) {
 	case TAddr:
-		c = (Cons){.type = CAddr, .val = 0};
+		c = (Con){.type = CAddr, .val = 0};
 		strcpy(c.label, tokval.str);
 	}
-		for (i=0; i<ncons; i++)
-			if (cons[i].type == c.type
-			&& cons[i].val == c.val
-			&& strcmp(cons[i].label, c.label) == 0)
-				return CONS(i);
-		if (ncons++ >= NCons)
+		for (i=0; i<ncon; i++)
+			if (con[i].type == c.type
+			&& con[i].val == c.val
+			&& strcmp(con[i].label, c.label) == 0)
+				return CON(i);
+		if (ncon++ >= NCon)
 			err("too many constants");
-		cons[i] = c;
-		return CONS(i);
+		con[i] = c;
+		return CON(i);
 	default:
 		return R;
 	}
@@ -358,7 +358,7 @@ parseline(PState ps)
 		err("label, instruction or jump expected");
 	case TEOF:
 		return PEnd;
-	case TVar:
+	case TTmp:
 		break;
 	case TLbl:
 		b = findblk(tokval.str);
@@ -400,14 +400,14 @@ parseline(PState ps)
 		closeblk();
 		return PLbl;
 	}
-	r = varref(tokval.str);
+	r = tmpref(tokval.str);
 	expect(TEq);
 	switch (next()) {
 	case TW:
-		sym[r.val].type = SWord;
+		tmp[r.val].type = TWord;
 		break;
 	case TL:
-		sym[r.val].type = SLong;
+		tmp[r.val].type = TLong;
 		break;
 	default:
 		err("class expected after =");
@@ -490,10 +490,10 @@ parsefn(FILE *f)
 	inf = f;
 	for (i=0; i<NBlk; i++)
 		bmap[i] = 0;
-	for (i=0; i<NSym; i++)
-		sym[i] = (Sym){.name = ""};
-	nsym = 0;
-	ncons = 0;
+	for (i=0; i<NTmp; i++)
+		tmp[i] = (Tmp){.name = ""};
+	ntmp = 1;
+	ncon = 0;
 	curi = insb;
 	curb = 0;
 	lnum = 1;
@@ -508,11 +508,12 @@ parsefn(FILE *f)
 		err("empty file");
 	if (curb->jmp.type == JXXX)
 		err("last block misses jump");
-	fn->sym = alloc(nsym * sizeof sym[0]);
-	memcpy(fn->sym, sym, nsym * sizeof sym[0]);
-	fn->cons = alloc(ncons * sizeof cons[0]);
-	memcpy(fn->cons, cons, ncons * sizeof cons[0]);
-	fn->nsym = nsym;
+	fn->tmp = alloc(ntmp * sizeof tmp[0]);
+	memcpy(fn->tmp, tmp, ntmp * sizeof tmp[0]);
+	fn->con = alloc(ncon * sizeof con[0]);
+	memcpy(fn->con, con, ncon * sizeof con[0]);
+	fn->ntmp = ntmp;
+	fn->ncon = ncon;
 	fn->nblk = nblk;
 	fn->rpo = 0;
 	return fn;
@@ -522,24 +523,24 @@ static char *
 printref(Ref r, Fn *fn, FILE *f)
 {
 	static char *ttoa[] = {
-		[SUndef] = "?",
-		[SWord] = "w",
-		[SLong] = "l",
+		[TUndef] = "?",
+		[TWord] = "w",
+		[TLong] = "l",
 	};
 
 	switch (r.type) {
-	case RSym:
-		fprintf(f, "%%%s", fn->sym[r.val].name);
-		return ttoa[fn->sym[r.val].type];
-	case RCons:
-		switch (fn->cons[r.val].type) {
+	case RTmp:
+		fprintf(f, "%%%s", fn->tmp[r.val].name);
+		return ttoa[fn->tmp[r.val].type];
+	case RCon:
+		switch (fn->con[r.val].type) {
 		case CAddr:
-			fprintf(f, "$%s", fn->cons[r.val].label);
-			if (fn->cons[r.val].val)
-				fprintf(f, "%+"PRIi64, fn->cons[r.val].val);
+			fprintf(f, "$%s", fn->con[r.val].label);
+			if (fn->con[r.val].val)
+				fprintf(f, "%+"PRIi64, fn->con[r.val].val);
 			break;
 		case CNum:
-			fprintf(f, "%"PRIi64, fn->cons[r.val].val);
+			fprintf(f, "%"PRIi64, fn->con[r.val].val);
 			break;
 		case CUndef:
 			diag("printref: invalid constant");
diff --git a/lisc/rega.c b/lisc/rega.c
index ac0b2d4..b839538 100644
--- a/lisc/rega.c
+++ b/lisc/rega.c
@@ -11,7 +11,7 @@ struct RMap {
 };
 
 extern Ins insb[NIns], *curi;
-static Sym *sym;       /* symbol table in use */
+static Tmp *tmp;       /* function temporaries */
 static struct {
 	Ref src, dst;
 } *pm;                 /* parallel move constructed */
@@ -35,17 +35,23 @@ rref(RMap *m, int t)
 
 	r = rfind(m, t);
 	if (r == -1) {
-		s = sym[t].spill;
+		s = tmp[t].spill;
 		assert(s && "should have spilled");
 		return SLOT(s);
 	} else
-		return SYM(r);
+		switch (tmp[t].type) {
+		default:
+			assert(!"invalid temporary");
+		case TWord:
+			return REG(LTW(r));
+		case TLong:
+			return REG(r);
+		}
 }
 
 static void
 radd(RMap *m, int t, int r)
 {
-	assert(sym[t].type == STmp && "invalid symbol");
 	assert(RAX <= r && r < RAX + NReg && "invalid register");
 	assert(!BGET(m->bt, t) && "temp has mapping");
 	assert(!BGET(m->br, r) && "reg already allocated");
@@ -62,22 +68,19 @@ ralloc(RMap *m, int t)
 {
 	int r;
 
-	if (sym[t].type == SReg) {
-		assert(BGET(m->br, t));
-		r = t;
-	} else if (BGET(m->bt, t)) {
+	if (BGET(m->bt, t)) {
 		r = rfind(m, t);
-		assert(r > 0);
+		assert(r != -1);
 	} else {
-		r = sym[t].hint;
+		r = tmp[t].hint;
 		if (r == -1 || BGET(m->br, r))
 			for (r=RAX; BGET(m->br, r); r++)
 				;
 		radd(m, t, r);
-		if (sym[t].hint == -1)
-			sym[t].hint = r;
+		if (tmp[t].hint == -1)
+			tmp[t].hint = r;
 	}
-	return SYM(r);
+	return REG(r);
 }
 
 static int
@@ -85,21 +88,16 @@ rfree(RMap *m, int t)
 {
 	int i, r;
 
-	if (sym[t].type == SReg) {
-		assert(BGET(m->br, t));
-		BCLR(m->br, t);
-		return t;
-	}
 	if (!BGET(m->bt, t))
-		return 0;
+		return -1;
 	for (i=0; m->t[i] != t; i++)
 		assert(i+1 < m->n);
 	r = m->r[i];
 	BCLR(m->bt, t);
 	BCLR(m->br, r);
 	m->n--;
-	memmove(&m->t[i], &m->t[i+1], (m->n-i) * sizeof(int));
-	memmove(&m->r[i], &m->r[i+1], (m->n-i) * sizeof(int));
+	memmove(&m->t[i], &m->t[i+1], (m->n-i) * m->t[0]);
+	memmove(&m->r[i], &m->r[i+1], (m->n-i) * m->r[0]);
 	return r;
 }
 
@@ -109,18 +107,12 @@ mdump(RMap *m)
 	int i;
 
 	for (i=0; i<m->n; i++)
-		fprintf(stderr, " (%s, %s)",
-			sym[m->t[i]].name,
-			sym[m->r[i]].name);
+		fprintf(stderr, " (%s, reg%d)",
+			tmp[m->t[i]].name,
+			m->r[i]);
 	fprintf(stderr, "\n");
 }
 
-static inline int
-isreg(Ref r)
-{
-	return rtype(r) == RSym && sym[r.val].type == SReg;
-}
-
 static void
 pmadd(Ref src, Ref dst)
 {
@@ -199,22 +191,25 @@ dopm(Blk *b, Ins *i, RMap *m)
 
 	npm = 0;
 	i1 = i+1;
-	if (isreg(i->to))
+	if (rtype(i->to) == RReg)
 		for (;; i--) {
 			r = i->to.val;
 			rt = i->arg[0];
-			assert(rtype(rt) == RSym);
-			rfree(m, r);
+			assert(rtype(rt) == RTmp);
+			assert(BGET(m->br, r));
+			/* todo, assert that r is not mapped */
+			BCLR(m->br, r);
 			rt = ralloc(m, rt.val);
 			pmadd(rt, i->to);
 			if (i==b->ins
 			|| (i-1)->op!=OCopy
-			|| !isreg((i-1)->to))
+			|| rtype((i-1)->to) != RReg)
 				break;
 		}
-	else if (isreg(i->arg[0]))
+	else if (rtype(i->arg[0]) == RReg)
 		for (;; i--) {
 			r = i->arg[0].val;
+			assert(req(i->to, R) || i->to.type == RTmp);
 			if (req(i->to, R)) {
 				if (BGET(m->br, r)) {
 					for (n=0; m->r[n] != r; n++)
@@ -226,13 +221,13 @@ dopm(Blk *b, Ins *i, RMap *m)
 					pmadd(rt, i->arg[0]);
 				}
 			} else if (BGET(m->bt, i->to.val)) {
-				rt = SYM(rfree(m, i->to.val));
+				rt = TMP(rfree(m, i->to.val));
 				pmadd(i->arg[0], rt);
 			}
 			BSET(m->br, r);
 			if (i==b->ins
 			|| (i-1)->op!=OCopy
-			|| !isreg((i-1)->arg[0]))
+			|| rtype((i-1)->arg[0]) != RReg)
 				break;
 		}
 	else
@@ -257,7 +252,7 @@ dopm(Blk *b, Ins *i, RMap *m)
 void
 rega(Fn *fn)
 {
-	int n, t, u, r, x;
+	int n, t, r, x;
 	Blk *b, *b1, *s, ***ps, *blist;
 	Ins *i;
 	RMap *end, *beg, cur;
@@ -265,27 +260,23 @@ rega(Fn *fn)
 	uint a;
 	Ref src, dst;
 
-	sym = fn->sym;
+	tmp = fn->tmp;
 	end = alloc(fn->ntmp * sizeof end[0]);
 	beg = alloc(fn->ntmp * sizeof beg[0]);
 
 	/* 1. gross hinting setup */
-	for (t=Tmp0; t<fn->ntmp; t++)
-		sym[t].hint = -1;
-	for (b=fn->start; b; b=b->link) {
-		for (i=b->ins; i-b->ins < b->nins; i++) {
-			if (i->op == OCopy
-			&& rtype(i->arg[0]) == RSym
-			&& !req(i->to, R)) {
-				t = i->arg[0].val;
-				u = i->to.val;
-				if (sym[t].type == SReg)
-					sym[u].hint = t;
-				if (sym[u].type == SReg)
-					sym[t].hint = u;
+	for (t=0; t<fn->ntmp; t++)
+		tmp[t].hint = -1;
+	for (b=fn->start; b; b=b->link)
+		for (i=b->ins; i-b->ins < b->nins; i++)
+			if (i->op == OCopy) {
+				if (rtype(i->arg[0]) == RTmp
+				&& rtype(i->to) == RReg)
+					tmp[i->arg[0].val].hint = i->to.val;
+				if (rtype(i->to) == RTmp
+				&& rtype(i->arg[0]) == RReg)
+					tmp[i->to.val].hint = i->arg[0].val;
 			}
-		}
-	}
 
 	/* 2. assign registers backwards */
 	if (debug['R'])
@@ -305,36 +296,48 @@ rega(Fn *fn)
 		 * successor
 		 */
 		if (b1)
-			for (t=Tmp0; t<fn->ntmp; t++)
+			for (t=0; t<fn->ntmp; t++)
 				if (BGET(b->out, t))
 				if ((r = rfind(&beg[b1->id], t)) != -1)
 					radd(&cur, t, r);
 		for (x=0; x<2; x++)
-			for (t=Tmp0; t<fn->ntmp; t++)
-				if (x==1 || sym[t].hint!=-1)
+			for (t=0; t<fn->ntmp; t++)
+				if (x==1 || tmp[t].hint!=-1)
 				if (BGET(b->out, t))
 				if (!BGET(cur.bt, t))
 					ralloc(&cur, t);
 		/* process instructions */
 		end[n] = cur;
-		if (rtype(b->jmp.arg) == RSym)
+		assert(rtype(b->jmp.arg) != RReg);
+		if (rtype(b->jmp.arg) == RTmp)
 			b->jmp.arg = ralloc(&cur, b->jmp.arg.val);
 		for (i=&b->ins[b->nins]; i!=b->ins;) {
 			i--;
 			if (i->op == OCopy /* par. moves are special */
-			&& (isreg(i->arg[0]) || isreg(i->to))) {
+			&& (rtype(i->arg[0]) == RReg || rtype(i->to) == RReg)) {
 				i = dopm(b, i, &cur);
 				continue;
 			}
-			if (!req(i->to, R)) {
+			switch (rtype(i->to)) {
+			default:
+				assert(!"unhandled destination");
+			case RTmp:
 				r = rfree(&cur, i->to.val);
-				if (!r) {
+				if (r == -1) {
 					*i = (Ins){ONop, R, {R, R}};
 					continue;
 				}
-				i->to = SYM(r);
+				i->to = REG(r);
+				break;
+			case RReg:
+				r = i->to.val;
+				assert(BGET(cur.br, r));
+				BCLR(cur.br, r);
+				break;
+			case -1:;
 			}
-			if (rtype(i->arg[0]) == RSym) {
+			switch (rtype(i->arg[0])) {
+			case RTmp:
 				/* <arch>
 				 *   on Intel, we attempt to
 				 *   use the same register
@@ -342,11 +345,16 @@ rega(Fn *fn)
 				 *   first argument
 				 */
 				t = i->arg[0].val;
-				if (sym[t].hint == -1 && r)
-					sym[t].hint = r;
+				if (tmp[t].hint == -1 && r)
+					tmp[t].hint = r;
 				i->arg[0] = ralloc(&cur, t);
+				break;
+			case RReg:
+				BSET(cur.br, i->arg[0].val);
+				break;
 			}
-			if (rtype(i->arg[1]) == RSym) {
+			switch (rtype(i->arg[1])) {
+			case RTmp:
 				/* <arch>
 				 *   on Intel, we have to
 				 *   make sure we avoid the
@@ -359,11 +367,15 @@ rega(Fn *fn)
 				i->arg[1] = ralloc(&cur, t);
 				if (opdesc[i->op].comm == F && r)
 					BCLR(cur.br, r);
+				break;
+			case RReg:
+				BSET(cur.br, i->arg[1].val);
+				break;
 			}
 		}
 		b->in = cur.bt;
 		for (p=b->phi; p; p=p->link)
-			if (rtype(p->to) == RSym)
+			if (rtype(p->to) == RTmp)
 				BCLR(b->in, p->to.val);
 		beg[n] = cur;
 		if (debug['R']) {
@@ -383,23 +395,22 @@ rega(Fn *fn)
 		for (; (s=**ps); ps++) {
 			npm = 0;
 			for (p=s->phi; p; p=p->link) {
-				assert(rtype(p->to) == RSlot
-					|| rtype(p->to) == RSym);
 				dst = p->to;
-				if (rtype(dst) == RSym) {
+				assert(rtype(dst)==RSlot|| rtype(dst)==RTmp);
+				if (rtype(dst) == RTmp) {
 					r = rfind(&beg[s->id], dst.val);
-					if (!r)
+					if (r == -1)
 						continue;
-					dst = SYM(r);
+					dst = REG(r);
 				}
 				for (a=0; p->blk[a]!=b; a++)
 					assert(a+1 < p->narg);
 				src = p->arg[a];
-				if (rtype(src) == RSym)
+				if (rtype(src) == RTmp)
 					src = rref(&end[b->id], src.val);
 				pmadd(src, dst);
 			}
-			for (t=Tmp0; t<fn->ntmp; t++)
+			for (t=0; t<fn->ntmp; t++)
 				if (BGET(s->in, t)) {
 					src = rref(&end[b->id], t);
 					dst = rref(&beg[s->id], t);
diff --git a/lisc/spill.c b/lisc/spill.c
index 921a81c..64200af 100644
--- a/lisc/spill.c
+++ b/lisc/spill.c
@@ -13,7 +13,7 @@ loopmark(Blk *hd, Blk *b, Phi *p)
 	for (; p; p=p->link)
 		for (a=0; a<p->narg; a++)
 			if (p->blk[a] == b)
-			if (rtype(p->arg[a]) == RSym)
+			if (rtype(p->arg[a]) == RTmp)
 				BSET(hd->gen, p->arg[a].val);
 	if (b->visit == head)
 		return;
@@ -30,20 +30,16 @@ loopmark(Blk *hd, Blk *b, Phi *p)
 }
 
 static void
-symuse(Ref r, int use, int loop, Fn *fn)
+tmpuse(Ref r, int use, int loop, Fn *fn)
 {
-	Sym *s;
+	Tmp *t;
 
-	if (rtype(r) != RSym)
+	if (rtype(r) != RTmp)
 		return;
-	s = &fn->sym[r.val];
-	if (s->type != STmp)
-		return;
-	if (use)
-		s->nuse++;
-	else
-		s->ndef++;
-	s->cost += loop;
+	t = &fn->tmp[r.val];
+	t->nuse += use;
+	t->ndef += 1 - use;
+	t->cost += loop;
 }
 
 /* evaluate spill costs of temporaries,
@@ -57,7 +53,7 @@ fillcost(Fn *fn)
 	uint a;
 	Blk *b;
 	Ins *i;
-	Sym *s;
+	Tmp *t;
 	Phi *p;
 
 	for (b=fn->start; b; b=b->link) {
@@ -77,43 +73,41 @@ fillcost(Fn *fn)
 		if (hd && debug['S']) {
 			fprintf(stderr, "\t%-10s", b->name);
 			fprintf(stderr, " (% 3d) ", b->nlive);
-			dumpss(&b->gen, fn->sym, stderr);
+			dumpts(&b->gen, fn->tmp, stderr);
 		}
 	}
-	for (s=fn->sym; s-fn->sym < fn->ntmp; s++) {
-		s->cost = 0;
-		s->nuse = 0;
-		s->ndef = 0;
-		if (s->type == SReg)
-			s->cost = 100000;
+	for (t=fn->tmp; t-fn->tmp < fn->ntmp; t++) {
+		t->cost = 0;
+		t->nuse = 0;
+		t->ndef = 0;
 	}
 	for (b=fn->start; b; b=b->link) {
 		for (p=b->phi; p; p=p->link) {
-			/* zero cost for temporaries used
-			 * in phi instructions */
-			symuse(p->to, 0, 0, fn);
+			/* todo, the cost computation
+			 * for p->to is not great... */
+			tmpuse(p->to, 0, 0, fn);
 			for (a=0; a<p->narg; a++) {
 				n = p->blk[a]->loop;
 				assert(b->npred==p->narg &&
 					"wrong cfg");
 				n /= b->npred;
-				symuse(p->arg[a], 1, n, fn);
+				tmpuse(p->arg[a], 1, n, fn);
 			}
 		}
 		n = b->loop;
 		for (i=b->ins; i-b->ins < b->nins; i++) {
-			symuse(i->to, 0, n, fn);
-			symuse(i->arg[0], 1, n, fn);
-			symuse(i->arg[1], 1, n, fn);
+			tmpuse(i->to, 0, n, fn);
+			tmpuse(i->arg[0], 1, n, fn);
+			tmpuse(i->arg[1], 1, n, fn);
 		}
-		symuse(b->jmp.arg, 1, n, fn);
+		tmpuse(b->jmp.arg, 1, n, fn);
 	}
 	if (debug['S']) {
 		fprintf(stderr, "\n> Spill costs:\n");
-		for (n=Tmp0; n<fn->ntmp; n++)
+		for (n=0; n<fn->ntmp; n++)
 			fprintf(stderr, "\t%-10s %d\n",
-				fn->sym[n].name,
-				fn->sym[n].cost);
+				fn->tmp[n].name,
+				fn->tmp[n].cost);
 		fprintf(stderr, "\n");
 	}
 }
@@ -148,14 +142,16 @@ bcnt(Bits *b) /* glad I can pull it :) */
 
 extern Ins insb[NIns], *curi; /* shared work buffer */
 static Bits *f;  /* temps to prioritize in registers (for tcmp1) */
-static Sym *sym; /* current symbol table (for tcmpX) */
+static Tmp *tmp; /* current temporaries (for tcmpX) */
 static int ntmp; /* current # of temps (for limit) */
 static uint ns;  /* current # of spill slots */
+static int nreg; /* # of registers available */
+static Bits br;  /* live registers */
 
 static int
 tcmp0(const void *pa, const void *pb)
 {
-	return sym[*(int *)pb].cost - sym[*(int *)pa].cost;
+	return tmp[*(int *)pb].cost - tmp[*(int *)pa].cost;
 }
 
 static int
@@ -172,11 +168,10 @@ slot(int t)
 {
 	int s;
 
-	assert(sym[t].type == STmp);
-	s = sym[t].spill;
+	s = tmp[t].spill;
 	if (!s) {
-		s = 1 + ns++;
-		sym[t].spill = s;
+		s = ++ns;
+		tmp[t].spill = s;
 	}
 	return SLOT(s);
 }
@@ -192,7 +187,7 @@ emit(short op, Ref to, Ref arg0, Ref arg1)
 static Bits
 limit(Bits *b, int k, Bits *fst)
 {
-	static int *tmp, maxt;
+	static int *tarr, maxt;
 	int i, t, nt;
 	Bits res;
 
@@ -200,28 +195,28 @@ limit(Bits *b, int k, Bits *fst)
 	if (nt <= k)
 		return *b;
 	if (nt > maxt) {
-		free(tmp);
-		tmp = alloc(nt * sizeof tmp[0]);
+		free(tarr);
+		tarr = alloc(nt * sizeof tarr[0]);
 		maxt = nt;
 	}
 	i = 0;
 	for (t=0; t<ntmp; t++)
 		if (BGET(*b, t))
-			tmp[i++] = t;
+			tarr[i++] = t;
 	assert(i == nt);
 	if (!fst)
-		qsort(tmp, nt, sizeof tmp[0], tcmp0);
+		qsort(tarr, nt, sizeof tarr[0], tcmp0);
 	else {
 		f = fst;
-		qsort(tmp, nt, sizeof tmp[0], tcmp1);
+		qsort(tarr, nt, sizeof tarr[0], tcmp1);
 	}
 	res = (Bits){{0}};
 	for (i=0; i<k && i<nt; i++)
-		BSET(res, tmp[i]);
+		BSET(res, tarr[i]);
 	for (; i<nt; i++) {
-		slot(tmp[i]);
+		slot(tarr[i]);
 		if (curi)
-			emit(OLoad, SYM(tmp[i]), slot(tmp[i]), R);
+			emit(OLoad, TMP(tarr[i]), slot(tarr[i]), R);
 	}
 	return res;
 }
@@ -240,13 +235,20 @@ setloc(Ref *pr, Bits *v, Bits *w)
 	 *   't' to 'w' before calling
 	 *   limit
 	 */
-	if (rtype(*pr) != RSym)
+	if (rtype(*pr) == RReg) {
+		nreg -= !BGET(br, pr->val);
+		BSET(br, pr->val);
+	}
+	if (rtype(*pr) != RTmp)
 		return;
 	t = pr->val;
 	BSET(*v, t);
-	*v = limit(v, NReg, w);
+	*v = limit(v, nreg, w);
 	if (curi->op == OLoad)
 	if (curi->to.val == t)
+		/* if t was spilled by limit,
+		 * it was not live so we don't
+		 * have to reload it */
 		curi++;
 	if (!BGET(*v, t))
 		*pr = slot(t);
@@ -279,13 +281,15 @@ spill(Fn *fn)
 	Phi *p;
 
 	ns = 0;
-	sym = fn->sym;
+	tmp = fn->tmp;
 	ntmp = fn->ntmp;
 	assert(ntmp < NBit*BITS);
 
 	for (n=fn->nblk-1; n>=0; n--) {
 		/* invariant: m>n => in,out of m updated */
 		b = fn->rpo[n];
+		nreg = NReg;
+		assert(bcnt(&br) == 0);
 
 		/* 1. find temporaries in registers at
 		 * the end of the block (put them in v) */
@@ -305,16 +309,16 @@ spill(Fn *fn)
 			for (z=0; z<BITS; z++)
 				v.t[z] = hd->gen.t[z] & b->out.t[z];
 			j = bcnt(&v);
-			if (j < NReg) {
+			if (j < nreg) {
 				w = b->out;
 				for (z=0; z<BITS; z++)
 					w.t[z] &= ~v.t[z];
 				j = bcnt(&w);   /* live through */
-				w = limit(&w, NReg - (l - j), 0);
+				w = limit(&w, nreg - (l - j), 0);
 				for (z=0; z<BITS; z++)
 					v.t[z] |= w.t[z];
-			} else if (j > NReg)
-				v = limit(&v, NReg, 0);
+			} else if (j > nreg)
+				v = limit(&v, nreg, 0);
 		} else if (s1) {
 			v = s1->in;
 			w = s1->in;
@@ -323,16 +327,16 @@ spill(Fn *fn)
 					v.t[z] |= s2->in.t[z];
 					w.t[z] &= s2->in.t[z];
 				}
-			assert(bcnt(&w) <= NReg);
-			v = limit(&v, NReg, &w);
+			assert(bcnt(&w) <= nreg);
+			v = limit(&v, nreg, &w);
 		}
 		b->out = v;
-		assert(bcnt(&v) <= NReg);
+		assert(bcnt(&v) <= nreg);
 
 		/* 2. process the block instructions */
-		if (rtype(b->jmp.arg) == RSym) {
+		if (rtype(b->jmp.arg) == RTmp) {
 			j = b->jmp.arg.val;
-			if (!BGET(v, j) && l==NReg) {
+			if (!BGET(v, j) && l==nreg) {
 				v = limit(&v, l-1, 0);
 				b->out = v;
 			}
@@ -340,20 +344,32 @@ spill(Fn *fn)
 		}
 		curi = &insb[NIns];
 		for (i=&b->ins[b->nins]; i!=b->ins;) {
-			assert(bcnt(&v) <= NReg);
+			assert(bcnt(&v) <= nreg);
 			i--;
-			if (!req(i->to, R)) {
-				assert(rtype(i->to)==RSym);
+			s = 0;
+			switch (rtype(i->to)) {
+			default:
+				assert(!"unhandled destination");
+			case RTmp:
 				j = i->to.val;
 				if (BGET(v, j))
 					BCLR(v, j);
 				else
 					v = limit(&v, NReg-1, &w);
-				s = sym[j].spill;
-			} else
-				s = 0;
+				s = tmp[j].spill;
+				break;
+			case RReg:
+				j = i->to.val;
+				if (BGET(br, j)) {
+					BCLR(br, j);
+					nreg++;
+				} else
+					v = limit(&v, nreg-1, &w);
+				break;
+			case -1:;
+			}
 			w = (Bits){{0}};
-			if (rtype(i->arg[1]) == RSym
+			if (rtype(i->arg[1]) == RTmp
 			&& !req(i->to, R)
 			&& opdesc[i->op].comm == F) {
 				/* <arch>
@@ -376,20 +392,21 @@ spill(Fn *fn)
 		}
 
 		for (p=b->phi; p; p=p->link) {
+			assert(rtype(p->to) == RTmp);
 			t = p->to.val;
 			if (BGET(v, t)) {
 				BCLR(v, t);
-				s = sym[t].spill;
+				s = tmp[t].spill;
 				if (s)
 					emit(OStore, R, p->to, SLOT(s));
 			} else
 				p->to = slot(p->to.val);
 		}
+		b->in = v;
 		free(b->ins);
 		b->nins = &insb[NIns] - curi;
 		b->ins = alloc(b->nins * sizeof(Ins));
 		memcpy(b->ins, curi, b->nins * sizeof(Ins));
-		b->in = v;
 	}
 	fn->nspill = ns;
 }
diff --git a/lisc/ssa.c b/lisc/ssa.c
index 6ce9a82..0980de4 100644
--- a/lisc/ssa.c
+++ b/lisc/ssa.c
@@ -102,7 +102,7 @@ static Ref
 topdef(Blk *b, Fn *f)
 {
 	uint i;
-	int s1;
+	int t1;
 	Ref r;
 	Phi *p;
 
@@ -115,8 +115,8 @@ topdef(Blk *b, Fn *f)
 		return r;
 	}
 	/* add a phi node */
-	s1 = f->nsym++;
-	r = SYM(s1);
+	t1 = f->ntmp++;
+	r = TMP(t1);
 	top[b->id] = r;
 	p = alloc(sizeof *p);
 	p->link = b->phi;
@@ -137,7 +137,7 @@ void
 ssafix(Fn *f, int t)
 {
 	uint n;
-	int s0, s1;
+	int t0, t1;
 	Ref rt;
 	Blk *b;
 	Phi *p;
@@ -145,32 +145,32 @@ ssafix(Fn *f, int t)
 
 	top = alloc(f->nblk * sizeof top[0]);
 	bot = alloc(f->nblk * sizeof bot[0]);
-	rt = SYM(t);
-	s0 = f->nsym;
+	rt = TMP(t);
+	t0 = f->ntmp;
 	for (b=f->start; b; b=b->link) {
-		s1 = 0;
+		t1 = 0;
 		/* rename defs and some in-blocks uses */
 		for (p=b->phi; p; p=p->link)
 			if (req(p->to, rt)) {
-				s1 = f->nsym++;
-				p->to = SYM(s1);
+				t1 = f->ntmp++;
+				p->to = TMP(t1);
 			}
 		for (i=b->ins; i-b->ins < b->nins; i++) {
-			if (s1) {
+			if (t1) {
 				if (req(i->arg[0], rt))
-					i->arg[0] = SYM(s1);
+					i->arg[0] = TMP(t1);
 				if (req(i->arg[1], rt))
-					i->arg[1] = SYM(s1);
+					i->arg[1] = TMP(t1);
 			}
 			if (req(i->to, rt)) {
-				s1 = f->nsym++;
-				i->to = SYM(s1);
+				t1 = f->ntmp++;
+				i->to = TMP(t1);
 			}
 		}
-		if (s1 && req(b->jmp.arg, rt))
-			b->jmp.arg = SYM(s1);
+		if (t1 && req(b->jmp.arg, rt))
+			b->jmp.arg = TMP(t1);
 		top[b->id] = R;
-		bot[b->id] = s1 ? SYM(s1) : R;
+		bot[b->id] = t1 ? TMP(t1) : R;
 	}
 	for (b=f->start; b; b=b->link) {
 		for (p=b->phi; p; p=p->link)
@@ -186,14 +186,14 @@ ssafix(Fn *f, int t)
 		if (req(b->jmp.arg, rt))
 			b->jmp.arg = topdef(b, f);
 	}
-	/* add new symbols */
-	f->sym = realloc(f->sym, f->nsym * sizeof f->sym[0]);
-	if (!f->sym)
+	/* add new temporaries */
+	f->tmp = realloc(f->tmp, f->ntmp * sizeof f->tmp[0]);
+	if (!f->tmp)
 		diag("ssafix: out of memory");
-	for (s1=s0; s0<f->nsym; s0++) {
-		f->sym[s0] = f->sym[t];
-		snprintf(f->sym[s0].name, NString, "%s%d",
-			f->sym[t].name, s0-s1);
+	for (t1=t0; t0<f->ntmp; t0++) {
+		f->tmp[t0] = f->tmp[t];
+		snprintf(f->tmp[t0].name, NString, "%s%d",
+			f->tmp[t].name, t0-t1);
 	}
 	free(top);
 	free(bot);