summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--lisc/emit.c23
-rw-r--r--lisc/isel.c158
-rw-r--r--lisc/lisc.h6
-rw-r--r--lisc/parse.c9
-rw-r--r--lisc/spill.c1
5 files changed, 170 insertions, 27 deletions
diff --git a/lisc/emit.c b/lisc/emit.c
index 76ce82c..d5b1e80 100644
--- a/lisc/emit.c
+++ b/lisc/emit.c
@@ -76,13 +76,14 @@ eref(Ref r, Fn *fn, FILE *f)
 {
 	switch (rtype(r)) {
 	default:
-		diag("emitref: invalid reference");
+		diag("emit: invalid reference");
 	case RTmp:
 		assert(r.val < Tmp0);
 		fprintf(f, "%%%s", rtoa(r.val));
 		break;
+	case RMem:
 	case RSlot:
-		fprintf(f, "-%d(%%rbp)", 8 * r.val);
+		fprintf(f, "-%d(%%rbp)", 4 * r.val);
 		break;
 	case RCon:
 		fprintf(f, "$");
@@ -94,9 +95,18 @@ eref(Ref r, Fn *fn, FILE *f)
 static void
 emem(Ref r, Fn *fn, FILE *f)
 {
+	/* this function is now a hack
+	 * when full memory references
+	 * are supported, constants and
+	 * temporaries will not have
+	 * multiple meanings as they do
+	 * now
+	 */
+
 	switch (rtype(r)) {
 	default:
-		diag("emem: invalid memory reference");
+		diag("emit: invalid memory reference");
+	case RMem:
 	case RSlot:
 		eref(r, fn, f);
 		break;
@@ -171,7 +181,10 @@ eins(Ins i, Fn *fn, FILE *f)
 				break;
 			}
 		}
-		if (!req(i.arg[0], i.to))
+		if (rtype(i.arg[0]) == RMem) {
+			assert(rtype(i.to)==RTmp && i.to.val<EAX);
+			eop("lea", i.arg[0], i.to, fn, f);
+		} else if (!req(i.arg[0], i.to))
 			eop("mov", i.arg[0], i.to, fn, f);
 		break;
 	case OStorel:
@@ -258,8 +271,6 @@ emitfn(Fn *fn, FILE *f)
 		"liscf:\n"
 		"\tpush %%rbp\n"
 		"\tmov %%rsp, %%rbp\n"
-		"\tsub $%u, %%rsp\n",
-		fn->nspill * 8
 	);
 	for (b=fn->start; b; b=b->link) {
 		fprintf(f, ".L%s:\n", b->name);
diff --git a/lisc/isel.c b/lisc/isel.c
index a9e3ee2..7081eea 100644
--- a/lisc/isel.c
+++ b/lisc/isel.c
@@ -1,15 +1,15 @@
 #include "lisc.h"
 #include <limits.h>
 
-/* For x86_64, we have to:
+/* For x86_64, do the following:
  *
  * - check that constants are used only in
  *   places allowed
- *
  * - ensure immediates always fit in 32b
- *
  * - explicit machine register contraints
  *   on instructions like division.
+ * - implement fast locals (the streak of
+ *   constant allocX in the first basic block)
  *
  * - lower calls (future, I have to think
  *   about their representation and the
@@ -111,8 +111,24 @@ static void
 sel(Ins i, Fn *fn)
 {
 	Ref r0, r1, ra, rd;
-	int n, ty, c;
+	int n, ty, c, s;
 	int64_t val;
+	struct {
+		Ref r;
+		int s;
+	} cpy[2];
+
+	for (n=0; n<2; n++) {
+		r0 = i.arg[n];
+		cpy[n].s = 0;
+		if (rtype(r0) == RTmp)
+		if ((s = fn->tmp[r0.val].spill)) {
+			r0 = newtmp(TLong, fn);
+			i.arg[n] = r0;
+			cpy[n].r = r0;
+			cpy[n].s = s;
+		}
+	}
 
 	switch (i.op) {
 	case ODiv:
@@ -158,16 +174,24 @@ sel(Ins i, Fn *fn)
 			n = 0;
 		goto Emit;
 	case OStorel:
-		n = 1;
-		goto Emit;
 	case OStorew:
 	case OStoreb:
 	case OStores:
+		if (cpy[1].s) {
+			i.arg[1] = MEM(cpy[1].s);
+			cpy[1].s = 0;
+		}
+		n = i.op == OStorel;
+		goto Emit;
 	case OLoad:
 	case OLoadss:
 	case OLoadus:
 	case OLoadsb:
 	case OLoadub:
+		if (cpy[0].s) {
+			i.arg[0] = MEM(cpy[0].s);
+			cpy[0].s = 0;
+		}
 		n = 0;
 Emit:
 		emit(i.op, i.to, i.arg[0], i.arg[1]);
@@ -184,27 +208,31 @@ Emit:
 		}
 		break;
 	case OAlloc:
-		/* we need to make sure
+	case OAlloc+1:
+	case OAlloc+2: /* == OAlloc1 */
+		/* if this is not a fast alloc
+		 * we need to make sure
 		 * the stack remains aligned
 		 * (rsp + 8 = 0) mod 16
-		 * as a consequence, the alloc
-		 * alignment is 8, we can change
-		 * that in the future
 		 */
+		if (req(i.to, R))
+			break;
 		if (rtype(i.arg[0]) == RCon) {
 			val = fn->con[i.arg[0].val].val;
 			val = (val + 15)  & ~INT64_C(15);
 			if (val < 0 || val > INT32_MAX)
 				diag("isel: alloc too large");
 			emit(OAlloc, i.to, newcon(val, fn), R);
-		} else {
+		} else if (i.op < OAlloc1) {
 			/* r0 = (i.arg[0] + 15) & -16 */
 			r0 = newtmp(TLong, fn);
 			r1 = newtmp(TLong, fn);
 			emit(OAlloc, i.to, r0, R);
 			emit(OAnd, r0, r1, newcon(-16, fn));
 			emit(OAdd, r1, i.arg[0], newcon(15, fn));
-		}
+		} else
+			/* todo, support it! */
+			diag("isel: unsupported alloc alignment");
 		break;
 	default:
 		if (OCmp <= i.op && i.op <= OCmp1) {
@@ -217,6 +245,10 @@ Emit:
 		}
 		diag("isel: non-exhaustive implementation");
 	}
+
+	for (n=0; n<2; n++)
+		if (cpy[n].s)
+			emit(OCopy, cpy[n].r, MEM(cpy[n].s), R);
 }
 
 static Ins *
@@ -285,6 +317,63 @@ seljmp(Blk *b, Fn *fn)
 	}
 }
 
+int
+slot_(int sz, int al /* log2 */, Fn *fn)
+{
+	enum { N = sizeof fn->slot / sizeof fn->slot[0] };
+	int j, k, s, l, a, ret;
+	int *sa;
+
+	sa = fn->slot;
+	a = 1 << al;
+	l = sz;
+
+	if (l > a) {
+		/* for large slots, we just
+		 * tack them on the next max
+		 * alignment slot available
+		 * todo, could sophisticate
+		 */
+		l = (l + a-1) & ~(a-1);
+		s = sa[N-1] + l;
+		ret = s;
+		for (j=0, k=1; j<N; j++, k*=2) {
+			l = (l + k-1) & ~(k-1);
+			sa[j] = sa[N-1] + l;
+		}
+	} else {
+		j = al;
+		s = sa[j] + a;
+		ret = s;
+	Shift:
+		if (j < N-1 && s < sa[j+1])
+			/* ........-----------...
+			 * ^       ^          ^
+			 * sa[j]  sa[j]+a    sa[j+1]
+			 *
+			 * we have to skip to the
+			 * next large whole
+			 */
+			s = sa[j+1];
+
+		for (k=0; k<=j; k++)
+			/* move all smaller holes
+			 * that we contain with us
+			 */
+			if (sa[k] == sa[j])
+				sa[k] = s;
+
+		if (j < N-1 && s > sa[j+1]) {
+			/* we were in a bigger hole,
+			 * it needs to shift further
+			 */
+			s = sa[++j] + (a *= 2);
+			goto Shift;
+		}
+	}
+	return ret;
+}
+
 /* instruction selection
  * requires use counts (as given by parsing)
  */
@@ -293,18 +382,53 @@ isel(Fn *fn)
 {
 	Blk *b;
 	Ins *i;
-	uint nins;
+	Phi *p;
+	Ref *a;
+	int n, al, s;
+	int64_t sz;
+
+	/* 1. assign slots to fast allocs */
+	for (n=Tmp0; n<fn->ntmp; n++)
+		fn->tmp[n].spill = 0;
+	fn->slot[0] = 0;
+	fn->slot[1] = 0;
+	fn->slot[2] = 2;
+	for (b=fn->start, i=b->ins; i-b->ins < b->nins; i++)
+		if (OAlloc <= i->op && i->op <= OAlloc1) {
+			if (rtype(i->arg[0]) != RCon)
+				break;
+			sz = fn->con[i->arg[0].val].val;
+			if (sz < 0 || sz >= INT_MAX-3)
+				diag("isel: invalid alloc size");
+			sz = (sz + 3) / 4;
+			al = i->op - OAlloc;
+			s = slot_(sz, al, fn);
+			assert(s > 0);
+			fn->tmp[i->to.val].spill = s;
+			i->to = R;
+		}
+
 
+	/* 2. select machine instructions */
 	for (b=fn->start; b; b=b->link) {
+
+		/* 2.1. process block instructions */
 		curi = &insb[NIns];
 		seljmp(b, fn);
 		for (i=&b->ins[b->nins]; i>b->ins;) {
 			sel(*--i, fn);
 		}
-		nins = &insb[NIns] - curi;
+		n = &insb[NIns] - curi;
 		free(b->ins);
-		b->ins = alloc(nins * sizeof b->ins[0]);
-		memcpy(b->ins, curi, nins * sizeof b->ins[0]);
-		b->nins = nins;
+		b->ins = alloc(n * sizeof b->ins[0]);
+		memcpy(b->ins, curi, n * sizeof b->ins[0]);
+		b->nins = n;
+
+		/* 2.2. fix fast local references in phis */
+		for (p=b->phi; p; p=p->link)
+			for (a=p->arg; a-p->arg < p->narg; a++)
+				if (rtype(*a) == RTmp)
+				if ((s = fn->tmp[a->val].spill))
+					*a = MEM(s);
 	}
 }
diff --git a/lisc/lisc.h b/lisc/lisc.h
index 3ae5c56..50c6504 100644
--- a/lisc/lisc.h
+++ b/lisc/lisc.h
@@ -90,6 +90,7 @@ struct Ref {
 enum {
 	RTmp,
 	RCon,
+	RMem,
 	RSlot,
 	NRef = (1<<14) - 1
 };
@@ -98,6 +99,7 @@ enum {
 #define TMP(x)   (Ref){RTmp, x}
 #define CON(x)   (Ref){RCon, x}
 #define CON_Z    CON(0)          /* reserved zero constant */
+#define MEM(x)   (Ref){RMem, x}
 #define SLOT(x)  (Ref){RSlot, x}
 
 static inline int req(Ref a, Ref b)
@@ -139,6 +141,7 @@ enum Op {
 	OLoadub,
 	OCopy,
 	OAlloc,
+	OAlloc1 = OAlloc + 2,
 	NPubOp,
 
 	/* reserved instructions */
@@ -237,7 +240,7 @@ struct Fn {
 	int ncon;
 	int nblk;
 	Blk **rpo;
-	uint nspill;
+	int slot[3];
 };
 
 
@@ -262,6 +265,7 @@ void ssafix(Fn *, int);
 void filllive(Fn *);
 
 /* isel.c */
+int slot_(int, int, Fn *);
 void isel(Fn *);
 
 /* spill.c */
diff --git a/lisc/parse.c b/lisc/parse.c
index 5a06911..f51d5a5 100644
--- a/lisc/parse.c
+++ b/lisc/parse.c
@@ -26,7 +26,6 @@ OpDesc opdesc[NOp] = {
 	[OLoadus] = { "loadus", 1, 0 },
 	[OLoadsb] = { "loadsb", 1, 0 },
 	[OLoadub] = { "loadub", 1, 0 },
-	[OAlloc]  = { "alloc",  1, 1 },
 	[OCopy]   = { "copy",   1, 1 },
 	[ONop]    = { "nop",    0, 0 },
 	[OSwap]   = { "swap",   2, 2 },
@@ -34,6 +33,9 @@ OpDesc opdesc[NOp] = {
 	[OXDiv]   = { "xdiv",   1, 1 },
 	[OXCmpw]  = { "xcmpw",  2, 1 },
 	[OXCmpl]  = { "xcmpl",  2, 1 },
+	[OAlloc]   = { "alloc4",  1, 1 },
+	[OAlloc+1] = { "alloc8",  1, 1 },
+	[OAlloc+2] = { "alloc16", 1, 1 },
 
 	#define X(c)                        \
 	[OCmp+C##c]  = { "c"    #c, 2, 0 }, \
@@ -559,7 +561,10 @@ printref(Ref r, Fn *fn, FILE *f)
 		}
 		break;
 	case RSlot:
-		fprintf(f, "$%d", r.val);
+		fprintf(f, "S%d", r.val);
+		break;
+	case RMem:
+		fprintf(f, "M%d", r.val);
 		break;
 	}
 	return "";
diff --git a/lisc/spill.c b/lisc/spill.c
index ad461f0..a0f6f38 100644
--- a/lisc/spill.c
+++ b/lisc/spill.c
@@ -375,5 +375,4 @@ spill(Fn *fn)
 		b->ins = alloc(b->nins * sizeof(Ins));
 		memcpy(b->ins, curi, b->nins * sizeof(Ins));
 	}
-	fn->nspill = ns;
 }