summary refs log tree commit diff
path: root/lisc
diff options
context:
space:
mode:
authorQuentin Carbonneaux <quentin.carbonneaux@yale.edu>2015-09-14 23:36:26 -0400
committerQuentin Carbonneaux <quentin.carbonneaux@yale.edu>2015-09-15 23:01:33 -0400
commitf7bfa2e435c78917bd6df0c80e7e488751dac58c (patch)
treeff55a42ecc560ccde7af547b59c71b94afe6844b /lisc
parentbad74e6dce897df9f21cea5bf0a32df298856351 (diff)
downloadroux-f7bfa2e435c78917bd6df0c80e7e488751dac58c.tar.gz
heavy modification of call handling
The IR generated by calls was very bulky because two
instructions were used for marking the live range of
a clobber.

This patch attempts to store the information of what
registers are use/def/clobber in the call instruction
itself, this leads to more compact code (even more
when we'll have SSE registers).  However, I find that
the amount of extra code needed is not really
easonable.  Fortunately it is not too invasive, thus
if the complexity creeps in, it should be easy to
revert.
Diffstat (limited to 'lisc')
-rw-r--r--lisc/isel.c60
-rw-r--r--lisc/lisc.h13
-rw-r--r--lisc/live.c12
-rw-r--r--lisc/main.c1
-rw-r--r--lisc/parse.c7
-rw-r--r--lisc/rega.c55
-rw-r--r--lisc/spill.c16
7 files changed, 125 insertions, 39 deletions
diff --git a/lisc/isel.c b/lisc/isel.c
index 6928a1a..1712333 100644
--- a/lisc/isel.c
+++ b/lisc/isel.c
@@ -442,10 +442,51 @@ classify(AInfo *a, Typ *t)
 	}
 }
 
+int rsave[NRSave] = {RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX};
+
+uint64_t calldef(Ins i, int *p)
+{
+	uint64_t b;
+	int n;
+
+	n = i.arg[1].val & 15;
+	b = 1ll << RAX;
+	if (n == 2)
+		b |= 1ll << RDX;
+	if (p)
+		*p = n;
+	return b;
+}
+
+uint64_t calluse(Ins i, int *p)
+{
+	uint64_t b;
+	int j, n;
+
+	n = (i.arg[1].val >> 4) & 15;
+	for (j = 0, b = 0; j < n; j++)
+		b |= 1ll << rsave[j];
+	if (p)
+		*p = n;
+	return b;
+}
+
+uint64_t callclb(Ins i, int *p)
+{
+	uint64_t b;
+	int j, n;
+
+	n = (i.arg[1].val >> 4) & 15;
+	for (j = n, b = 0; j < NRSave; j++)
+		b |= 1ll << rsave[j];
+	if (p)
+		*p = n;
+	return b;
+}
+
 static void
 selcall(Fn *fn, Ins *i0, Ins *i1)
 {
-	static int ireg[8] = {RDI, RSI, RDX, RCX, R8, R9, R10, R11};
 	Ins *i;
 	AInfo *ai, *a;
 	int nint, nsse, ni, ns, n;
@@ -497,20 +538,15 @@ selcall(Fn *fn, Ins *i0, Ins *i1)
 		}
 	stk += stk & 15;
 
-	if (rtype(i1->arg[1]) == RTyp)
+	if (!req(i1->arg[1], R))
 		diag("struct-returning function not implemented");
 
 	if (stk) {
 		r = newcon(-(int64_t)stk, fn);
 		emit(OSAlloc, 0, R, r, R);
 	}
-	for (n=0; n<8; n++)
-		emit(OCopy, 0, R, TMP(ireg[n]), R);
 	emit(OCopy, i1->wide, i1->to, TMP(RAX), R);
-	emit(OCall, 0, R, i->arg[0], R);
-	emit(OCopy, 1, TMP(RAX), R, R);
-	for (n=6-nint; n<8; n++)
-		emit(OCopy, 1, TMP(ireg[n]), R, R);
+	emit(OCall, 0, R, i->arg[0], CALL(1 + ((6-nint) << 4)));
 
 	for (i=i0, a=ai, ni=0; i<i1; i++, a++) {
 		if (a->inmem)
@@ -519,18 +555,18 @@ selcall(Fn *fn, Ins *i0, Ins *i1)
 			if (a->rty[0] == RSse || a->rty[1] == RSse)
 				diag("isel: unsupported float struct");
 			if (a->size > 8) {
-				r = TMP(ireg[ni+1]);
+				r = TMP(rsave[ni+1]);
 				r1 = newtmp(fn);
 				emit(OLoad, 1, r, r1, R);
 				r = newcon(8, fn);
 				emit(OAdd, 1, r1, i->arg[1], r);
-				r = TMP(ireg[ni]);
+				r = TMP(rsave[ni]);
 				ni += 2;
 			} else
-				r = TMP(ireg[ni++]);
+				r = TMP(rsave[ni++]);
 			emit(OLoad, 1, r, i->arg[1], R);
 		} else {
-			r = TMP(ireg[ni++]);
+			r = TMP(rsave[ni++]);
 			emit(OCopy, i->wide, r, i->arg[0], R);
 		}
 	}
diff --git a/lisc/lisc.h b/lisc/lisc.h
index c85c521..ff80900 100644
--- a/lisc/lisc.h
+++ b/lisc/lisc.h
@@ -43,7 +43,8 @@ enum Reg {
 
 	Tmp0, /* first non-reg temporary */
 
-	NReg = R12 - RAX + 1
+	NReg = R12 - RAX + 1,
+	NRSave = 9,
 };
 
 enum {
@@ -76,7 +77,8 @@ enum {
 	RTmp,
 	RCon,
 	RSlot,
-	RTyp,
+	RAlt,
+	RCallm = 0x1000,
 	NRef = (1<<14) - 1
 };
 
@@ -85,7 +87,8 @@ enum {
 #define CON(x)   (Ref){RCon, x}
 #define CON_Z    CON(0)          /* reserved zero constant */
 #define SLOT(x)  (Ref){RSlot, x}
-#define TYP(x)   (Ref){RTyp, x}
+#define TYP(x)   (Ref){RAlt, x}
+#define CALL(x)  (Ref){RAlt, x|RCallm}
 
 static inline int req(Ref a, Ref b)
 { return a.type == b.type && a.val == b.val; }
@@ -274,6 +277,10 @@ void ssafix(Fn *, int);
 void filllive(Fn *);
 
 /* isel.c */
+extern int rsave[NRSave];
+uint64_t calldef(Ins, int *);
+uint64_t calluse(Ins, int *);
+uint64_t callclb(Ins, int *);
 int slota(int, int, int *);
 void isel(Fn *);
 
diff --git a/lisc/live.c b/lisc/live.c
index 64fcf6a..c3aae86 100644
--- a/lisc/live.c
+++ b/lisc/live.c
@@ -21,7 +21,7 @@ filllive(Fn *f)
 	Blk *b;
 	Ins *i;
 	Phi *p;
-	int z, n, chg, nlv;
+	int z, m, n, chg, nlv;
 	uint a;
 	Bits tb;
 
@@ -50,7 +50,15 @@ Again:
 		bset(b->jmp.arg, b, &nlv);
 		b->nlive = nlv;
 		for (i=&b->ins[b->nins]; i!=b->ins;) {
-			i--;
+			if ((--i)->op == OCall) {
+				b->in.t[0] &= ~calldef(*i, &m);
+				nlv -= m;
+				if (nlv + NRSave > b->nlive)
+					b->nlive = nlv + NRSave;
+				b->in.t[0] |= calluse(*i, &m);
+				nlv += m;
+				bset(i->arg[0], b, &nlv);
+			}
 			if (!req(i->to, R)) {
 				assert(rtype(i->to) == RTmp);
 				nlv -= BGET(b->in, i->to.val);
diff --git a/lisc/main.c b/lisc/main.c
index cb2e82e..1452ff6 100644
--- a/lisc/main.c
+++ b/lisc/main.c
@@ -63,6 +63,7 @@ main(int ac, char *av[])
 		Blk *b;
 
 		fprintf(stderr, "[Testing Liveness]\n");
+		isel(fn);
 		fillrpo(fn);
 		filllive(fn);
 		for (b=fn->start; b; b=b->link) {
diff --git a/lisc/parse.c b/lisc/parse.c
index 957fae1..29f5653 100644
--- a/lisc/parse.c
+++ b/lisc/parse.c
@@ -779,8 +779,11 @@ printref(Ref r, Fn *fn, FILE *f)
 	case RSlot:
 		fprintf(f, "S%d", r.val);
 		break;
-	case RTyp:
-		fprintf(f, ":%s", typ[r.val].name);
+	case RAlt:
+		if (r.val & RCallm)
+			fprintf(f, "%x", r.val & (RCallm-1));
+		else
+			fprintf(f, ":%s", typ[r.val].name);
 		break;
 	}
 }
diff --git a/lisc/rega.c b/lisc/rega.c
index b4e4572..1263c6a 100644
--- a/lisc/rega.c
+++ b/lisc/rega.c
@@ -192,36 +192,50 @@ pmgen()
 	free(status);
 }
 
+static void
+move(int r, Ref to, RMap *m)
+{
+	int n, t, r1;
+
+	r1 = req(to, R) ? -1 : rfree(m, to.val);
+	if (BGET(m->b, r) && r1 != r) {
+		/* r is used and not by to */
+		for (n=0; m->r[n] != r; n++)
+			assert(n+1 < m->n);
+		t = m->t[n];
+		rfree(m, t);
+		BSET(m->b, r);
+		ralloc(m, t);
+		BCLR(m->b, r);
+	}
+	t = r1 == -1 ? r : to.val;
+	radd(m, t, r);
+}
+
 static Ins *
 dopm(Blk *b, Ins *i, RMap *m)
 {
 	RMap m0;
 	int n, r, r1, t, nins;
 	Ins *i1, *ib, *ip, *ir;
+	uint64_t def;
 
 	m0 = *m;
 	i1 = i+1;
 	for (;; i--) {
-		r = i->arg[0].val;
-		r1 = req(i->to, R) ? -1 : rfree(m, i->to.val);
-		if (BGET(m->b, r) && r1 != r) {
-			/* r is used and not by i->to, */
-			for (n=0; m->r[n] != r; n++)
-				assert(n+1 < m->n);
-			t = m->t[n];
-			rfree(m, t);
-			BSET(m->b, r);
-			ralloc(m, t);
-			BCLR(m->b, r);
-		}
-		t = r1 == -1 ? r : i->to.val;
-		radd(m, t, r);
+		move(i->arg[0].val, i->to, m);
 		if (i == b->ins
 		|| (i-1)->op != OCopy
 		|| !isreg((i-1)->arg[0]))
 			break;
 	}
 	assert(m0.n <= m->n);
+	if (i > b->ins && (i-1)->op == OCall) {
+		def = calldef(*i, 0);
+		for (r=0; r<NRSave; r++)
+			if (!((1ll << rsave[r]) & def))
+				move(rsave[r], R, m);
+	}
 	for (npm=0, n=0; n<m->n; n++)
 		if ((t = m->t[n]) >= Tmp0) {
 			r1 = m->r[n];
@@ -266,6 +280,7 @@ rega(Fn *fn)
 	Phi *p;
 	uint a;
 	Ref src, dst;
+	uint64_t rs;
 
 	tmp = fn->tmp;
 	end = alloc(fn->nblk * sizeof end[0]);
@@ -303,8 +318,16 @@ rega(Fn *fn)
 		if (rtype(b->jmp.arg) == RTmp)
 			b->jmp.arg = ralloc(&cur, b->jmp.arg.val);
 		for (i=&b->ins[b->nins]; i!=b->ins;) {
-			i--;
-			if (i->op == OCopy && isreg(i->arg[0])) {
+			switch ((--i)->op) {
+			case OCall:
+				rs = calldef(*i, 0) | callclb(*i, 0);
+				for (r=0; r<NReg; r++)
+					if ((1ll << r) & rs)
+						rfree(&cur, r);
+				continue;
+			case OCopy:
+				if (!isreg(i->arg[0]))
+					break;
 				i = dopm(b, i, &cur);
 				continue;
 			}
diff --git a/lisc/spill.c b/lisc/spill.c
index 2625ceb..7b2476e 100644
--- a/lisc/spill.c
+++ b/lisc/spill.c
@@ -277,6 +277,7 @@ inregs(Blk *b, Blk *s) /* todo, move to live.c */
 static Ins *
 dopm(Blk *b, Ins *i, Bits *v)
 {
+	int j;
 	Ins *i1;
 
 	/* consecutive moves from
@@ -293,7 +294,14 @@ dopm(Blk *b, Ins *i, Bits *v)
 		|| !isreg((i-1)->arg[0]))
 			break;
 	}
-	limit(v, NReg, 0);
+	if (i > b->ins && (i-1)->op == OCall) {
+		calldef(*--i, &j);
+		limit(v, NReg - NRSave + j, 0);
+		v->t[0] &= ~calldef(*i, 0);
+		v->t[0] |= calluse(*i, 0);
+		setloc(&i->arg[0], v, &(Bits){{0}});
+	} else
+		limit(v, NReg, 0);
 	do
 		emit(*--i1);
 	while (i1 != i);
@@ -388,21 +396,21 @@ spill(Fn *fn)
 		for (i=&b->ins[b->nins]; i!=b->ins;) {
 			assert(bcnt(&v) <= NReg);
 			i--;
-			s = 0;
 			if (i->op == OCopy && isreg(i->arg[0])) {
 				i = dopm(b, i, &v);
 				continue;
 			}
+			s = 0;
+			w = (Bits){{0}};
 			if (!req(i->to, R)) {
 				assert(rtype(i->to) == RTmp);
 				t = i->to.val;
 				if (BGET(v, t))
 					BCLR(v, t);
 				else
-					limit(&v, NReg-1, &w);
+					limit(&v, NReg-1, 0);
 				s = tmp[t].spill;
 			}
-			w = (Bits){{0}};
 			j = opdesc[i->op].nmem;
 			if (!j && rtype(i->arg[0]) == RTmp)
 				BSET(w, i->arg[0].val);