summary refs log tree commit diff
path: root/src/rega.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/rega.c')
-rw-r--r--src/rega.c598
1 files changed, 0 insertions, 598 deletions
diff --git a/src/rega.c b/src/rega.c
deleted file mode 100644
index 7f8edcf..0000000
--- a/src/rega.c
+++ /dev/null
@@ -1,598 +0,0 @@
-#include "all.h"
-
-#ifdef TEST_PMOV
-	#undef assert
-	#define assert(x) assert_test(#x, x)
-#endif
-
-typedef struct RMap RMap;
-
-struct RMap {
-	int t[NIReg+NFReg];
-	int r[NIReg+NFReg];
-	BSet b[1];
-	int n;
-};
-
-static bits regu;      /* registers used */
-static Tmp *tmp;       /* function temporaries */
-static Mem *mem;       /* function mem references */
-static struct {
-	Ref src, dst;
-	int cls;
-} *pm;                 /* parallel move constructed */
-static int cpm, npm;   /* capacity and size of pm */
-
-static int *
-hint(int t)
-{
-	return &tmp[phicls(t, tmp)].hint.r;
-}
-
-static void
-sethint(int t, int r)
-{
-	bits m;
-
-	m = tmp[phicls(t, tmp)].hint.m;
-	if (*hint(t) == -1)
-	if (!(BIT(r) & m))
-		*hint(t) = r;
-}
-
-static void
-rcopy(RMap *ma, RMap *mb)
-{
-	memcpy(ma->t, mb->t, sizeof ma->t);
-	memcpy(ma->r, mb->r, sizeof ma->r);
-	bscopy(ma->b, mb->b);
-	ma->n = mb->n;
-}
-
-static int
-rfind(RMap *m, int t)
-{
-	int i;
-
-	for (i=0; i<m->n; i++)
-		if (m->t[i] == t)
-			return m->r[i];
-	return -1;
-}
-
-static Ref
-rref(RMap *m, int t)
-{
-	int r, s;
-
-	r = rfind(m, t);
-	if (r == -1) {
-		s = tmp[t].slot;
-		assert(s != -1 && "should have spilled");
-		return SLOT(s);
-	} else
-		return TMP(r);
-}
-
-static void
-radd(RMap *m, int t, int r)
-{
-	assert((t >= Tmp0 || t == r) && "invalid temporary");
-	assert(((RAX <= r && r < RAX + NIReg) || (XMM0 <= r && r < XMM0 + NFReg)) && "invalid register");
-	assert(!bshas(m->b, t) && "temporary has mapping");
-	assert(!bshas(m->b, r) && "register already allocated");
-	assert(m->n <= NIReg+NFReg && "too many mappings");
-	bsset(m->b, t);
-	bsset(m->b, r);
-	m->t[m->n] = t;
-	m->r[m->n] = r;
-	m->n++;
-	regu |= BIT(r);
-}
-
-static Ref
-ralloc(RMap *m, int t)
-{
-	bits regs;
-	int r, r0, r1;
-
-	if (t < Tmp0) {
-		assert(bshas(m->b, t));
-		return TMP(t);
-	}
-	if (bshas(m->b, t)) {
-		r = rfind(m, t);
-		assert(r != -1);
-		return TMP(r);
-	}
-	r = *hint(t);
-	if (r == -1 || bshas(m->b, r)) {
-		regs = tmp[phicls(t, tmp)].hint.m;
-		regs |= m->b->t[0];
-		switch (KBASE(tmp[t].cls)) {
-		case 0:
-			r0 = RAX;
-			r1 = RAX + NIReg;
-			break;
-		case 1:
-			r0 = XMM0;
-			r1 = XMM0 + NFReg;
-			break;
-		}
-		for (r=r0; r<r1; r++)
-			if (!(regs & BIT(r)))
-				goto Found;
-		for (r=r0; r<r1; r++)
-			if (!bshas(m->b, r))
-				goto Found;
-		diag("rega: no more regs");
-	}
-Found:
-	radd(m, t, r);
-	sethint(t, r);
-	return TMP(r);
-}
-
-static int
-rfree(RMap *m, int t)
-{
-	int i, r;
-
-	if (!bshas(m->b, t))
-		return -1;
-	for (i=0; m->t[i] != t; i++)
-		assert(i+1 < m->n);
-	r = m->r[i];
-	bsclr(m->b, t);
-	bsclr(m->b, r);
-	m->n--;
-	memmove(&m->t[i], &m->t[i+1], (m->n-i) * sizeof m->t[0]);
-	memmove(&m->r[i], &m->r[i+1], (m->n-i) * sizeof m->r[0]);
-	return r;
-}
-
-static void
-mdump(RMap *m)
-{
-	int i;
-
-	for (i=0; i<m->n; i++)
-		fprintf(stderr, " (%s, R%d)",
-			tmp[m->t[i]].name,
-			m->r[i]);
-	fprintf(stderr, "\n");
-}
-
-static void
-pmadd(Ref src, Ref dst, int k)
-{
-	if (npm == cpm) {
-		cpm = cpm * 2 + 16;
-		pm = realloc(pm, cpm * sizeof pm[0]);
-		if (!pm)
-			diag("pmadd: out of memory");
-	}
-	pm[npm].src = src;
-	pm[npm].dst = dst;
-	pm[npm].cls = k;
-	npm++;
-}
-
-enum PMStat { ToMove, Moving, Moved };
-
-static Ref
-pmrec(enum PMStat *status, int i, int *k)
-{
-	Ref swp, swp1;
-	int j, k1;
-
-	/* note, this routine might emit
-	 * too many large instructions:
-	 *
-	 *                  , x -- x
-	 *      x -- x -- x        |
-	 *                  ` x -- x
-	 *
-	 * if only the first move is wide
-	 * the whole cycle will be wide,
-	 * this is safe but not necessary
-	 */
-
-	if (req(pm[i].src, pm[i].dst))
-		return R;
-	status[i] = Moving;
-	assert(KBASE(*k) == KBASE(pm[i].cls));
-	assert((Kw|1) == Kl && (Ks|1) == Kd);
-	*k |= KWIDE(pm[i].cls); /* see above */
-	swp = R;
-	for (j=0; j<npm; j++) {
-		if (req(pm[j].src, pm[i].dst))
-			switch (status[j]) {
-			case ToMove:
-				k1 = *k;
-				swp1 = pmrec(status, j, &k1);
-				if (!req(swp1, R)) {
-					assert(req(swp, R));
-					swp = swp1;
-					*k = k1;
-				}
-				break;
-			case Moving:
-				assert(req(swp, R));
-				swp = pm[i].dst;
-				break;
-			case Moved:
-				break;
-			}
-	}
-	status[i] = Moved;
-	if (req(swp, R)) {
-		*curi++ = (Ins){OCopy, pm[i].dst, {pm[i].src}, pm[i].cls};
-		return R;
-	} else if (!req(swp, pm[i].src)) {
-		*curi++ = (Ins){OSwap, R, {pm[i].src, pm[i].dst}, *k};
-		return swp;
-	} else
-		return R;
-
-}
-
-static void
-pmgen()
-{
-	int i, k;
-	enum PMStat *status;
-
-	status = alloc(npm * sizeof status[0]);
-	assert(!npm || status[npm-1] == ToMove);
-	curi = insb;
-	for (i=0; i<npm; i++)
-		if (status[i] == ToMove) {
-			k = pm[i].cls;
-			pmrec(status, i, &k);
-		}
-}
-
-static void
-move(int r, Ref to, RMap *m)
-{
-	int n, t, r1;
-
-	r1 = req(to, R) ? -1 : rfree(m, to.val);
-	if (bshas(m->b, r) && r1 != r) {
-		/* r is used and not by to */
-		for (n=0; m->r[n] != r; n++)
-			assert(n+1 < m->n);
-		t = m->t[n];
-		rfree(m, t);
-		bsset(m->b, r);
-		ralloc(m, t);
-		bsclr(m->b, r);
-	}
-	t = req(to, R) ? r : to.val;
-	radd(m, t, r);
-}
-
-static int
-regcpy(Ins *i)
-{
-	return i->op == OCopy && isreg(i->arg[0]);
-}
-
-static Ins *
-dopm(Blk *b, Ins *i, RMap *m)
-{
-	RMap m0;
-	int n, r, r1, t, s;
-	Ins *i0, *i1, *ip, *ir;
-	bits def;
-
-	m0 = *m;
-	i1 = ++i;
-	do {
-		i--;
-		move(i->arg[0].val, i->to, m);
-	} while (i != b->ins && regcpy(i-1));
-	assert(m0.n <= m->n);
-	if (i != b->ins && (i-1)->op == OCall) {
-		def = retregs((i-1)->arg[1], 0);
-		for (r=0; r<NRSave; r++)
-			if (!(BIT(rsave[r]) & def))
-				move(rsave[r], R, m);
-	}
-	for (npm=0, n=0; n<m->n; n++) {
-		t = m->t[n];
-		s = tmp[t].slot;
-		r1 = m->r[n];
-		r = rfind(&m0, t);
-		if (r != -1)
-			pmadd(TMP(r1), TMP(r), tmp[t].cls);
-		else if (s != -1)
-			pmadd(TMP(r1), SLOT(s), tmp[t].cls);
-	}
-	for (ip=i; ip<i1; ip++) {
-		if (!req(ip->to, R))
-			rfree(m, ip->to.val);
-		r = ip->arg[0].val;
-		if (rfind(m, r) == -1)
-			radd(m, r, r);
-	}
-	pmgen();
-#ifdef TEST_PMOV
-	return 0;
-#endif
-	n = b->nins - (i1 - i) + (curi - insb);
-	i0 = alloc(n * sizeof(Ins));
-	ip = icpy(ip = i0, b->ins, i - b->ins);
-	ip = icpy(ir = ip, insb, curi - insb);
-	ip = icpy(ip, i1, &b->ins[b->nins] - i1);
-	b->nins = n;
-	b->ins = i0;
-	return ir;
-}
-
-static int
-prio(Ref r1, Ref r2)
-{
-	/* trivial heuristic to begin with,
-	 * later we can use the distance to
-	 * the definition instruction
-	 */
-	(void) r2;
-	return *hint(r1.val) != -1;
-}
-
-static void
-insert(Ref *r, Ref **rs, int p)
-{
-	int i;
-
-	rs[i = p] = r;
-	while (i-- > 0 && prio(*r, *rs[i])) {
-		rs[i+1] = rs[i];
-		rs[i] = r;
-	}
-}
-
-static void
-doblk(Blk *b, RMap *cur)
-{
-	int x, r, nr;
-	bits rs;
-	Ins *i;
-	Mem *m;
-	Ref *ra[4];
-
-	if (rtype(b->jmp.arg) == RTmp)
-		b->jmp.arg = ralloc(cur, b->jmp.arg.val);
-	else if (rtype(b->jmp.arg) == RACall) {
-		/* add return registers */
-		rs = retregs(b->jmp.arg, 0);
-		for (r=0; rs; rs/=2, r++)
-			if (rs & 1)
-				radd(cur, r, r);
-	}
-	for (i=&b->ins[b->nins]; i!=b->ins;) {
-		switch ((--i)->op) {
-		case OCall:
-			rs = argregs(i->arg[1], 0);
-			for (r=0; r<NRSave; r++)
-				if (!(BIT(rsave[r]) & rs))
-					rfree(cur, rsave[r]);
-			break;
-		case OCopy:
-			if (isreg(i->arg[0])) {
-				i = dopm(b, i, cur);
-				continue;
-			}
-			if (isreg(i->to))
-			if (rtype(i->arg[0]) == RTmp)
-				sethint(i->arg[0].val, i->to.val);
-			/* fall through */
-		default:
-			if (!req(i->to, R)) {
-				assert(rtype(i->to) == RTmp);
-				r = rfree(cur, i->to.val);
-				if (r == -1 && !isreg(i->to)) {
-					*i = (Ins){.op = ONop};
-					continue;
-				}
-				if (i->to.val >= Tmp0)
-					i->to = TMP(r);
-			}
-			break;
-		}
-		for (x=0, nr=0; x<2; x++)
-			switch (rtype(i->arg[x])) {
-			case RAMem:
-				m = &mem[i->arg[x].val & AMask];
-				if (rtype(m->base) == RTmp)
-					insert(&m->base, ra, nr++);
-				if (rtype(m->index) == RTmp)
-					insert(&m->index, ra, nr++);
-				break;
-			case RTmp:
-				insert(&i->arg[x], ra, nr++);
-				break;
-			}
-		for (r=0; r<nr; r++)
-			*ra[r] = ralloc(cur, ra[r]->val);
-	}
-}
-
-/* register allocation
- * depends on rpo, phi, cost, (and obviously spill)
- */
-void
-rega(Fn *fn)
-{
-	int j, n, t, r, r1, x, rl[Tmp0];
-	Blk *b, *b1, *s, ***ps, *blist;
-	RMap *end, *beg, cur, old;
-	Ins *i;
-	Phi *p;
-	uint u;
-	Ref src, dst;
-
-	/* 1. setup */
-	regu = 0;
-	tmp = fn->tmp;
-	mem = fn->mem;
-	end = alloc(fn->nblk * sizeof end[0]);
-	beg = alloc(fn->nblk * sizeof beg[0]);
-	for (n=0; n<fn->nblk; n++) {
-		bsinit(end[n].b, fn->ntmp);
-		bsinit(beg[n].b, fn->ntmp);
-	}
-	bsinit(cur.b, fn->ntmp);
-	bsinit(old.b, fn->ntmp);
-
-	for (t=Tmp0; t<fn->ntmp; t++)
-		*hint(t) = -1;
-	for (b=fn->start, i=b->ins; i-b->ins < b->nins; i++)
-		if (i->op != OCopy || !isreg(i->arg[0]))
-			break;
-		else {
-			assert(rtype(i->to) == RTmp);
-			sethint(i->to.val, i->arg[0].val);
-		}
-
-	/* 2. assign registers following post-order */
-	for (n=fn->nblk-1; n>=0; n--) {
-		b = fn->rpo[n];
-		cur.n = 0;
-		bszero(cur.b);
-		for (x=0; x<2; x++)
-			for (t=Tmp0; t<fn->ntmp; t++) {
-				assert(bshas(b->out, t) ||
-					!bshas(cur.b, t));
-				if (bshas(b->out, t))
-				if (!bshas(cur.b, t))
-				if (x || (r=*hint(t)) != -1)
-				if (x || !bshas(cur.b, r))
-					ralloc(&cur, t);
-			}
-		rcopy(&end[n], &cur);
-		doblk(b, &cur);
-		bscopy(b->in, cur.b);
-		for (p=b->phi; p; p=p->link)
-			if (rtype(p->to) == RTmp) {
-				bsclr(b->in, p->to.val);
-				/* heuristic 0:
-				 * if the phi destination has an
-				 * argument from a frequent block
-				 * that was already allocated to
-				 * 'r', use 'r' as the new hint
-				 */
-				memset(rl, 0, sizeof rl);
-				for (u=0; u<p->narg; u++) {
-					t = p->arg[u].val;
-					b1 = p->blk[u];
-					if (rtype(p->arg[u]) == RTmp)
-					if ((r=rfind(&end[b1->id], t)) != -1)
-						rl[r] += b1->loop;
-				}
-				for (x=0, j=0; j<Tmp0; j++)
-					if (rl[j] > rl[x])
-						x = j;
-				if (rl[x] >= b->loop)
-					*hint(p->to.val) = x;
-			}
-		if (b->npred > 1) {
-			/* heuristic 1:
-			 * attempt to satisfy hints
-			 * when it's simple and we have
-			 * multiple predecessors
-			 */
-			rcopy(&old, &cur);
-			curi = &insb[NIns];
-			for (j=0; j<old.n; j++) {
-				t = old.t[j];
-				r = *hint(t);
-				r1 = rfind(&cur, t);
-				if (r != -1 && r != r1)
-				if (!bshas(cur.b, r)) {
-					rfree(&cur, t);
-					radd(&cur, t, r);
-					x = tmp[t].cls;
-					emit(OCopy, x, TMP(r1), TMP(r), R);
-				}
-			}
-			if ((j = &insb[NIns] - curi)) {
-				b->nins += j;
-				i = alloc(b->nins * sizeof(Ins));
-				icpy(icpy(i, curi, j), b->ins, b->nins-j);
-				b->ins = i;
-			}
-		}
-		rcopy(&beg[n], &cur);
-	}
-	if (debug['R'])  {
-		fprintf(stderr, "\n> Register mappings:\n");
-		for (n=0; n<fn->nblk; n++) {
-			b = fn->rpo[n];
-			fprintf(stderr, "\t%-10s beg", b->name);
-			mdump(&beg[n]);
-			fprintf(stderr, "\t           end");
-			mdump(&end[n]);
-		}
-		fprintf(stderr, "\n");
-	}
-
-	/* 3. compose glue code */
-	blist = 0;
-	for (b=fn->start;; b=b->link) {
-		ps = (Blk**[3]){&b->s1, &b->s2, (Blk*[1]){0}};
-		for (; (s=**ps); ps++) {
-			npm = 0;
-			for (p=s->phi; p; p=p->link) {
-				dst = p->to;
-				assert(rtype(dst)==RSlot || rtype(dst)==RTmp);
-				if (rtype(dst) == RTmp) {
-					r = rfind(&beg[s->id], dst.val);
-					if (r == -1)
-						continue;
-					dst = TMP(r);
-				}
-				for (u=0; p->blk[u]!=b; u++)
-					assert(u+1 < p->narg);
-				src = p->arg[u];
-				if (rtype(src) == RTmp)
-					src = rref(&end[b->id], src.val);
-				pmadd(src, dst, p->cls);
-			}
-			for (t=Tmp0; t<fn->ntmp; t++)
-				if (bshas(s->in, t)) {
-					src = rref(&end[b->id], t);
-					dst = rref(&beg[s->id], t);
-					pmadd(src, dst, tmp[t].cls);
-				}
-			pmgen();
-			if (curi == insb)
-				continue;
-			b1 = blknew();
-			b1->loop = (b->loop+s->loop) / 2;
-			b1->link = blist;
-			blist = b1;
-			fn->nblk++;
-			sprintf(b1->name, "%s_%s", b->name, s->name);
-			b1->nins = curi - insb;
-			idup(&b1->ins, insb, b1->nins);
-			b1->jmp.type = JJmp;
-			b1->s1 = s;
-			**ps = b1;
-		}
-		if (!b->link) {
-			b->link = blist;
-			break;
-		}
-	}
-	for (b=fn->start; b; b=b->link)
-		b->phi = 0;
-	fn->reg = regu;
-
-	if (debug['R']) {
-		fprintf(stderr, "\n> After register allocation:\n");
-		printfn(fn, stderr);
-	}
-}