summary refs log tree commit diff
diff options
context:
space:
mode:
authorQuentin Carbonneaux <quentin@c9x.me>2017-05-17 09:40:07 -0400
committerQuentin Carbonneaux <quentin@c9x.me>2017-05-17 10:05:28 -0400
commita3a1451c5fabb5c94f7fbeb13fdc6b1e2c23181f (patch)
tree1fb1ba60e8f3cab09c629ce9dd63e00e01974c39
parent2d02070af019e9896ecf2a63bedc098092fd395d (diff)
downloadroux-a3a1451c5fabb5c94f7fbeb13fdc6b1e2c23181f.tar.gz
intern symbol names
Symbols in the source file are still limited in
length because the rest of the code assumes that
strings always fit in NString bytes.

Regardless, there is already a benefit because
comparing/copying symbol names does not require
using strcmp()/strcpy() anymore.
-rw-r--r--alias.c4
-rw-r--r--all.h13
-rw-r--r--amd64/emit.c8
-rw-r--r--amd64/isel.c4
-rw-r--r--arm64/emit.c4
-rw-r--r--arm64/isel.c4
-rw-r--r--fold.c23
-rw-r--r--load.c2
-rw-r--r--parse.c17
-rw-r--r--util.c151
10 files changed, 143 insertions, 87 deletions
diff --git a/alias.c b/alias.c
index 77d5d73..66c12e5 100644
--- a/alias.c
+++ b/alias.c
@@ -18,7 +18,7 @@ getalias(Alias *a, Ref r, Fn *fn)
 		c = &fn->con[r.val];
 		if (c->type == CAddr) {
 			a->type = ASym;
-			strcpy(a->label, c->label);
+			a->label = c->label;
 		} else
 			a->type = ACon;
 		a->offset = c->bits.i;
@@ -51,7 +51,7 @@ alias(Ref p, int sp, Ref q, int sq, int *delta, Fn *fn)
 		/* they conservatively alias if the
 		 * symbols are different, or they
 		 * alias for sure if they overlap */
-		if (strcmp(ap.label, aq.label) != 0)
+		if (ap.label != aq.label)
 			return MayAlias;
 		if (ovlap)
 			return MustAlias;
diff --git a/all.h b/all.h
index 1e9476d..b9ac9b2 100644
--- a/all.h
+++ b/all.h
@@ -275,7 +275,7 @@ struct Alias {
 	#define astack(t) ((t) & 1)
 	} type;
 	Ref base;
-	char label[NString];
+	uint32_t label;
 	int64_t offset;
 	Alias *slot;
 };
@@ -312,7 +312,7 @@ struct Con {
 		CBits,
 		CAddr,
 	} type;
-	char label[NString];
+	uint32_t label;
 	union {
 		int64_t i;
 		double d;
@@ -411,19 +411,22 @@ typedef enum {
 
 extern Typ *typ;
 extern Ins insb[NIns], *curi;
+uint32_t hash(char *);
 void die_(char *, char *, ...) __attribute__((noreturn));
 void *emalloc(size_t);
 void *alloc(size_t);
 void freeall(void);
+void *vnew(ulong, size_t, Pool);
+void vfree(void *);
+void vgrow(void *, ulong);
+uint32_t intern(char *);
+char *str(uint32_t);
 int argcls(Ins *, int);
 int iscmp(int, int *, int *);
 void emit(int, int, Ref, Ref, Ref);
 void emiti(Ins);
 void idup(Ins **, Ins *, ulong);
 Ins *icpy(Ins *, Ins *, ulong);
-void *vnew(ulong, size_t, Pool);
-void vfree(void *);
-void vgrow(void *, ulong);
 int cmpop(int);
 int cmpneg(int);
 int clsmerge(short *, short);
diff --git a/amd64/emit.c b/amd64/emit.c
index eccbd02..cd485bb 100644
--- a/amd64/emit.c
+++ b/amd64/emit.c
@@ -161,12 +161,12 @@ slot(int s, Fn *fn)
 static void
 emitcon(Con *con, FILE *f)
 {
+	char *p;
+
 	switch (con->type) {
 	case CAddr:
-		if (con->local)
-			fprintf(f, "%s%s", gasloc, con->label);
-		else
-			fprintf(f, "%s%s", gassym, con->label);
+		p = con->local ? gasloc : gassym;
+		fprintf(f, "%s%s", p, str(con->label));
 		if (con->bits.i)
 			fprintf(f, "%+"PRId64, con->bits.i);
 		break;
diff --git a/amd64/isel.c b/amd64/isel.c
index 0dbaad3..39fc9e8 100644
--- a/amd64/isel.c
+++ b/amd64/isel.c
@@ -61,6 +61,7 @@ rslot(Ref r, Fn *fn)
 static void
 fixarg(Ref *r, int k, int op, Fn *fn)
 {
+	char buf[32];
 	Addr a, *m;
 	Ref r0, r1;
 	int s, n, cpy, mem;
@@ -80,7 +81,8 @@ fixarg(Ref *r, int k, int op, Fn *fn)
 		a.offset.type = CAddr;
 		a.offset.local = 1;
 		n = gasstashfp(fn->con[r0.val].bits.i, KWIDE(k));
-		sprintf(a.offset.label, "fp%d", n);
+		sprintf(buf, "fp%d", n);
+		a.offset.label = intern(buf);
 		fn->mem[fn->nmem-1] = a;
 	}
 	else if (!cpy && k == Kl && noimm(r0, fn)) {
diff --git a/arm64/emit.c b/arm64/emit.c
index 1b71179..8211c4f 100644
--- a/arm64/emit.c
+++ b/arm64/emit.c
@@ -244,9 +244,9 @@ loadcon(Con *c, int r, int k, FILE *f)
 			off[0] = 0;
 		p = c->local ? ".L" : "";
 		fprintf(f, "\tadrp\t%s, %s%s%s\n",
-			rn, p, c->label, off);
+			rn, p, str(c->label), off);
 		fprintf(f, "\tadd\t%s, %s, #:lo12:%s%s%s\n",
-			rn, rn, p, c->label, off);
+			rn, rn, p, str(c->label), off);
 		return;
 	}
 	assert(c->type == CBits);
diff --git a/arm64/isel.c b/arm64/isel.c
index 2d4e995..7ab368f 100644
--- a/arm64/isel.c
+++ b/arm64/isel.c
@@ -69,6 +69,7 @@ Check:
 static void
 fixarg(Ref *pr, int k, int phi, Fn *fn)
 {
+	char buf[32];
 	Ref r0, r1, r2;
 	int s, n;
 	Con *c;
@@ -86,8 +87,9 @@ fixarg(Ref *pr, int k, int phi, Fn *fn)
 			n = gasstashfp(c->bits.i, KWIDE(k));
 			vgrow(&fn->con, ++fn->ncon);
 			c = &fn->con[fn->ncon-1];
+			sprintf(buf, "fp%d", n);
 			*c = (Con){.type = CAddr, .local = 1};
-			sprintf(c->label, "fp%d", n);
+			c->label = intern(buf);
 			r2 = newtmp("isel", Kl, fn);
 			emit(Oload, k, r1, r2, R);
 			emit(Ocopy, Kl, r2, CON(c-fn->con), R);
diff --git a/fold.c b/fold.c
index 55672dd..c2375df 100644
--- a/fold.c
+++ b/fold.c
@@ -333,8 +333,10 @@ foldint(Con *res, int op, int w, Con *cl, Con *cr)
 		double fd;
 	} l, r;
 	uint64_t x;
-	char *lab;
+	uint32_t lab;
+	int typ;
 
+	typ = CBits;
 	lab = 0;
 	l.s = cl->bits.i;
 	r.s = cr->bits.i;
@@ -343,15 +345,19 @@ foldint(Con *res, int op, int w, Con *cl, Con *cr)
 			if (cr->type == CAddr)
 				err("undefined addition (addr + addr)");
 			lab = cl->label;
+			typ = CAddr;
 		}
-		else if (cr->type == CAddr)
+		else if (cr->type == CAddr) {
 			lab = cr->label;
+			typ = CAddr;
+		}
 	}
 	else if (op == Osub) {
 		if (cl->type == CAddr) {
-			if (cr->type != CAddr)
+			if (cr->type != CAddr) {
 				lab = cl->label;
-			else if (strcmp(cl->label, cr->label) != 0)
+				typ = CAddr;
+			} else if (cl->label != cr->label)
 				err("undefined substraction (addr1 - addr2)");
 		}
 		else if (cr->type == CAddr)
@@ -386,8 +392,10 @@ foldint(Con *res, int op, int w, Con *cl, Con *cr)
 	case Odtosi: x = w ? (int64_t)cl->bits.d : (int32_t)cl->bits.d; break;
 	case Ocast:
 		x = l.u;
-		if (cl->type == CAddr)
+		if (cl->type == CAddr) {
 			lab = cl->label;
+			typ = CAddr;
+		}
 		break;
 	default:
 		if (Ocmpw <= op && op <= Ocmpl1) {
@@ -439,10 +447,7 @@ foldint(Con *res, int op, int w, Con *cl, Con *cr)
 		else
 			die("unreachable");
 	}
-	*res = (Con){lab ? CAddr : CBits, .bits={.i=x}};
-	res->bits.i = x;
-	if (lab)
-		strcpy(res->label, lab);
+	*res = (Con){.type=typ, .label=lab, .bits={.i=x}};
 	return 0;
 }
 
diff --git a/load.c b/load.c
index bb4acbc..31d00bc 100644
--- a/load.c
+++ b/load.c
@@ -151,7 +151,7 @@ load(Slice sl, bits msk, Loc *l)
 			c = curf->ncon++;
 			vgrow(&curf->con, curf->ncon);
 			curf->con[c].type = CAddr;
-			strcpy(curf->con[c].label, a->label);
+			curf->con[c].label = a->label;
 			curf->con[c].bits.i = a->offset;
 			curf->con[c].local = 0;
 			r = CON(c);
diff --git a/parse.c b/parse.c
index b7bab5b..f4d4909 100644
--- a/parse.c
+++ b/parse.c
@@ -143,17 +143,6 @@ err(char *s, ...)
 	exit(1);
 }
 
-static inline uint32_t
-hash(char *s)
-{
-	uint32_t h;
-
-	h = 0;
-	for (; *s; ++s)
-		h = *s + 17*h;
-	return h;
-}
-
 static void
 lexinit()
 {
@@ -394,12 +383,12 @@ parseref()
 		goto Look;
 	case Tglo:
 		c.type = CAddr;
-		strcpy(c.label, tokval.str);
+		c.label = intern(tokval.str);
 	Look:
 		for (i=0; i<curf->ncon; i++)
 			if (curf->con[i].type == c.type
 			&& curf->con[i].bits.i == c.bits.i
-			&& strcmp(curf->con[i].label, c.label) == 0)
+			&& curf->con[i].label == c.label)
 				return CON(i);
 		vgrow(&curf->con, ++curf->ncon);
 		curf->con[i] = c;
@@ -1091,7 +1080,7 @@ printcon(Con *c, FILE *f)
 	case CUndef:
 		break;
 	case CAddr:
-		fprintf(f, "$%s", c->label);
+		fprintf(f, "$%s", str(c->label));
 		if (c->bits.i)
 			fprintf(f, "%+"PRIi64, c->bits.i);
 		break;
diff --git a/util.c b/util.c
index cb6e75c..8527931 100644
--- a/util.c
+++ b/util.c
@@ -3,6 +3,7 @@
 
 typedef struct Bitset Bitset;
 typedef struct Vec Vec;
+typedef struct Bucket Bucket;
 
 struct Vec {
 	ulong mag;
@@ -16,10 +17,17 @@ struct Vec {
 	} align[];
 };
 
+struct Bucket {
+	uint nstr;
+	char **str;
+};
+
 enum {
 	VMin = 2,
 	VMag = 0xcabba9e,
 	NPtr = 256,
+	IBits = 12,
+	IMask = (1<<IBits) - 1,
 };
 
 Typ *typ;
@@ -29,6 +37,18 @@ static void *ptr[NPtr];
 static void **pool = ptr;
 static int nptr = 1;
 
+static Bucket itbl[IMask+1]; /* string interning table */
+
+uint32_t
+hash(char *s)
+{
+	uint32_t h;
+
+	for (h=0; *s; ++s)
+		h = *s + 17*h;
+	return h;
+}
+
 void
 die_(char *file, char *s, ...)
 {
@@ -87,6 +107,88 @@ freeall()
 	nptr = 1;
 }
 
+void *
+vnew(ulong len, size_t esz, Pool pool)
+{
+	void *(*f)(size_t);
+	ulong cap;
+	Vec *v;
+
+	for (cap=VMin; cap<len; cap*=2)
+		;
+	f = pool == Pheap ? emalloc : alloc;
+	v = f(cap * esz + sizeof(Vec));
+	v->mag = VMag;
+	v->cap = cap;
+	v->esz = esz;
+	v->pool = pool;
+	return v + 1;
+}
+
+void
+vfree(void *p)
+{
+	Vec *v;
+
+	v = (Vec *)p - 1;
+	assert(v->mag == VMag);
+	if (v->pool == Pheap) {
+		v->mag = 0;
+		free(v);
+	}
+}
+
+void
+vgrow(void *vp, ulong len)
+{
+	Vec *v;
+	void *v1;
+
+	v = *(Vec **)vp - 1;
+	assert(v+1 && v->mag == VMag);
+	if (v->cap >= len)
+		return;
+	v1 = vnew(len, v->esz, v->pool);
+	memcpy(v1, v+1, v->cap * v->esz);
+	vfree(v+1);
+	*(Vec **)vp = v1;
+}
+
+uint32_t
+intern(char *s)
+{
+	Bucket *b;
+	uint32_t h;
+	uint i, n;
+
+	h = hash(s) & IMask;
+	b = &itbl[h];
+	n = b->nstr;
+
+	for (i=0; i<n; i++)
+		if (strcmp(s, b->str[i]) == 0)
+			return h + (i<<IBits);
+
+	if (n == 1<<(32-IBits))
+		die("interning table overflow");
+	if (n == 0)
+		b->str = vnew(1, sizeof b->str[0], Pheap);
+	else if ((n & (n-1)) == 0)
+		vgrow(&b->str, n+n);
+
+	b->str[n] = emalloc(strlen(s)+1);
+	b->nstr = n + 1;
+	strcpy(b->str[n], s);
+	return h + (n<<IBits);
+}
+
+char *
+str(uint32_t id)
+{
+	assert(id>>IBits < itbl[id&IMask].nstr);
+	return itbl[id&IMask].str[id>>IBits];
+}
+
 int
 iscmp(int op, int *pk, int *pc)
 {
@@ -148,53 +250,6 @@ icpy(Ins *d, Ins *s, ulong n)
 	return d + n;
 }
 
-void *
-vnew(ulong len, size_t esz, Pool pool)
-{
-	void *(*f)(size_t);
-	ulong cap;
-	Vec *v;
-
-	for (cap=VMin; cap<len; cap*=2)
-		;
-	f = pool == Pheap ? emalloc : alloc;
-	v = f(cap * esz + sizeof(Vec));
-	v->mag = VMag;
-	v->cap = cap;
-	v->esz = esz;
-	v->pool = pool;
-	return v + 1;
-}
-
-void
-vfree(void *p)
-{
-	Vec *v;
-
-	v = (Vec *)p - 1;
-	assert(v->mag == VMag);
-	if (v->pool == Pheap) {
-		v->mag = 0;
-		free(v);
-	}
-}
-
-void
-vgrow(void *vp, ulong len)
-{
-	Vec *v;
-	void *v1;
-
-	v = *(Vec **)vp - 1;
-	assert(v+1 && v->mag == VMag);
-	if (v->cap >= len)
-		return;
-	v1 = vnew(len, v->esz, v->pool);
-	memcpy(v1, v+1, v->cap * v->esz);
-	vfree(v+1);
-	*(Vec **)vp = v1;
-}
-
 static int cmptab[][2] ={
 	             /* negation    swap */
 	[Ciule]      = {Ciugt,      Ciuge},
@@ -308,7 +363,7 @@ addcon(Con *c0, Con *c1)
 		if (c1->type == CAddr) {
 			assert(c0->type != CAddr && "adding two addresses");
 			c0->type = CAddr;
-			strcpy(c0->label, c1->label);
+			c0->label = c1->label;
 		}
 		c0->bits.i += c1->bits.i;
 	}