summary refs log tree commit diff
diff options
context:
space:
mode:
authorQuentin Carbonneaux <quentin.carbonneaux@yale.edu>2016-12-30 21:57:04 -0500
committerQuentin Carbonneaux <quentin.carbonneaux@yale.edu>2016-12-30 22:05:27 -0500
commitcd458524b3a6008694e349766e514411893601fa (patch)
tree4de7d2418a964c8b183021dc21077820beadc9d9
parented8fe831fe583e50c5b3a0c0096deaf350b0c309 (diff)
downloadroux-cd458524b3a6008694e349766e514411893601fa.tar.gz
new tool to improve lexing speed
-rw-r--r--tools/lexh.c91
1 files changed, 91 insertions, 0 deletions
diff --git a/tools/lexh.c b/tools/lexh.c
new file mode 100644
index 0000000..b61dc06
--- /dev/null
+++ b/tools/lexh.c
@@ -0,0 +1,91 @@
+/*% c99 -O3 -Wall -o # %
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+
+typedef unsigned int uint;
+
+char *tok[] = {
+
+	"add", "sub", "div", "rem", "udiv", "urem", "mul",
+	"and", "or", "xor", "sar", "shr", "shl", "stored",
+	"stores", "storel", "storew", "storeh", "storeb",
+	"load", "loadsw", "loaduw", "loadsh", "loaduh",
+	"loadsb", "loadub", "extsw", "extuw", "extsh",
+	"extuh", "extsb", "extub", "exts", "truncd",
+	"stosi", "dtosi", "swtof", "sltof", "cast", "copy",
+	"alloc4", "alloc8", "alloc16", "culew", "cultw",
+	"cslew", "csltw", "csgtw", "csgew", "cugtw",
+	"cugew", "ceqw", "cnew", "culel", "cultl", "cslel",
+	"csltl", "csgtl", "csgel", "cugtl", "cugel",
+	"ceql", "cnel", "cles", "clts", "cgts", "cges",
+	"cnes", "ceqs", "cos", "cuos", "cled", "cltd",
+	"cgtd", "cged", "cned", "ceqd", "cod", "cuod",
+
+	"call", "phi", "jmp", "jnz", "ret", "export",
+	"function", "type", "data", "align", "l", "w",
+	"h", "b", "d", "s", "z", "loadw", "loadl", "loads",
+	"loadd", "alloc1", "alloc2",
+
+};
+enum {
+	Ntok = sizeof tok / sizeof tok[0]
+};
+
+uint th[Ntok];
+
+uint
+hash(char *s)
+{
+	uint h;
+
+	h = 0;
+	for (; *s; ++s)
+		h = *s + 5*h;
+	return h;
+}
+
+int
+main()
+{
+	char *bmap;
+	uint h;
+	int i, j;
+	int M, K;
+
+	bmap = malloc(1u << 31);
+
+	for (i=0; i<Ntok; ++i) {
+		h = hash(tok[i]);
+		for (j=0; j<i; ++j)
+			if (th[j] == h) {
+				printf("error: hash()\n");
+				printf("\t%s\n", tok[i]);
+				printf("\t%s\n", tok[j]);
+				exit(1);
+			}
+		th[i] = h;
+	}
+
+	for (i=0; 1<<i < Ntok; ++i);
+	M = 32 - i;
+
+	for (;; --M) {
+		printf("trying M=%d...\n", M);
+		for (K = 1; K<UINT_MAX-2; K+=2) {
+			memset(bmap, 0, 1 << (32 - M));
+			for (i=0; i<Ntok; ++i) {
+				h = (th[i]*K) >> M;
+				if (bmap[h])
+					break;
+				bmap[h] = 1;
+			}
+			if (i==Ntok) {
+				printf("found K=%d for M=%d\n", K, M);
+				exit(0);
+			}
+		}
+	}
+}