summary refs log tree commit diff
diff options
context:
space:
mode:
authorQuentin Carbonneaux <quentin.carbonneaux@yale.edu>2015-06-29 06:56:11 -0400
committerQuentin Carbonneaux <quentin.carbonneaux@yale.edu>2015-09-15 23:01:27 -0400
commit762330d6fa626e57a0b350cd89680d59b3158ba6 (patch)
treeb22418242490d80fd291da7ea83fbdd35355f52f
parentbab23d801eb12767f12ae8a04c24fdb29ece5aa8 (diff)
downloadroux-762330d6fa626e57a0b350cd89680d59b3158ba6.tar.gz
try writing a parser, painful
-rw-r--r--lisc/lisc.h74
-rw-r--r--lisc/lo.c37
-rw-r--r--lisc/parse.c296
3 files changed, 383 insertions, 24 deletions
diff --git a/lisc/lisc.h b/lisc/lisc.h
index dbe4d63..d73cc63 100644
--- a/lisc/lisc.h
+++ b/lisc/lisc.h
@@ -1,33 +1,42 @@
+#include <assert.h>
 #include <stdio.h>
 #include <stdlib.h>
 
+/*
+  References have to be able to encode:
+    - Results of other operations (temporaries).
+    - Machine registers (for lowering).
+    - Spill locations.
+    - Constants/pointers to program data.
+*/
+
 enum {
-	R = -1,        /* Invalid reference. */
-	Temp0 = 32,    /* First temporary, below are machine registers. */
-	MaxPreds = 16, /* Maximum number of predecessors for a block. */
+	R = 0,          /* Invalid reference. */
+	Temp0 = 33,     /* First temporary ref. */
+	Const0 = 20000, /* First constant ref. */
+	MaxIdnt = 32,
+	MaxPreds = 16,
 	MaxBlks = 128,
 	MaxInss = 128,
 	MaxPhis = 128,
 };
 
-typedef int Ref;
+typedef unsigned Ref;
 typedef struct Ins Ins;
 typedef struct Phi Phi;
 typedef struct Blk Blk;
+typedef struct Fn Fn;
 typedef enum Op Op;
 typedef enum Jmp Jmp;
 
 enum Op {
-	ONop,
-	OAdd,
+	OAdd = 1,
 	OSub,
-	OSDiv,
+	ODiv,
 	OMod,
-	OParam,
-	OCall,
 
-	/* x86 instructions (reserved) */
-	XDiv,
+	/* Reserved instructions. */
+	X86Div,
 };
 
 enum Jmp {
@@ -49,14 +58,51 @@ struct Phi {
 };
 
 struct Blk {
-	int np;
-	int ni;
 	Phi *ps;
 	Ins *is;
+	int np;
+	int ni;
 	struct {
 		Jmp ty;
 		Ref arg;
 	} jmp;
 	int suc0, suc1;
-	int dpth;
+	int lvl;
+};
+
+struct Sym {
+	enum {
+		SUndef,
+		SReg,
+		SNum,
+		STemp,
+	} ty;
+	union {
+		char sreg[MaxIdnt];
+		long long snum;
+		struct {
+			char id[MaxIdnt];
+			int blk;
+			enum {
+				TPhi,
+				TIns,
+			} ty;
+			int loc;
+		} stemp;
+	} u;
+};
+
+#define sreg u.sreg
+#define snum u.snum
+#define stemp u.stemp
+
+struct Fn {
+	Sym *sym;
+	Blk *blk;
+	int nblk;
+	Ref nref;
 };
+
+
+/* parse.c */
+Fn *parsefn(FILE *);
diff --git a/lisc/lo.c b/lisc/lo.c
index 931385e..19b3a7d 100644
--- a/lisc/lo.c
+++ b/lisc/lo.c
@@ -1,8 +1,5 @@
-/*% clang -Wall -o # %
- */
 #include "lisc.h"
 
-
 static void
 rporec(int *rpo, int *cnt, Blk *bs, int b)
 {
@@ -16,7 +13,7 @@ rporec(int *rpo, int *cnt, Blk *bs, int b)
 }
 
 int
-dorpo(Blk *bs, int nb)
+rposched(Blk *bs, int nb)
 {
 	static int rpo[MaxBlks];
 	Blk t;
@@ -55,6 +52,23 @@ dorpo(Blk *bs, int nb)
 }
 
 
+void
+cfdump(FILE *f, char *pname, Blk *bs, int nb)
+{
+	Blk *b;
+	int i;
+
+	fprintf(f, "digraph %s {\n", pname);
+	for (b=bs, i=0; i<nb; i++, b++) {
+		if (b->suc0 >= 0)
+			fprintf(f, "b%d -> b%d;\n", i, b->suc0);
+		if (b->suc1 >= 0)
+			fprintf(f, "b%d -> b%d;\n", i, b->suc1);
+		fprintf(f, "b%d [shape=box]\n", i);
+	}
+	fprintf(f, "}\n");
+}
+
 #define LEN(a) sizeof a / sizeof a[0]
 
 Blk rpocond[] = {
@@ -69,14 +83,17 @@ int
 main()
 {
 	Blk *bs;
-	int i, nb;
+	int nb;
+	FILE *f;
 
 	bs = rpocond;
 	nb = LEN(rpocond);
-	nb = dorpo(bs, nb);
-	for (i=0; i<nb; i++) {
-		printf("%02d -> [%02d, %02d]\n", i,
-			bs[i].suc0, bs[i].suc1);
-	}
+
+	f = fopen("cf.dot", "w");
+	cfdump(f, "bef", bs, nb);
+	nb = rposched(bs, nb);
+	cfdump(f, "aft", bs, nb);
+	fclose(f);
+
 	return 0;
 }
diff --git a/lisc/parse.c b/lisc/parse.c
new file mode 100644
index 0000000..86720b9
--- /dev/null
+++ b/lisc/parse.c
@@ -0,0 +1,296 @@
+/* A really crude parser. */
+#include <ctype.h>
+#include <string.h>
+
+#include "lisc.h"
+
+enum {
+	MaxRefs = 256,
+};
+
+typedef enum Token Token;
+typedef enum PState PState;
+
+enum PState { /* Parsing state in a block. */
+	PErr,
+	PPhi,
+	PIns,
+	PEnd,
+};
+
+enum Token {
+	TAdd,
+	TSub,
+	TDiv,
+	TMod,
+	TPhi,
+	TJmp,
+	TCnd,
+	TRet,
+
+	TNum,
+	TVar,
+	TEq,
+	TComma,
+	TLParen,
+	TRParen,
+	TNL,
+	TEOF,
+};
+
+
+static FILE *inf;
+static char *errstr;
+
+static Sym sym[MaxRef];
+static Ref nref;
+static Blk blk[MaxBlks], *curblk;
+static Phi phi[MaxPhis], *curphi;
+static Ins ins[MaxInss], *curins;
+
+static struct {
+	long long num;
+	char *var;
+} tokval;
+static int lnum;
+
+
+static void
+err(char *s)
+{
+	if (!s)
+		s = errstr;
+	assert(s);
+	fprintf(stderr, "parse error: %s (line %d)\n", s, lnum);
+	exit(1);
+}
+
+static Token
+token()
+{
+	static struct {
+		char *str;
+		Token tok;
+	} tmap[] = {
+		{ "add", TAdd },
+		{ "sub", TSub },
+		{ "div", TDiv },
+		{ "mod", TMod },
+		{ "phi", TPhi },
+		{ "jmp", TJmp },
+		{ "cnd", TCnd },
+		{ "ret", TRet },
+		{ 0 },
+	};
+	static char tok[MaxIdnt];
+	int c, i, var, sgn;
+
+	do
+		c = fgetc(inf);
+	while (isblank(c));
+	switch (c) {
+	case EOF:
+		return TEOF;
+	case ',':
+		return TComma;
+	case '(':
+		return TLParen;
+	case ')':
+		return TRParen;
+	case '=':
+		return TEq;
+	case '\n':
+		lnum++;
+		return TNL;
+	}
+	if (isdigit(c) || c == '-') {
+		if (c == '-') {
+			tokval.num = 0;
+			sgn = -1;
+		} else {
+			tokval.num = c - '0';
+			sgn = 1;
+		}
+		for (;;) {
+			c = fgetc(inf);
+			if (!isdigit(c))
+				break;
+			tokval.num *= 10;
+			tokval.num += c - '0';
+		}
+		ungetc(c, f);
+		tokval.num *= sgn;
+		return TNum;
+	}
+	if (c == '%') {
+		var = 1;
+		c = fgetc(inf);
+	} else
+		var = 0;
+	if (!isalpha(c))
+		err("lexing failure");
+	i = 0;
+	do {
+		if (i >= MaxIdnt-1)
+			err("identifier too long");
+		tok[i++] = c;
+		c = fgetc(inf);
+	} while (isalpha(c) || isdigit(c));
+	tok[i] = 0;
+	ungetc(c, f);
+	if (var) {
+		tokval.var = tok;
+		return TVar;
+	}
+	for (i=0; tmap[i].str; i++)
+		if (strcmp(tok, tmap[i].str) == 0)
+			return tmap[i].tok;
+	err("unknown keyword");
+	return -1;
+}
+
+static Ref
+varref(char *v)
+{
+	Ref r;
+
+	for (r=Temp0; r<nref; r++)
+		if (sym[r].ty == STemp)
+		if (strcmp(v, sym[r].stemp.id) == 0)
+			return r;
+	if (r >= MaxRefs)
+		err("too many references");
+	sym[r].ty = STemp;
+	strcpy(sym[r].stemp.id, v);
+	sym[r].stemp.blk = -1;
+	return nref++;
+}
+
+static Ref
+parseref()
+{
+	Ref r;
+
+	switch (token()) {
+	case TVar:
+		return varref(tokval.var);
+	case TNum:
+		/* Add the constant to the symbol table. */
+		for (r=Temp0; r<nref; r++)
+			if (sym[r].ty == SNum)
+			if (sym[r].snum == tokval.num)
+				return r;
+		if (r >= MaxRefs)
+			err("too many references");
+		sym[r].ty = SNum;
+		sym[r].snum = tokval.num;
+		retunr nref++;
+	default:
+		errstr = "number or variable expected";
+		return R;
+	}
+}
+
+static PState
+parseline(PState ps)
+{
+	Ref args[MaxPreds];
+	Ref r;
+	Token t;
+	int na;
+
+	t = token();
+	switch (t) {
+	default:
+		errstr = "variable or jump expected";
+		return PErr;
+	case TVar:
+		break;
+	case TRet:
+		curblk->jmp.ty = JRet;
+		goto Jump;
+	case TJmp:
+		curblk->jmp.ty = JJmp;
+		goto Jump;
+	case TCnd:
+		curblk->jmp.ty = JCnd;
+		r = parseref();
+		if (r == R) {
+			errstr = "invalid argument for cnd";
+			return PErr;
+		}
+		if (token() != TComma) {
+			errstr = "missing comma";
+			return PErr;
+		}
+		curblk->jmp.arg = r;
+	Jump:
+		if (
+		return PEnd;
+	}
+	r = varref(tokval.var);
+	if (token() != TEq) {
+		errstr = "= expected after variable";
+		return PErr;
+	}
+	switch (token()) {
+	case TAdd:
+		curins->op = OAdd;
+		break;
+	case TSub:
+		curins->op = OSub;
+		break;
+	case TDiv:
+		curins->op = ODiv;
+		break;
+	case TMod:
+		curins->op = OMod;
+		break;
+	case TPhi:
+		return PIns;
+	default:
+		errstr = "invalid instruction";
+		return PErr;
+	}
+	na = 0;
+	for (;;) {
+		x
+	}
+}
+
+
+
+
+
+
+// in main parsing routine, reset lnum to 1
+// also reset curxxx
+// also nsym to Temp0
+// also sym[0..nsym-1].ty = SReg
+
+#if 0
+int main()
+{
+	char *toknames[] = {
+		"TAdd",
+		"TSub",
+		"TDiv",
+		"TMod",
+		"TPhi",
+		"TJmp",
+		"TCnd",
+		"TRet",
+		"TNum",
+		"TVar",
+		"TEq",
+		"TComma",
+		"TLParen",
+		"TRParen",
+		"TNL",
+		"TEOF",
+	};
+	inf = stdin;
+	for (;;)
+		printf("%s\n", toknames[token()]);
+}
+#endif