summary refs log tree commit diff
path: root/lisc/parse.c
diff options
context:
space:
mode:
Diffstat (limited to 'lisc/parse.c')
-rw-r--r--lisc/parse.c386
1 files changed, 274 insertions, 112 deletions
diff --git a/lisc/parse.c b/lisc/parse.c
index 86720b9..4e6ec85 100644
--- a/lisc/parse.c
+++ b/lisc/parse.c
@@ -1,24 +1,27 @@
-/* A really crude parser. */
+/* really crude parser
+ */
 #include <ctype.h>
 #include <string.h>
 
 #include "lisc.h"
 
 enum {
-	MaxRefs = 256,
+	NTemps = 256,
 };
 
 typedef enum Token Token;
 typedef enum PState PState;
 
-enum PState { /* Parsing state in a block. */
-	PErr,
+enum PState {
+	PXXX,
+	PLbl,
 	PPhi,
 	PIns,
 	PEnd,
 };
 
 enum Token {
+	TXXX,
 	TAdd,
 	TSub,
 	TDiv,
@@ -30,6 +33,7 @@ enum Token {
 
 	TNum,
 	TVar,
+	TLbl,
 	TEq,
 	TComma,
 	TLParen,
@@ -41,23 +45,40 @@ enum Token {
 
 static FILE *inf;
 static char *errstr;
+static Token thead;
 
-static Sym sym[MaxRef];
-static Ref nref;
-static Blk blk[MaxBlks], *curblk;
-static Phi phi[MaxPhis], *curphi;
-static Ins ins[MaxInss], *curins;
+static Sym sym[NTemps];
+static Ref ntemp;
+static Phi phis[NPhis], *curp;
+static Ins inss[NInss], *curi;
+static struct {
+	char name[NString];
+	Blk *blk;
+} blks[NBlks+1];
+static Blk *curb;
 
 static struct {
 	long long num;
-	char *var;
+	char *str;
 } tokval;
 static int lnum;
 
 
+void *
+alloc(size_t n)
+{
+	void *p;
+
+	p = malloc(n);
+	if (!p)
+		abort();
+	return p;
+}
+
 static void
 err(char *s)
 {
+	/* todo, export the error handling in util.c */
 	if (!s)
 		s = errstr;
 	assert(s);
@@ -66,7 +87,7 @@ err(char *s)
 }
 
 static Token
-token()
+lex()
 {
 	static struct {
 		char *str;
@@ -82,8 +103,9 @@ token()
 		{ "ret", TRet },
 		{ 0 },
 	};
-	static char tok[MaxIdnt];
-	int c, i, var, sgn;
+	static char tok[NString];
+	int c, i, sgn;
+	Token t;
 
 	do
 		c = fgetc(inf);
@@ -118,29 +140,32 @@ token()
 			tokval.num *= 10;
 			tokval.num += c - '0';
 		}
-		ungetc(c, f);
+		ungetc(c, inf);
 		tokval.num *= sgn;
 		return TNum;
 	}
 	if (c == '%') {
-		var = 1;
+		t = TVar;
+		c = fgetc(inf);
+	} else if (c == '@') {
+		t = TLbl;
 		c = fgetc(inf);
 	} else
-		var = 0;
+		t = TXXX;
 	if (!isalpha(c))
 		err("lexing failure");
 	i = 0;
 	do {
-		if (i >= MaxIdnt-1)
+		if (i >= NString-1)
 			err("identifier too long");
 		tok[i++] = c;
 		c = fgetc(inf);
 	} while (isalpha(c) || isdigit(c));
 	tok[i] = 0;
-	ungetc(c, f);
-	if (var) {
-		tokval.var = tok;
-		return TVar;
+	ungetc(c, inf);
+	if (t != TXXX) {
+		tokval.str = tok;
+		return t;
 	}
 	for (i=0; tmap[i].str; i++)
 		if (strcmp(tok, tmap[i].str) == 0)
@@ -149,148 +174,285 @@ token()
 	return -1;
 }
 
+static Token
+peek()
+{
+	if (thead == TXXX)
+		thead = lex();
+	return thead;
+}
+
+static Token
+next()
+{
+	Token t;
+
+	t = peek();
+	thead = TXXX;
+	return t;
+}
+
+static Blk *
+blocka()
+{
+	static Blk zblock;
+	Blk *b;
+
+	b = alloc(sizeof *b);
+	*b = zblock;
+	return b;
+}
+
 static Ref
 varref(char *v)
 {
-	Ref r;
+	int t;
 
-	for (r=Temp0; r<nref; r++)
-		if (sym[r].ty == STemp)
-		if (strcmp(v, sym[r].stemp.id) == 0)
-			return r;
-	if (r >= MaxRefs)
-		err("too many references");
-	sym[r].ty = STemp;
-	strcpy(sym[r].stemp.id, v);
-	sym[r].stemp.blk = -1;
-	return nref++;
+	for (t=Temp0; t<ntemp; t++)
+		if (sym[t].type == STemp)
+		if (strcmp(v, sym[t].name) == 0)
+			return TEMP(t);
+	if (t >= NTemps)
+		err("too many temporaries");
+	sym[t].type = STemp;
+	strcpy(sym[t].name, v);
+	sym[t].blk = 0;
+	return TEMP(t);
 }
 
 static Ref
 parseref()
 {
-	Ref r;
-
-	switch (token()) {
+	switch (next()) {
 	case TVar:
-		return varref(tokval.var);
+		return varref(tokval.str);
 	case TNum:
-		/* Add the constant to the symbol table. */
-		for (r=Temp0; r<nref; r++)
-			if (sym[r].ty == SNum)
-			if (sym[r].snum == tokval.num)
-				return r;
-		if (r >= MaxRefs)
-			err("too many references");
-		sym[r].ty = SNum;
-		sym[r].snum = tokval.num;
-		retunr nref++;
+		if (tokval.num > NRefs || tokval.num < 0)
+			/* todo, use constants table */
+			abort();
+		return CONST(tokval.num);
 	default:
 		errstr = "number or variable expected";
 		return R;
 	}
 }
 
+static Blk *
+findblk(char *name)
+{
+	int i;
+
+	assert(name[0]);
+	for (i=0; i<NBlks; i++)
+		if (!blks[i].blk || strcmp(blks[i].name, name) == 0)
+			break;
+	if (i == NBlks)
+		err("too many blocks");
+	if (!blks[i].blk) {
+		assert(blks[i].name[0] == 0);
+		strcpy(blks[i].name, name);
+		blks[i].blk = blocka();
+		strcpy(blks[i].blk->name, name);
+	}
+	return blks[i].blk;
+}
+
+static void
+expect(Token t)
+{
+	static char *names[] = {
+		[TLbl] = "label",
+		[TComma] = "comma",
+		[TEq] = "=",
+		[TNL] = "newline",
+		[TEOF] = 0,
+	};
+	char buf[128], *s1, *s2;
+	Token t1;
+
+	t1 = next();
+	if (t == t1)
+		return;
+	s1 = names[t] ? names[t] : "??";
+	s2 = names[t1] ? names[t1] : "??";
+	snprintf(buf, sizeof buf,
+		"%s expected (got %s instead)", s1, s2);
+	err(buf);
+}
+
 static PState
 parseline(PState ps)
 {
-	Ref args[MaxPreds];
+	Ref args[NPreds] = {0};
 	Ref r;
 	Token t;
-	int na;
+	Blk *b;
+	int op, i, j;
 
-	t = token();
+	assert(args[0] == R);
+	do
+		t = next();
+	while (t == TNL);
+	if (ps == PLbl && t != TLbl && t != TEOF)
+		err("label or end of file expected");
 	switch (t) {
 	default:
-		errstr = "variable or jump expected";
-		return PErr;
+		err("label, instruction or jump expected");
+	case TEOF:
+		return PEnd;
 	case TVar:
 		break;
+	case TLbl:
+		b = findblk(tokval.str);
+		if (curb) {
+			curb->np = curp - phis;
+			curb->ni = curi - inss;
+			curb->ps = alloc(curb->np * sizeof(Phi));
+			curb->is = alloc(curb->ni * sizeof(Ins));
+			memcpy(curb->ps, curp, curb->np * sizeof(Phi));
+			memcpy(curb->is, curi, curb->ni * sizeof(Ins));
+			if (curb->jmp.type == JXXX) {
+				curb->jmp.type = JJmp;
+				curb->s1 = b;
+			}
+		}
+		curb = b;
+		if (curb->np || curb->ni)
+			err("block already defined");
+		expect(TNL);
+		return PPhi;
 	case TRet:
-		curblk->jmp.ty = JRet;
-		goto Jump;
+		curb->jmp.type = JRet;
+		expect(TNL);
+		return PLbl;
 	case TJmp:
-		curblk->jmp.ty = JJmp;
+		curb->jmp.type = JJmp;
 		goto Jump;
 	case TCnd:
-		curblk->jmp.ty = JCnd;
+		curb->jmp.type = JCnd;
 		r = parseref();
-		if (r == R) {
-			errstr = "invalid argument for cnd";
-			return PErr;
-		}
-		if (token() != TComma) {
-			errstr = "missing comma";
-			return PErr;
-		}
-		curblk->jmp.arg = r;
+		if (r == R)
+			err("invalid argument for cnd jump");
+		curb->jmp.arg = r;
+		expect(TComma);
 	Jump:
-		if (
-		return PEnd;
-	}
-	r = varref(tokval.var);
-	if (token() != TEq) {
-		errstr = "= expected after variable";
-		return PErr;
+		expect(TLbl);
+		curb->s1 = findblk(tokval.str);
+		if (curb->jmp.type == TJmp) {
+			expect(TNL);
+			return PLbl;
+		}
+		expect(TComma);
+		expect(TLbl);
+		curb->s2 = findblk(tokval.str);
+		expect(TNL);
+		return PLbl;
 	}
-	switch (token()) {
+	r = varref(tokval.str);
+	expect(TEq);
+	switch (next()) {
 	case TAdd:
-		curins->op = OAdd;
+		op = OAdd;
+		j = 2;
 		break;
 	case TSub:
-		curins->op = OSub;
+		op = OSub;
+		j = 2;
 		break;
 	case TDiv:
-		curins->op = ODiv;
+		op = ODiv;
+		j = 2;
 		break;
 	case TMod:
-		curins->op = OMod;
+		op = OMod;
+		j = 2;
 		break;
 	case TPhi:
-		return PIns;
+		if (ps != PPhi)
+			err("unexpected phi instruction");
+		op = -1;
+		j = -1;
+		break;
 	default:
-		errstr = "invalid instruction";
-		return PErr;
+		err("invalid instruction");
 	}
-	na = 0;
-	for (;;) {
-		x
+	i = 0;
+	if (peek() != TNL)
+		for (;;) {
+			if (i == NPreds)
+				err("too many arguments");
+			args[i] = parseref();
+			if (args[i] == R)
+				err("invalid instruction argument");
+			i++;
+			t = peek();
+			if (t == TNL)
+				break;
+			if (t != TComma)
+				err("comma or end of line expected");
+			next();
+		}
+	next();
+	if (j >= 0 && i != j)
+		err("invalid arity");
+	if (op != -1) {
+		if (curi - inss >= NInss)
+			err("too many instructions in block");
+		curi->to = r;
+		curi->l = args[0];
+		curi->r = args[1];
+		curi++;
+		return PPhi;
+	} else {
+		if (curp - phis >= NPhis)
+			err("too many phis in block");
+		curp->to = r;
+		memcpy(curp->args, args, i * sizeof(Ref));
+		curp->na = i;
+		curp++;
+		return PIns;
 	}
 }
 
+Fn *
+parsefn(FILE *f)
+{
+	int i;
+	PState ps;
+	Fn *fn;
 
+	inf = f;
+	for (i=0; i<NBlks; i++) {
+		blks[i].name[0] = 0;
+		blks[i].blk = 0;
+	}
+	for (i=Temp0; i<NTemps; i++) {
+		sym[i].type = SUndef;
+		sym[i].name[0] = 0;
+		sym[i].blk = 0;
+	}
+	ntemp = Temp0;
+	curp = phis;
+	curi = inss;
+	curb = 0;
+	lnum = 1;
+	fn = alloc(sizeof *fn);
+	ps = parseline(PLbl);
+	fn->start = curb;  /* we should have parsed the start label */
+	do
+		ps = parseline(ps);
+	while (ps != PEnd);
+	fn->sym = alloc(ntemp * sizeof sym[0]);
+	memcpy(fn->sym, sym, ntemp * sizeof sym[0]);
+	fn->ntemp = ntemp;
+	return fn;
+}
 
 
 
-
-// in main parsing routine, reset lnum to 1
-// also reset curxxx
-// also nsym to Temp0
-// also sym[0..nsym-1].ty = SReg
-
-#if 0
-int main()
+int
+main()
 {
-	char *toknames[] = {
-		"TAdd",
-		"TSub",
-		"TDiv",
-		"TMod",
-		"TPhi",
-		"TJmp",
-		"TCnd",
-		"TRet",
-		"TNum",
-		"TVar",
-		"TEq",
-		"TComma",
-		"TLParen",
-		"TRParen",
-		"TNL",
-		"TEOF",
-	};
-	inf = stdin;
-	for (;;)
-		printf("%s\n", toknames[token()]);
+	parsefn(stdin);
+	return 0;
 }
-#endif