summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorQuentin Carbonneaux <quentin.carbonneaux@yale.edu>2015-07-03 12:16:39 -0400
committerQuentin Carbonneaux <quentin.carbonneaux@yale.edu>2015-09-15 23:01:27 -0400
commitfc814c5873d6ffd76130da015520b07c8f8f3450 (patch)
tree400038655972f9ba8d9cd757608bda60c5b99b76
parenta9d8bf7a2dd257a5027f238fae6095e3d0f02429 (diff)
downloadroux-fc814c5873d6ffd76130da015520b07c8f8f3450.tar.gz
attempt to complete the crappy parser
-rw-r--r--lisc/lisc.h18
-rw-r--r--lisc/parse.c386
2 files changed, 285 insertions, 119 deletions
diff --git a/lisc/lisc.h b/lisc/lisc.h
index ea8106e..abb025f 100644
--- a/lisc/lisc.h
+++ b/lisc/lisc.h
@@ -1,7 +1,9 @@
#include <assert.h>
+#include <limits.h>
#include <stdio.h>
+#include <stdlib.h>
-typedef unsgined int uint;
+typedef unsigned int uint;
typedef unsigned short ushort;
typedef unsigned char uchar;
@@ -13,9 +15,9 @@ typedef unsigned char uchar;
*/
enum {
- R = 0, /* invalid reference */
+ R = 0, /* invalid reference */
NRegs = 32,
- Temp0 = NRegs+1, /* first temporary */
+ Temp0 = NRegs+1,
NString = 32,
NPreds = 15,
NBlks = 128,
@@ -26,20 +28,21 @@ enum {
typedef struct Ins Ins;
typedef struct Phi Phi;
typedef struct Blk Blk;
+typedef struct Sym Sym;
typedef struct Fn Fn;
typedef ushort Ref;
enum {
RTemp = 0,
- RData = 1,
+ RConst = 1,
RMask = 1,
RShift = 1,
NRefs = USHRT_MAX>>RShift,
};
-#define TEMP(x) (((x)<<RShift) | RTemp)
-#define DATA(x) (((x)<<RShift) | RData)
+#define TEMP(x) (((x)<<RShift) | RTemp)
+#define CONST(x) (((x)<<RShift) | RConst)
enum {
OXXX,
@@ -47,7 +50,7 @@ enum {
OSub,
ODiv,
OMod,
- OLoad,
+ OConst,
/* reserved instructions */
OX86Div,
@@ -85,6 +88,7 @@ struct Blk {
Blk *s1;
Blk *s2;
+ char name[NString];
int rpo;
};
diff --git a/lisc/parse.c b/lisc/parse.c
index 86720b9..4e6ec85 100644
--- a/lisc/parse.c
+++ b/lisc/parse.c
@@ -1,24 +1,27 @@
-/* A really crude parser. */
+/* really crude parser
+ */
#include <ctype.h>
#include <string.h>
#include "lisc.h"
enum {
- MaxRefs = 256,
+ NTemps = 256,
};
typedef enum Token Token;
typedef enum PState PState;
-enum PState { /* Parsing state in a block. */
- PErr,
+enum PState {
+ PXXX,
+ PLbl,
PPhi,
PIns,
PEnd,
};
enum Token {
+ TXXX,
TAdd,
TSub,
TDiv,
@@ -30,6 +33,7 @@ enum Token {
TNum,
TVar,
+ TLbl,
TEq,
TComma,
TLParen,
@@ -41,23 +45,40 @@ enum Token {
static FILE *inf;
static char *errstr;
+static Token thead;
-static Sym sym[MaxRef];
-static Ref nref;
-static Blk blk[MaxBlks], *curblk;
-static Phi phi[MaxPhis], *curphi;
-static Ins ins[MaxInss], *curins;
+static Sym sym[NTemps];
+static Ref ntemp;
+static Phi phis[NPhis], *curp;
+static Ins inss[NInss], *curi;
+static struct {
+ char name[NString];
+ Blk *blk;
+} blks[NBlks+1];
+static Blk *curb;
static struct {
long long num;
- char *var;
+ char *str;
} tokval;
static int lnum;
+void *
+alloc(size_t n)
+{
+ void *p;
+
+ p = malloc(n);
+ if (!p)
+ abort();
+ return p;
+}
+
static void
err(char *s)
{
+ /* todo, export the error handling in util.c */
if (!s)
s = errstr;
assert(s);
@@ -66,7 +87,7 @@ err(char *s)
}
static Token
-token()
+lex()
{
static struct {
char *str;
@@ -82,8 +103,9 @@ token()
{ "ret", TRet },
{ 0 },
};
- static char tok[MaxIdnt];
- int c, i, var, sgn;
+ static char tok[NString];
+ int c, i, sgn;
+ Token t;
do
c = fgetc(inf);
@@ -118,29 +140,32 @@ token()
tokval.num *= 10;
tokval.num += c - '0';
}
- ungetc(c, f);
+ ungetc(c, inf);
tokval.num *= sgn;
return TNum;
}
if (c == '%') {
- var = 1;
+ t = TVar;
+ c = fgetc(inf);
+ } else if (c == '@') {
+ t = TLbl;
c = fgetc(inf);
} else
- var = 0;
+ t = TXXX;
if (!isalpha(c))
err("lexing failure");
i = 0;
do {
- if (i >= MaxIdnt-1)
+ if (i >= NString-1)
err("identifier too long");
tok[i++] = c;
c = fgetc(inf);
} while (isalpha(c) || isdigit(c));
tok[i] = 0;
- ungetc(c, f);
- if (var) {
- tokval.var = tok;
- return TVar;
+ ungetc(c, inf);
+ if (t != TXXX) {
+ tokval.str = tok;
+ return t;
}
for (i=0; tmap[i].str; i++)
if (strcmp(tok, tmap[i].str) == 0)
@@ -149,148 +174,285 @@ token()
return -1;
}
+static Token
+peek()
+{
+ if (thead == TXXX)
+ thead = lex();
+ return thead;
+}
+
+static Token
+next()
+{
+ Token t;
+
+ t = peek();
+ thead = TXXX;
+ return t;
+}
+
+static Blk *
+blocka()
+{
+ static Blk zblock;
+ Blk *b;
+
+ b = alloc(sizeof *b);
+ *b = zblock;
+ return b;
+}
+
static Ref
varref(char *v)
{
- Ref r;
+ int t;
- for (r=Temp0; r<nref; r++)
- if (sym[r].ty == STemp)
- if (strcmp(v, sym[r].stemp.id) == 0)
- return r;
- if (r >= MaxRefs)
- err("too many references");
- sym[r].ty = STemp;
- strcpy(sym[r].stemp.id, v);
- sym[r].stemp.blk = -1;
- return nref++;
+ for (t=Temp0; t<ntemp; t++)
+ if (sym[t].type == STemp)
+ if (strcmp(v, sym[t].name) == 0)
+ return TEMP(t);
+ if (t >= NTemps)
+ err("too many temporaries");
+ sym[t].type = STemp;
+ strcpy(sym[t].name, v);
+ sym[t].blk = 0;
+ return TEMP(t);
}
static Ref
parseref()
{
- Ref r;
-
- switch (token()) {
+ switch (next()) {
case TVar:
- return varref(tokval.var);
+ return varref(tokval.str);
case TNum:
- /* Add the constant to the symbol table. */
- for (r=Temp0; r<nref; r++)
- if (sym[r].ty == SNum)
- if (sym[r].snum == tokval.num)
- return r;
- if (r >= MaxRefs)
- err("too many references");
- sym[r].ty = SNum;
- sym[r].snum = tokval.num;
- retunr nref++;
+ if (tokval.num > NRefs || tokval.num < 0)
+ /* todo, use constants table */
+ abort();
+ return CONST(tokval.num);
default:
errstr = "number or variable expected";
return R;
}
}
+static Blk *
+findblk(char *name)
+{
+ int i;
+
+ assert(name[0]);
+ for (i=0; i<NBlks; i++)
+ if (!blks[i].blk || strcmp(blks[i].name, name) == 0)
+ break;
+ if (i == NBlks)
+ err("too many blocks");
+ if (!blks[i].blk) {
+ assert(blks[i].name[0] == 0);
+ strcpy(blks[i].name, name);
+ blks[i].blk = blocka();
+ strcpy(blks[i].blk->name, name);
+ }
+ return blks[i].blk;
+}
+
+static void
+expect(Token t)
+{
+ static char *names[] = {
+ [TLbl] = "label",
+ [TComma] = "comma",
+ [TEq] = "=",
+ [TNL] = "newline",
+ [TEOF] = 0,
+ };
+ char buf[128], *s1, *s2;
+ Token t1;
+
+ t1 = next();
+ if (t == t1)
+ return;
+ s1 = names[t] ? names[t] : "??";
+ s2 = names[t1] ? names[t1] : "??";
+ snprintf(buf, sizeof buf,
+ "%s expected (got %s instead)", s1, s2);
+ err(buf);
+}
+
static PState
parseline(PState ps)
{
- Ref args[MaxPreds];
+ Ref args[NPreds] = {0};
Ref r;
Token t;
- int na;
+ Blk *b;
+ int op, i, j;
- t = token();
+ assert(args[0] == R);
+ do
+ t = next();
+ while (t == TNL);
+ if (ps == PLbl && t != TLbl && t != TEOF)
+ err("label or end of file expected");
switch (t) {
default:
- errstr = "variable or jump expected";
- return PErr;
+ err("label, instruction or jump expected");
+ case TEOF:
+ return PEnd;
case TVar:
break;
+ case TLbl:
+ b = findblk(tokval.str);
+ if (curb) {
+ curb->np = curp - phis;
+ curb->ni = curi - inss;
+ curb->ps = alloc(curb->np * sizeof(Phi));
+ curb->is = alloc(curb->ni * sizeof(Ins));
+ memcpy(curb->ps, curp, curb->np * sizeof(Phi));
+ memcpy(curb->is, curi, curb->ni * sizeof(Ins));
+ if (curb->jmp.type == JXXX) {
+ curb->jmp.type = JJmp;
+ curb->s1 = b;
+ }
+ }
+ curb = b;
+ if (curb->np || curb->ni)
+ err("block already defined");
+ expect(TNL);
+ return PPhi;
case TRet:
- curblk->jmp.ty = JRet;
- goto Jump;
+ curb->jmp.type = JRet;
+ expect(TNL);
+ return PLbl;
case TJmp:
- curblk->jmp.ty = JJmp;
+ curb->jmp.type = JJmp;
goto Jump;
case TCnd:
- curblk->jmp.ty = JCnd;
+ curb->jmp.type = JCnd;
r = parseref();
- if (r == R) {
- errstr = "invalid argument for cnd";
- return PErr;
- }
- if (token() != TComma) {
- errstr = "missing comma";
- return PErr;
- }
- curblk->jmp.arg = r;
+ if (r == R)
+ err("invalid argument for cnd jump");
+ curb->jmp.arg = r;
+ expect(TComma);
Jump:
- if (
- return PEnd;
- }
- r = varref(tokval.var);
- if (token() != TEq) {
- errstr = "= expected after variable";
- return PErr;
+ expect(TLbl);
+ curb->s1 = findblk(tokval.str);
+ if (curb->jmp.type == TJmp) {
+ expect(TNL);
+ return PLbl;
+ }
+ expect(TComma);
+ expect(TLbl);
+ curb->s2 = findblk(tokval.str);
+ expect(TNL);
+ return PLbl;
}
- switch (token()) {
+ r = varref(tokval.str);
+ expect(TEq);
+ switch (next()) {
case TAdd:
- curins->op = OAdd;
+ op = OAdd;
+ j = 2;
break;
case TSub:
- curins->op = OSub;
+ op = OSub;
+ j = 2;
break;
case TDiv:
- curins->op = ODiv;
+ op = ODiv;
+ j = 2;
break;
case TMod:
- curins->op = OMod;
+ op = OMod;
+ j = 2;
break;
case TPhi:
- return PIns;
+ if (ps != PPhi)
+ err("unexpected phi instruction");
+ op = -1;
+ j = -1;
+ break;
default:
- errstr = "invalid instruction";
- return PErr;
+ err("invalid instruction");
}
- na = 0;
- for (;;) {
- x
+ i = 0;
+ if (peek() != TNL)
+ for (;;) {
+ if (i == NPreds)
+ err("too many arguments");
+ args[i] = parseref();
+ if (args[i] == R)
+ err("invalid instruction argument");
+ i++;
+ t = peek();
+ if (t == TNL)
+ break;
+ if (t != TComma)
+ err("comma or end of line expected");
+ next();
+ }
+ next();
+ if (j >= 0 && i != j)
+ err("invalid arity");
+ if (op != -1) {
+ if (curi - inss >= NInss)
+ err("too many instructions in block");
+ curi->to = r;
+ curi->l = args[0];
+ curi->r = args[1];
+ curi++;
+ return PPhi;
+ } else {
+ if (curp - phis >= NPhis)
+ err("too many phis in block");
+ curp->to = r;
+ memcpy(curp->args, args, i * sizeof(Ref));
+ curp->na = i;
+ curp++;
+ return PIns;
}
}
+Fn *
+parsefn(FILE *f)
+{
+ int i;
+ PState ps;
+ Fn *fn;
+ inf = f;
+ for (i=0; i<NBlks; i++) {
+ blks[i].name[0] = 0;
+ blks[i].blk = 0;
+ }
+ for (i=Temp0; i<NTemps; i++) {
+ sym[i].type = SUndef;
+ sym[i].name[0] = 0;
+ sym[i].blk = 0;
+ }
+ ntemp = Temp0;
+ curp = phis;
+ curi = inss;
+ curb = 0;
+ lnum = 1;
+ fn = alloc(sizeof *fn);
+ ps = parseline(PLbl);
+ fn->start = curb; /* we should have parsed the start label */
+ do
+ ps = parseline(ps);
+ while (ps != PEnd);
+ fn->sym = alloc(ntemp * sizeof sym[0]);
+ memcpy(fn->sym, sym, ntemp * sizeof sym[0]);
+ fn->ntemp = ntemp;
+ return fn;
+}
-
-// in main parsing routine, reset lnum to 1
-// also reset curxxx
-// also nsym to Temp0
-// also sym[0..nsym-1].ty = SReg
-
-#if 0
-int main()
+int
+main()
{
- char *toknames[] = {
- "TAdd",
- "TSub",
- "TDiv",
- "TMod",
- "TPhi",
- "TJmp",
- "TCnd",
- "TRet",
- "TNum",
- "TVar",
- "TEq",
- "TComma",
- "TLParen",
- "TRParen",
- "TNL",
- "TEOF",
- };
- inf = stdin;
- for (;;)
- printf("%s\n", toknames[token()]);
+ parsefn(stdin);
+ return 0;
}
-#endif