diff options
author | Quentin Carbonneaux <quentin.carbonneaux@yale.edu> | 2015-07-03 12:16:39 -0400 |
---|---|---|
committer | Quentin Carbonneaux <quentin.carbonneaux@yale.edu> | 2015-09-15 23:01:27 -0400 |
commit | fc814c5873d6ffd76130da015520b07c8f8f3450 (patch) | |
tree | 400038655972f9ba8d9cd757608bda60c5b99b76 /lisc | |
parent | a9d8bf7a2dd257a5027f238fae6095e3d0f02429 (diff) | |
download | roux-fc814c5873d6ffd76130da015520b07c8f8f3450.tar.gz |
attempt to complete the crappy parser
Diffstat (limited to 'lisc')
-rw-r--r-- | lisc/lisc.h | 18 | ||||
-rw-r--r-- | lisc/parse.c | 386 |
2 files changed, 285 insertions, 119 deletions
diff --git a/lisc/lisc.h b/lisc/lisc.h index ea8106e..abb025f 100644 --- a/lisc/lisc.h +++ b/lisc/lisc.h @@ -1,7 +1,9 @@ #include <assert.h> +#include <limits.h> #include <stdio.h> +#include <stdlib.h> -typedef unsgined int uint; +typedef unsigned int uint; typedef unsigned short ushort; typedef unsigned char uchar; @@ -13,9 +15,9 @@ typedef unsigned char uchar; */ enum { - R = 0, /* invalid reference */ + R = 0, /* invalid reference */ NRegs = 32, - Temp0 = NRegs+1, /* first temporary */ + Temp0 = NRegs+1, NString = 32, NPreds = 15, NBlks = 128, @@ -26,20 +28,21 @@ enum { typedef struct Ins Ins; typedef struct Phi Phi; typedef struct Blk Blk; +typedef struct Sym Sym; typedef struct Fn Fn; typedef ushort Ref; enum { RTemp = 0, - RData = 1, + RConst = 1, RMask = 1, RShift = 1, NRefs = USHRT_MAX>>RShift, }; -#define TEMP(x) (((x)<<RShift) | RTemp) -#define DATA(x) (((x)<<RShift) | RData) +#define TEMP(x) (((x)<<RShift) | RTemp) +#define CONST(x) (((x)<<RShift) | RConst) enum { OXXX, @@ -47,7 +50,7 @@ enum { OSub, ODiv, OMod, - OLoad, + OConst, /* reserved instructions */ OX86Div, @@ -85,6 +88,7 @@ struct Blk { Blk *s1; Blk *s2; + char name[NString]; int rpo; }; diff --git a/lisc/parse.c b/lisc/parse.c index 86720b9..4e6ec85 100644 --- a/lisc/parse.c +++ b/lisc/parse.c @@ -1,24 +1,27 @@ -/* A really crude parser. */ +/* really crude parser + */ #include <ctype.h> #include <string.h> #include "lisc.h" enum { - MaxRefs = 256, + NTemps = 256, }; typedef enum Token Token; typedef enum PState PState; -enum PState { /* Parsing state in a block. */ - PErr, +enum PState { + PXXX, + PLbl, PPhi, PIns, PEnd, }; enum Token { + TXXX, TAdd, TSub, TDiv, @@ -30,6 +33,7 @@ enum Token { TNum, TVar, + TLbl, TEq, TComma, TLParen, @@ -41,23 +45,40 @@ enum Token { static FILE *inf; static char *errstr; +static Token thead; -static Sym sym[MaxRef]; -static Ref nref; -static Blk blk[MaxBlks], *curblk; -static Phi phi[MaxPhis], *curphi; -static Ins ins[MaxInss], *curins; +static Sym sym[NTemps]; +static Ref ntemp; +static Phi phis[NPhis], *curp; +static Ins inss[NInss], *curi; +static struct { + char name[NString]; + Blk *blk; +} blks[NBlks+1]; +static Blk *curb; static struct { long long num; - char *var; + char *str; } tokval; static int lnum; +void * +alloc(size_t n) +{ + void *p; + + p = malloc(n); + if (!p) + abort(); + return p; +} + static void err(char *s) { + /* todo, export the error handling in util.c */ if (!s) s = errstr; assert(s); @@ -66,7 +87,7 @@ err(char *s) } static Token -token() +lex() { static struct { char *str; @@ -82,8 +103,9 @@ token() { "ret", TRet }, { 0 }, }; - static char tok[MaxIdnt]; - int c, i, var, sgn; + static char tok[NString]; + int c, i, sgn; + Token t; do c = fgetc(inf); @@ -118,29 +140,32 @@ token() tokval.num *= 10; tokval.num += c - '0'; } - ungetc(c, f); + ungetc(c, inf); tokval.num *= sgn; return TNum; } if (c == '%') { - var = 1; + t = TVar; + c = fgetc(inf); + } else if (c == '@') { + t = TLbl; c = fgetc(inf); } else - var = 0; + t = TXXX; if (!isalpha(c)) err("lexing failure"); i = 0; do { - if (i >= MaxIdnt-1) + if (i >= NString-1) err("identifier too long"); tok[i++] = c; c = fgetc(inf); } while (isalpha(c) || isdigit(c)); tok[i] = 0; - ungetc(c, f); - if (var) { - tokval.var = tok; - return TVar; + ungetc(c, inf); + if (t != TXXX) { + tokval.str = tok; + return t; } for (i=0; tmap[i].str; i++) if (strcmp(tok, tmap[i].str) == 0) @@ -149,148 +174,285 @@ token() return -1; } +static Token +peek() +{ + if (thead == TXXX) + thead = lex(); + return thead; +} + +static Token +next() +{ + Token t; + + t = peek(); + thead = TXXX; + return t; +} + +static Blk * +blocka() +{ + static Blk zblock; + Blk *b; + + b = alloc(sizeof *b); + *b = zblock; + return b; +} + static Ref varref(char *v) { - Ref r; + int t; - for (r=Temp0; r<nref; r++) - if (sym[r].ty == STemp) - if (strcmp(v, sym[r].stemp.id) == 0) - return r; - if (r >= MaxRefs) - err("too many references"); - sym[r].ty = STemp; - strcpy(sym[r].stemp.id, v); - sym[r].stemp.blk = -1; - return nref++; + for (t=Temp0; t<ntemp; t++) + if (sym[t].type == STemp) + if (strcmp(v, sym[t].name) == 0) + return TEMP(t); + if (t >= NTemps) + err("too many temporaries"); + sym[t].type = STemp; + strcpy(sym[t].name, v); + sym[t].blk = 0; + return TEMP(t); } static Ref parseref() { - Ref r; - - switch (token()) { + switch (next()) { case TVar: - return varref(tokval.var); + return varref(tokval.str); case TNum: - /* Add the constant to the symbol table. */ - for (r=Temp0; r<nref; r++) - if (sym[r].ty == SNum) - if (sym[r].snum == tokval.num) - return r; - if (r >= MaxRefs) - err("too many references"); - sym[r].ty = SNum; - sym[r].snum = tokval.num; - retunr nref++; + if (tokval.num > NRefs || tokval.num < 0) + /* todo, use constants table */ + abort(); + return CONST(tokval.num); default: errstr = "number or variable expected"; return R; } } +static Blk * +findblk(char *name) +{ + int i; + + assert(name[0]); + for (i=0; i<NBlks; i++) + if (!blks[i].blk || strcmp(blks[i].name, name) == 0) + break; + if (i == NBlks) + err("too many blocks"); + if (!blks[i].blk) { + assert(blks[i].name[0] == 0); + strcpy(blks[i].name, name); + blks[i].blk = blocka(); + strcpy(blks[i].blk->name, name); + } + return blks[i].blk; +} + +static void +expect(Token t) +{ + static char *names[] = { + [TLbl] = "label", + [TComma] = "comma", + [TEq] = "=", + [TNL] = "newline", + [TEOF] = 0, + }; + char buf[128], *s1, *s2; + Token t1; + + t1 = next(); + if (t == t1) + return; + s1 = names[t] ? names[t] : "??"; + s2 = names[t1] ? names[t1] : "??"; + snprintf(buf, sizeof buf, + "%s expected (got %s instead)", s1, s2); + err(buf); +} + static PState parseline(PState ps) { - Ref args[MaxPreds]; + Ref args[NPreds] = {0}; Ref r; Token t; - int na; + Blk *b; + int op, i, j; - t = token(); + assert(args[0] == R); + do + t = next(); + while (t == TNL); + if (ps == PLbl && t != TLbl && t != TEOF) + err("label or end of file expected"); switch (t) { default: - errstr = "variable or jump expected"; - return PErr; + err("label, instruction or jump expected"); + case TEOF: + return PEnd; case TVar: break; + case TLbl: + b = findblk(tokval.str); + if (curb) { + curb->np = curp - phis; + curb->ni = curi - inss; + curb->ps = alloc(curb->np * sizeof(Phi)); + curb->is = alloc(curb->ni * sizeof(Ins)); + memcpy(curb->ps, curp, curb->np * sizeof(Phi)); + memcpy(curb->is, curi, curb->ni * sizeof(Ins)); + if (curb->jmp.type == JXXX) { + curb->jmp.type = JJmp; + curb->s1 = b; + } + } + curb = b; + if (curb->np || curb->ni) + err("block already defined"); + expect(TNL); + return PPhi; case TRet: - curblk->jmp.ty = JRet; - goto Jump; + curb->jmp.type = JRet; + expect(TNL); + return PLbl; case TJmp: - curblk->jmp.ty = JJmp; + curb->jmp.type = JJmp; goto Jump; case TCnd: - curblk->jmp.ty = JCnd; + curb->jmp.type = JCnd; r = parseref(); - if (r == R) { - errstr = "invalid argument for cnd"; - return PErr; - } - if (token() != TComma) { - errstr = "missing comma"; - return PErr; - } - curblk->jmp.arg = r; + if (r == R) + err("invalid argument for cnd jump"); + curb->jmp.arg = r; + expect(TComma); Jump: - if ( - return PEnd; - } - r = varref(tokval.var); - if (token() != TEq) { - errstr = "= expected after variable"; - return PErr; + expect(TLbl); + curb->s1 = findblk(tokval.str); + if (curb->jmp.type == TJmp) { + expect(TNL); + return PLbl; + } + expect(TComma); + expect(TLbl); + curb->s2 = findblk(tokval.str); + expect(TNL); + return PLbl; } - switch (token()) { + r = varref(tokval.str); + expect(TEq); + switch (next()) { case TAdd: - curins->op = OAdd; + op = OAdd; + j = 2; break; case TSub: - curins->op = OSub; + op = OSub; + j = 2; break; case TDiv: - curins->op = ODiv; + op = ODiv; + j = 2; break; case TMod: - curins->op = OMod; + op = OMod; + j = 2; break; case TPhi: - return PIns; + if (ps != PPhi) + err("unexpected phi instruction"); + op = -1; + j = -1; + break; default: - errstr = "invalid instruction"; - return PErr; + err("invalid instruction"); } - na = 0; - for (;;) { - x + i = 0; + if (peek() != TNL) + for (;;) { + if (i == NPreds) + err("too many arguments"); + args[i] = parseref(); + if (args[i] == R) + err("invalid instruction argument"); + i++; + t = peek(); + if (t == TNL) + break; + if (t != TComma) + err("comma or end of line expected"); + next(); + } + next(); + if (j >= 0 && i != j) + err("invalid arity"); + if (op != -1) { + if (curi - inss >= NInss) + err("too many instructions in block"); + curi->to = r; + curi->l = args[0]; + curi->r = args[1]; + curi++; + return PPhi; + } else { + if (curp - phis >= NPhis) + err("too many phis in block"); + curp->to = r; + memcpy(curp->args, args, i * sizeof(Ref)); + curp->na = i; + curp++; + return PIns; } } +Fn * +parsefn(FILE *f) +{ + int i; + PState ps; + Fn *fn; + inf = f; + for (i=0; i<NBlks; i++) { + blks[i].name[0] = 0; + blks[i].blk = 0; + } + for (i=Temp0; i<NTemps; i++) { + sym[i].type = SUndef; + sym[i].name[0] = 0; + sym[i].blk = 0; + } + ntemp = Temp0; + curp = phis; + curi = inss; + curb = 0; + lnum = 1; + fn = alloc(sizeof *fn); + ps = parseline(PLbl); + fn->start = curb; /* we should have parsed the start label */ + do + ps = parseline(ps); + while (ps != PEnd); + fn->sym = alloc(ntemp * sizeof sym[0]); + memcpy(fn->sym, sym, ntemp * sizeof sym[0]); + fn->ntemp = ntemp; + return fn; +} - -// in main parsing routine, reset lnum to 1 -// also reset curxxx -// also nsym to Temp0 -// also sym[0..nsym-1].ty = SReg - -#if 0 -int main() +int +main() { - char *toknames[] = { - "TAdd", - "TSub", - "TDiv", - "TMod", - "TPhi", - "TJmp", - "TCnd", - "TRet", - "TNum", - "TVar", - "TEq", - "TComma", - "TLParen", - "TRParen", - "TNL", - "TEOF", - }; - inf = stdin; - for (;;) - printf("%s\n", toknames[token()]); + parsefn(stdin); + return 0; } -#endif |