From 762330d6fa626e57a0b350cd89680d59b3158ba6 Mon Sep 17 00:00:00 2001 From: Quentin Carbonneaux Date: Mon, 29 Jun 2015 06:56:11 -0400 Subject: try writing a parser, painful --- lisc/lisc.h | 74 ++++++++++++--- lisc/lo.c | 37 ++++++-- lisc/parse.c | 296 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 383 insertions(+), 24 deletions(-) create mode 100644 lisc/parse.c diff --git a/lisc/lisc.h b/lisc/lisc.h index dbe4d63..d73cc63 100644 --- a/lisc/lisc.h +++ b/lisc/lisc.h @@ -1,33 +1,42 @@ +#include #include #include +/* + References have to be able to encode: + - Results of other operations (temporaries). + - Machine registers (for lowering). + - Spill locations. + - Constants/pointers to program data. +*/ + enum { - R = -1, /* Invalid reference. */ - Temp0 = 32, /* First temporary, below are machine registers. */ - MaxPreds = 16, /* Maximum number of predecessors for a block. */ + R = 0, /* Invalid reference. */ + Temp0 = 33, /* First temporary ref. */ + Const0 = 20000, /* First constant ref. */ + MaxIdnt = 32, + MaxPreds = 16, MaxBlks = 128, MaxInss = 128, MaxPhis = 128, }; -typedef int Ref; +typedef unsigned Ref; typedef struct Ins Ins; typedef struct Phi Phi; typedef struct Blk Blk; +typedef struct Fn Fn; typedef enum Op Op; typedef enum Jmp Jmp; enum Op { - ONop, - OAdd, + OAdd = 1, OSub, - OSDiv, + ODiv, OMod, - OParam, - OCall, - /* x86 instructions (reserved) */ - XDiv, + /* Reserved instructions. */ + X86Div, }; enum Jmp { @@ -49,14 +58,51 @@ struct Phi { }; struct Blk { - int np; - int ni; Phi *ps; Ins *is; + int np; + int ni; struct { Jmp ty; Ref arg; } jmp; int suc0, suc1; - int dpth; + int lvl; +}; + +struct Sym { + enum { + SUndef, + SReg, + SNum, + STemp, + } ty; + union { + char sreg[MaxIdnt]; + long long snum; + struct { + char id[MaxIdnt]; + int blk; + enum { + TPhi, + TIns, + } ty; + int loc; + } stemp; + } u; +}; + +#define sreg u.sreg +#define snum u.snum +#define stemp u.stemp + +struct Fn { + Sym *sym; + Blk *blk; + int nblk; + Ref nref; }; + + +/* parse.c */ +Fn *parsefn(FILE *); diff --git a/lisc/lo.c b/lisc/lo.c index 931385e..19b3a7d 100644 --- a/lisc/lo.c +++ b/lisc/lo.c @@ -1,8 +1,5 @@ -/*% clang -Wall -o # % - */ #include "lisc.h" - static void rporec(int *rpo, int *cnt, Blk *bs, int b) { @@ -16,7 +13,7 @@ rporec(int *rpo, int *cnt, Blk *bs, int b) } int -dorpo(Blk *bs, int nb) +rposched(Blk *bs, int nb) { static int rpo[MaxBlks]; Blk t; @@ -55,6 +52,23 @@ dorpo(Blk *bs, int nb) } +void +cfdump(FILE *f, char *pname, Blk *bs, int nb) +{ + Blk *b; + int i; + + fprintf(f, "digraph %s {\n", pname); + for (b=bs, i=0; isuc0 >= 0) + fprintf(f, "b%d -> b%d;\n", i, b->suc0); + if (b->suc1 >= 0) + fprintf(f, "b%d -> b%d;\n", i, b->suc1); + fprintf(f, "b%d [shape=box]\n", i); + } + fprintf(f, "}\n"); +} + #define LEN(a) sizeof a / sizeof a[0] Blk rpocond[] = { @@ -69,14 +83,17 @@ int main() { Blk *bs; - int i, nb; + int nb; + FILE *f; bs = rpocond; nb = LEN(rpocond); - nb = dorpo(bs, nb); - for (i=0; i [%02d, %02d]\n", i, - bs[i].suc0, bs[i].suc1); - } + + f = fopen("cf.dot", "w"); + cfdump(f, "bef", bs, nb); + nb = rposched(bs, nb); + cfdump(f, "aft", bs, nb); + fclose(f); + return 0; } diff --git a/lisc/parse.c b/lisc/parse.c new file mode 100644 index 0000000..86720b9 --- /dev/null +++ b/lisc/parse.c @@ -0,0 +1,296 @@ +/* A really crude parser. */ +#include +#include + +#include "lisc.h" + +enum { + MaxRefs = 256, +}; + +typedef enum Token Token; +typedef enum PState PState; + +enum PState { /* Parsing state in a block. */ + PErr, + PPhi, + PIns, + PEnd, +}; + +enum Token { + TAdd, + TSub, + TDiv, + TMod, + TPhi, + TJmp, + TCnd, + TRet, + + TNum, + TVar, + TEq, + TComma, + TLParen, + TRParen, + TNL, + TEOF, +}; + + +static FILE *inf; +static char *errstr; + +static Sym sym[MaxRef]; +static Ref nref; +static Blk blk[MaxBlks], *curblk; +static Phi phi[MaxPhis], *curphi; +static Ins ins[MaxInss], *curins; + +static struct { + long long num; + char *var; +} tokval; +static int lnum; + + +static void +err(char *s) +{ + if (!s) + s = errstr; + assert(s); + fprintf(stderr, "parse error: %s (line %d)\n", s, lnum); + exit(1); +} + +static Token +token() +{ + static struct { + char *str; + Token tok; + } tmap[] = { + { "add", TAdd }, + { "sub", TSub }, + { "div", TDiv }, + { "mod", TMod }, + { "phi", TPhi }, + { "jmp", TJmp }, + { "cnd", TCnd }, + { "ret", TRet }, + { 0 }, + }; + static char tok[MaxIdnt]; + int c, i, var, sgn; + + do + c = fgetc(inf); + while (isblank(c)); + switch (c) { + case EOF: + return TEOF; + case ',': + return TComma; + case '(': + return TLParen; + case ')': + return TRParen; + case '=': + return TEq; + case '\n': + lnum++; + return TNL; + } + if (isdigit(c) || c == '-') { + if (c == '-') { + tokval.num = 0; + sgn = -1; + } else { + tokval.num = c - '0'; + sgn = 1; + } + for (;;) { + c = fgetc(inf); + if (!isdigit(c)) + break; + tokval.num *= 10; + tokval.num += c - '0'; + } + ungetc(c, f); + tokval.num *= sgn; + return TNum; + } + if (c == '%') { + var = 1; + c = fgetc(inf); + } else + var = 0; + if (!isalpha(c)) + err("lexing failure"); + i = 0; + do { + if (i >= MaxIdnt-1) + err("identifier too long"); + tok[i++] = c; + c = fgetc(inf); + } while (isalpha(c) || isdigit(c)); + tok[i] = 0; + ungetc(c, f); + if (var) { + tokval.var = tok; + return TVar; + } + for (i=0; tmap[i].str; i++) + if (strcmp(tok, tmap[i].str) == 0) + return tmap[i].tok; + err("unknown keyword"); + return -1; +} + +static Ref +varref(char *v) +{ + Ref r; + + for (r=Temp0; r= MaxRefs) + err("too many references"); + sym[r].ty = STemp; + strcpy(sym[r].stemp.id, v); + sym[r].stemp.blk = -1; + return nref++; +} + +static Ref +parseref() +{ + Ref r; + + switch (token()) { + case TVar: + return varref(tokval.var); + case TNum: + /* Add the constant to the symbol table. */ + for (r=Temp0; r= MaxRefs) + err("too many references"); + sym[r].ty = SNum; + sym[r].snum = tokval.num; + retunr nref++; + default: + errstr = "number or variable expected"; + return R; + } +} + +static PState +parseline(PState ps) +{ + Ref args[MaxPreds]; + Ref r; + Token t; + int na; + + t = token(); + switch (t) { + default: + errstr = "variable or jump expected"; + return PErr; + case TVar: + break; + case TRet: + curblk->jmp.ty = JRet; + goto Jump; + case TJmp: + curblk->jmp.ty = JJmp; + goto Jump; + case TCnd: + curblk->jmp.ty = JCnd; + r = parseref(); + if (r == R) { + errstr = "invalid argument for cnd"; + return PErr; + } + if (token() != TComma) { + errstr = "missing comma"; + return PErr; + } + curblk->jmp.arg = r; + Jump: + if ( + return PEnd; + } + r = varref(tokval.var); + if (token() != TEq) { + errstr = "= expected after variable"; + return PErr; + } + switch (token()) { + case TAdd: + curins->op = OAdd; + break; + case TSub: + curins->op = OSub; + break; + case TDiv: + curins->op = ODiv; + break; + case TMod: + curins->op = OMod; + break; + case TPhi: + return PIns; + default: + errstr = "invalid instruction"; + return PErr; + } + na = 0; + for (;;) { + x + } +} + + + + + + +// in main parsing routine, reset lnum to 1 +// also reset curxxx +// also nsym to Temp0 +// also sym[0..nsym-1].ty = SReg + +#if 0 +int main() +{ + char *toknames[] = { + "TAdd", + "TSub", + "TDiv", + "TMod", + "TPhi", + "TJmp", + "TCnd", + "TRet", + "TNum", + "TVar", + "TEq", + "TComma", + "TLParen", + "TRParen", + "TNL", + "TEOF", + }; + inf = stdin; + for (;;) + printf("%s\n", toknames[token()]); +} +#endif -- cgit 1.4.1