diff options
author | Quentin Carbonneaux <quentin.carbonneaux@yale.edu> | 2015-10-02 15:34:38 -0400 |
---|---|---|
committer | Quentin Carbonneaux <quentin.carbonneaux@yale.edu> | 2015-10-02 15:34:38 -0400 |
commit | 30eced928c404caae41d62f238270f00bdf7c25b (patch) | |
tree | 6f305c2d318e4a9b2dbec9f5a38a704a090c96c1 /minic | |
parent | ecaad8119f77601fec4cab61c510d697ec5bb939 (diff) | |
download | roux-30eced928c404caae41d62f238270f00bdf7c25b.tar.gz |
start an example compiler for a subset of C
Diffstat (limited to 'minic')
-rw-r--r-- | minic/LICENSE | 21 | ||||
-rw-r--r-- | minic/Makefile | 8 | ||||
-rw-r--r-- | minic/minic.y | 629 | ||||
-rw-r--r-- | minic/test.minic | 9 | ||||
-rw-r--r-- | minic/yacc.c | 1365 |
5 files changed, 2032 insertions, 0 deletions
diff --git a/minic/LICENSE b/minic/LICENSE new file mode 100644 index 0000000..7866b82 --- /dev/null +++ b/minic/LICENSE @@ -0,0 +1,21 @@ +MIT/X Consortium License + +© 2015 Quentin Carbonneaux + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/minic/Makefile b/minic/Makefile new file mode 100644 index 0000000..39b2946 --- /dev/null +++ b/minic/Makefile @@ -0,0 +1,8 @@ +CFLAGS=-g -Wall + +minic: yacc minic.y + ./yacc minic.y + $(CC) $(CFLAGS) -o $@ y.tab.c +clean: + rm -f yacc minic y.* +.PHONY: clean diff --git a/minic/minic.y b/minic/minic.y new file mode 100644 index 0000000..00351e1 --- /dev/null +++ b/minic/minic.y @@ -0,0 +1,629 @@ +%{ + +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +enum { + NString = 16, + NVar = 256, +}; + + +enum { /* minic types */ + INT = 0, + LNG = 1, + PTR = 2, +}; + +#define IDIR(x) (((x) << 2) + PTR) +#define DREF(x) ((x) >> 2) +#define KIND(x) ((x) & 3) +#define SIZE(x) (KIND(x) == INT ? 4 : 8) + +typedef struct Node Node; +typedef struct Symb Symb; +typedef struct Stmt Stmt; + +struct Node { + char op; + union { + int n; + char v[NString]; + } u; + Node *l, *r; +}; + +struct Symb { + enum { + Con, + Tmp, + Var, + } t; + union { + int n; + char v[NString]; + } u; + unsigned long ctyp; +}; + +struct Stmt { + enum { + If, + While, + Seq, + Expr, + } t; + void *p1, *p2, *p3; +}; + +int yylex(void), yyerror(char *); +Symb expr(Node *), lval(Node *); + +FILE *of; +int lbl, tmp; +struct { + char v[NString]; + unsigned ctyp; +} varh[NVar]; + +void +die(char *s) +{ + fprintf(stderr, "error: %s\n", s); + exit(1); +} + +void * +alloc(size_t s) +{ + void *p; + + p = malloc(s); + if (!p) + die("out of memory"); + return p; +} + +unsigned +hash(char *s) +{ + unsigned h; + + h = 42; + while (*s) + h += 11 * h + *s++; + return h % NVar; +} + +void +varclr() +{ + unsigned h; + + for (h=0; h<NVar; h++) + varh[h].v[0] = 0; +} + +void +varadd(char *v, unsigned ctyp) +{ + unsigned h0, h; + + h0 = hash(v); + h = h0; + do { + if (varh[h].v[0] == 0) { + strcpy(varh[h].v, v); + varh[h].ctyp = ctyp; + return; + } + if (strcmp(varh[h].v, v) == 0) + die("double definition"); + } while(++h != h0); + die("too many variables"); +} + +unsigned +varctyp(char *v) +{ + unsigned h0, h; + + h0 = hash(v); + h = h0; + do { + if (strcmp(varh[h].v, v) == 0) + return varh[h].ctyp; + } while (++h != h0 && varh[h].v[0] != 0); + die("undeclared variable"); + return -1; +} + +char +irtyp(unsigned ctyp) +{ + switch (KIND(ctyp)) { + default: + die("internal error"); + case INT: + return 'w'; + case PTR: + case LNG: + return 'l'; + } +} + +void +psymb(Symb s) +{ + switch (s.t) { + case Tmp: + fprintf(of, "%%t%d", s.u.n); + break; + case Var: + fprintf(of, "%%%s", s.u.v); + break; + case Con: + fprintf(of, "%d", s.u.n); + break; + } +} + +void +sext(Symb *s) +{ + fprintf(of, "\t%%t%d =l sext ", tmp); + psymb(*s); + fprintf(of, "\n"); + s->t = Tmp; + s->ctyp = LNG; + s->u.n = tmp++; +} + +unsigned +prom(int op, Symb *l, Symb *r) +{ + Symb *t; + int sz; + + if (l->ctyp == r->ctyp && KIND(l->ctyp) != PTR) + return l->ctyp; + + if (l->ctyp == LNG && r->ctyp == INT) { + sext(r); + return LNG; + } + if (l->ctyp == INT && r->ctyp == LNG) { + sext(l); + return LNG; + } + + if (op == '+') { + if (KIND(r->ctyp) == PTR) { + t = l; + l = r; + r = t; + } + if (KIND(r->ctyp) == PTR) + die("pointers added"); + goto Scale; + } + + if (op == '-') { + if (KIND(l->ctyp) != PTR) + die("pointer substracted from integer"); + if (KIND(r->ctyp) != PTR) + goto Scale; + if (l->ctyp != r->ctyp) + die("non-homogeneous pointers in substraction"); + return LNG; + } + +Scale: + if (irtyp(r->ctyp) != 'l') + sext(r); + sz = SIZE(DREF(l->ctyp)); + fprintf(of, "\t%%t%d =l mul %d, ", sz, tmp); + psymb(*r); + fprintf(of, "\n"); + r->u.n = tmp++; + return l->ctyp; +} + +Symb +expr(Node *n) +{ + static char *otoa[] = { + ['+'] = "add", + ['-'] = "sub", + ['*'] = "mul", + ['/'] = "div", + ['%'] = "rem", + ['<'] = "cslt", /* meeeeh, careful with pointers */ + ['l'] = "csle", + ['e'] = "cseq", + ['n'] = "csne", + }; + Symb sr, s0, s1; + + sr.t = Tmp; + sr.u.n = tmp++; + + switch (n->op) { + + case 'V': + sr.ctyp = varctyp(n->u.v); + fprintf(of, "\t"); + psymb(sr); + fprintf(of, " =%c ", irtyp(sr.ctyp)); + fprintf(of, "load %%%s\n", n->u.v); + break; + + case 'N': + sr.t = Con; + sr.u.n = n->u.n; + sr.ctyp = INT; + break; + + case '@': + s0 = expr(n->l); + if (KIND(s0.ctyp) != PTR) + die("dereference of a non-pointer"); + sr.ctyp = DREF(s0.ctyp); + fprintf(of, "\t"); + psymb(sr); + fprintf(of, " =%c load ", irtyp(sr.ctyp)); + psymb(s0); + fprintf(of, "\n"); + break; + + case '&': + sr = lval(n->l); + sr.ctyp = IDIR(sr.ctyp); + break; + + case '=': + s0 = expr(n->r); + s1 = lval(n->l); + sr = s0; + if (s1.ctyp == LNG && s0.ctyp == INT) + sext(&s0); + if (s1.ctyp != s0.ctyp) + die("invalid assignment"); + fprintf(of, "\tstore%c ", irtyp(s1.ctyp)); + goto Args; + + case 'P': + case 'M': + die("unimplemented ++ and --"); + break; + + default: + s0 = expr(n->l); + s1 = expr(n->r); + sr.ctyp = prom(n->op, &s0, &s1); + if (strchr("ne<l", n->op)) + sr.ctyp = INT; + fprintf(of, "\t"); + psymb(sr); + fprintf(of, " =%c", irtyp(sr.ctyp)); + fprintf(of, " %s ", otoa[(int)n->op]); + Args: + psymb(s0); + fprintf(of, ", "); + psymb(s1); + fprintf(of, "\n"); + break; + + } + if (n->op == '-' + && KIND(s0.ctyp) == PTR + && KIND(s1.ctyp) == PTR) { + fprintf(of, "\t%%t%d =l div ", tmp); + psymb(sr); + fprintf(of, ", %d\n", SIZE(DREF(s0.ctyp))); + sr.u.n = tmp++; + } + return sr; +} + +Symb +lval(Node *n) +{ + Symb sr; + + switch (n->op) { + default: + die("invalid lvalue"); + case 'V': + sr.t = Var; + sr.ctyp = varctyp(n->u.v); + strcpy(sr.u.v, n->u.v); + break; + case '@': + sr = expr(n->l); + if (KIND(sr.ctyp) != PTR) + die("dereference of a non-pointer"); + sr.ctyp = DREF(sr.ctyp); + break; + } + return sr; +} + +void +stmt(Stmt *s) +{ + int l; + Symb x; +Again: + if (!s) + return; + + switch (s->t) { + case Expr: + expr(s->p1); + break; + case Seq: + stmt(s->p1); + s = s->p2; + goto Again; + case If: + x = expr(s->p1); + fprintf(of, "\tjnz "); /* to be clean, a comparison to 0 should be inserted here */ + psymb(x); + l = lbl; + lbl += 3; + fprintf(of, ", @l%d, @l%d\n", l, l+1); + fprintf(of, "@l%d\n", l); + stmt(s->p2); + if (s->p3) + fprintf(of, "\tjmp @l%d\n", l+2); + fprintf(of, "@l%d\n", l+1); + if (s->p3) { + stmt(s->p3); + fprintf(of, "@l%d\n", l+2); + } + break; + case While: + x = expr(s->p1); + l = lbl; + lbl += 3; + fprintf(of, "@l%d\n", l); + fprintf(of, "\tjnz "); /* ditto */ + psymb(x); + fprintf(of, ", @l%d, @l%d\n", l+1, l+2); + fprintf(of, "@l%d\n", l+1); + stmt(s->p2); + fprintf(of, "\tjmp @l%d\n", l); + fprintf(of, "@l%d\n", l+2); + break; + } +} + +Node * +mknode(char op, Node *l, Node *r) +{ + Node *n; + + n = alloc(sizeof *n); + n->op = op; + n->l = l; + n->r = r; + return n; +} + +Node * +mkidx(Node *a, Node *i) +{ + Node *n; + + n = mknode('+', a, i); + n = mknode('@', n, 0); + return n; +} + +Stmt * +mkstmt(int t, void *p1, void *p2, void *p3) +{ + Stmt *s; + + s = alloc(sizeof *s); + s->t = t; + s->p1 = p1; + s->p2 = p2; + s->p3 = p3; + return s; +} + +%} + +%union { + Node *n; + Stmt *s; + unsigned u; +} + +%token <n> NUM +%token <n> IDENT +%token PP MM LE GE + +%token TINT TLNG +%token IF ELSE WHILE + +%right '=' +%left '+' '-' +%left '*' '/' '%' +%nonassoc '&' +%left EQ NE +%left '<' '>' LE GE +%nonassoc PP MM +%nonassoc '[' + +%type <u> type +%type <s> stmt stmts +%type <n> expr + +%% + +prog: init '{' dcls stmts '}' +{ + stmt($4); + fprintf(of, "\tret\n"); +}; + +init: +{ + varclr(); + lbl = 0; + tmp = 0; + fprintf(of, "@l%d\n", lbl++); +}; + +dcls: | dcls type IDENT ';' +{ + int s; + char *v; + + v = $3->u.v; + s = SIZE($2); + varadd(v, $2); + fprintf(of, "\t%%%s =l alloc%d %d\n", v, s, s); +}; + +type: type '*' { $$ = IDIR($1); } + | TINT { $$ = INT; } + | TLNG { $$ = LNG; } + ; + +stmt: ';' { $$ = 0; } + | '{' stmts '}' { $$ = $2; } + | expr ';' { $$ = mkstmt(Expr, $1, 0, 0); } + | WHILE '(' expr ')' stmt { $$ = mkstmt(While, $3, $5, 0); } + | IF '(' expr ')' stmt ELSE stmt { $$ = mkstmt(If, $3, $5, $7); } + | IF '(' expr ')' stmt { $$ = mkstmt(If, $3, $5, 0); } + ; + +stmts: stmts stmt { $$ = mkstmt(Seq, $1, $2, 0); } + | { $$ = 0; } + ; + +expr: NUM + | IDENT + | expr '[' expr ']' { $$ = mkidx($1, $3); } + | '(' expr ')' { $$ = $2; } + | '*' expr { $$ = mknode('@', $2, 0); } + | '&' expr { $$ = mknode('&', $2, 0); } + | expr '=' expr { $$ = mknode('=', $1, $3); } + | expr '+' expr { $$ = mknode('+', $1, $3); } + | expr '-' expr { $$ = mknode('-', $1, $3); } + | expr '*' expr { $$ = mknode('*', $1, $3); } + | expr '/' expr { $$ = mknode('/', $1, $3); } + | expr '%' expr { $$ = mknode('%', $1, $3); } + | expr '<' expr { $$ = mknode('<', $1, $3); } + | expr '>' expr { $$ = mknode('<', $3, $1); } + | expr LE expr { $$ = mknode('l', $1, $3); } + | expr GE expr { $$ = mknode('l', $3, $1); } + | expr EQ expr { $$ = mknode('e', $1, $3); } + | expr NE expr { $$ = mknode('n', $1, $3); } + | expr PP { $$ = mknode('P', $1, 0); } + | expr MM { $$ = mknode('M', $1, 0); } + ; + +%% + +int +yylex() +{ + struct { + char *s; + int t; + } kwds[] = { + { "int", TINT }, + { "long", TLNG }, + { "if", IF }, + { "else", ELSE }, + { "while", WHILE }, + { 0, 0 } + }; + int i, c, c1, n, sgn; + char v[NString], *p; + + do + c = getchar(); + while (isspace(c)); + + if (c == EOF) + return 0; + + if (isdigit(c) || c == '-') { + n = 0; + sgn = 1; + if (c == '-') { + sgn = -1; + c = getchar(); + if (!isdigit(c)) { + ungetc(c, stdin); + return '-'; + } + } + do { + n *= 10; + n += c-'0'; + c = getchar(); + } while (isdigit(c)); + ungetc(c, stdin); + yylval.n = mknode('N', 0, 0); + yylval.n->u.n = n * sgn; + return NUM; + } + + if (isalpha(c)) { + p = v; + do { + if (p == &v[NString-1]) + die("ident too long"); + *p++ = c; + c = getchar(); + } while (isalpha(c)); + *p = 0; + ungetc(c, stdin); + for (i=0; kwds[i].s; i++) + if (strcmp(v, kwds[i].s) == 0) + return kwds[i].t; + yylval.n = mknode('V', 0, 0); + strcpy(yylval.n->u.v, v); + return IDENT; + } + + c1 = getchar(); +#define DI(a, b) a + b*256 + switch (DI(c,c1)) { + case DI('!','='): return NE; + case DI('=','='): return EQ; + case DI('<','='): return LE; + case DI('>','='): return GE; + case DI('+','+'): return PP; + case DI('-','-'): return MM; + } +#undef DI + ungetc(c1, stdin); + + return c; +} + +int +yyerror(char *err) +{ + die("parse error"); + return 0; +} + +int +main() +{ + of = stdout; + if (yyparse() != 0) + die("parse error"); + return 0; +} diff --git a/minic/test.minic b/minic/test.minic new file mode 100644 index 0000000..8d30eb1 --- /dev/null +++ b/minic/test.minic @@ -0,0 +1,9 @@ +{ + int x; + int *p; + + x = 0; + p = &x; + while (x < 10) + *p = x + 1; +} diff --git a/minic/yacc.c b/minic/yacc.c new file mode 100644 index 0000000..52fd891 --- /dev/null +++ b/minic/yacc.c @@ -0,0 +1,1365 @@ +/*% clang -g -Wall -Wextra % -o # + * miniyacc - LALR(1) grammars for C + * See LICENSE for copyright and license details. + */ +#include <assert.h> +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +typedef int Sym; +typedef struct Rule Rule; +typedef struct TSet TSet; +typedef struct Info Info; +typedef struct Term Term; +typedef struct Item Item; +typedef struct Row Row; + +#define S ((Sym) -1) +#define Red(n) (- (n+2)) /* involutive, Red(Red(x)) == x */ +#define GetBit(s,n) (s[n/32] & (1<<(n%32))) +#define SetBit(s,n) (s[n/32] |= 1<<(n%32)) + +enum { + IdntSz = 64, + MaxRhs = 32, + MaxTk = 500, + MaxNt = 500, + MaxRl = 800, + MaxTm = 1000, + + TSetSz = (MaxTk+31)/32, + Sym0 = MaxTk +}; + +struct Rule { + Sym lhs; + Sym rhs[MaxRhs]; + char *act; + int actln; + int prec; +}; + +struct TSet { + unsigned t[TSetSz]; +}; + +struct Info { + int nul; + TSet fst; + int prec; + enum { + ANone, + ALeft, + ARight, + ANonassoc + } assoc; + char name[IdntSz]; + char type[IdntSz]; +}; + +struct Term { + Rule *rule; + int dot; + TSet lk; +}; + +struct Item { + int id; + int nt; + Term ts[MaxTm]; + Item **gtbl; + int dirty; +}; + +struct Row { + int def; + int ndef; + int *t; +}; + +char srs[] = "shift/reduce conflict state %d token %s\n"; +char rrs[] = "reduce/reduce conflict state %d token %s\n"; + +Item i0; /* temporary item */ + +int nrl, nsy, nst, ntk; +Rule rs[MaxRl]; /* grammar rules (ordered, rcmp) */ +Info is[MaxTk+MaxNt]; /* symbol information */ +Item **st; /* LALR(1) states (ordered, icmp) */ +Row *as; /* action table [state][tok] */ +Row *gs; /* goto table [sym][state] */ +Sym sstart;/* start symbol */ +Item *ini; /* initial state */ +int doty; /* type-checking enabled */ + +int srconf, rrconf; +int actsz; +int *act; +int *chk; +int *adsp; +int *gdsp; + +int lineno = 1; +char *srca; +FILE *fin; +FILE *fout; +FILE *fgrm; +FILE *fhdr; + +void +die(char *s) +{ + fprintf(stderr, "%s (on line %d)\n", s, lineno); + exit(1); +} + +void * +yalloc(size_t n, size_t o) +{ + void *p; + + p = calloc(n, o); + if (!p) + die("out of memory"); + return p; +} + +int +rcmp(const void *a, const void *b) +{ + return ((Rule *)a)->lhs - ((Rule *)b)->lhs; +} + +Rule * +rfind(Sym lhs) +{ + Rule *r; + Rule k; + + k.lhs = lhs; + r = bsearch(&k, rs, nrl, sizeof *r, rcmp); + if (r != 0) + while (r > rs && r[-1].lhs == lhs) + r--; + return r; +} + +int +slen(Sym *l) +{ + int n; + + for (n=0; *l!=S; n++, l++); + return n; +} + +void +tszero(TSet *ts) +{ + memset(ts, 0, sizeof *ts); +} + +int +tsunion(TSet *tsa, TSet *tsb) +{ + int n; + unsigned *a, *b, c, t; + + c = 0; + a = tsa->t; + b = tsb->t; + n = (31+ntk)/32; + while (n-- > 0) { + t = *a; + *a |= *b++; + c |= t ^ *a++; + } + return !!c; +} + +void +first(TSet *ts, Sym *stnc, TSet *last) +{ + Sym f; + + f = stnc[0]; + if (f == S) { + if (last) + tsunion(ts, last); + return; + } + if (f < ntk) { + SetBit(ts->t, f); + return; + } + if (is[f].nul) + first(ts, stnc+1, last); + tsunion(ts, &is[f].fst); +} + +void +ginit() +{ + int chg; + Rule *r; + Info *i; + Sym *s; + TSet ts; + + do { + chg = 0; + for (r=rs; r-rs<nrl; r++) { + i = &is[r->lhs]; + for (s=r->rhs; *s!=S; s++) + if (!is[*s].nul) + goto nonul; + chg |= i->nul == 0; + i->nul = 1; + nonul: + tszero(&ts); + first(&ts, r->rhs, 0); + chg |= tsunion(&i->fst, &ts); + } + } while (chg); +} + +int +tcmp(Term *a, Term *b) +{ + int c; + + c = a->rule - b->rule; + if (c==0) + c = a->dot - b->dot; + return c; +} + +int +tcmpv(const void *a, const void *b) +{ + return tcmp((Term *)a, (Term *)b); +} + +void +iclose(Item *i) +{ + int smap[MaxNt]; + Rule *r; + Term *t, t1; + Sym s, *rem; + int chg, n, m; + + t1.dot = 0; + memset(smap, 0, sizeof smap); + for (n=0; n<i->nt; n++) { + t = &i->ts[n]; + s = t->rule->lhs-Sym0; + if (t->dot==0) + if (smap[s]==0) + smap[s] = n; + } + do { + chg = 0; + for (n=0; n<i->nt; n++) { + t = &i->ts[n]; + rem = &t->rule->rhs[t->dot]; + s = *rem++; + if (s < Sym0 || s == S) + continue; + r = rfind(s); + if (!r) + die("some non-terminals are not defined"); + tszero(&t1.lk); + first(&t1.lk, rem, &t->lk); + m = smap[s-Sym0]; + if (m) + for (; r-rs<nrl && r->lhs==s; r++, m++) + chg |= tsunion(&i->ts[m].lk, &t1.lk); + else { + m = i->nt; + smap[s-Sym0] = m; + for (; r-rs<nrl && r->lhs==s; r++, m++) { + if (m>=MaxTm) + die("too many terms in item"); + t1.rule = r; + i->ts[m] = t1; + } + i->nt = m; + chg = 1; + } + } + } while (chg); +} + +void +igoto(Item *i, Sym s) +{ + Term *t, *t1; + int n; + + i0.nt = 0; + for (n=0, t=i->ts; n<i->nt; n++, t++) { + if (t->rule->rhs[t->dot] != s) + continue; + t1 = &i0.ts[i0.nt++]; + *t1 = *t; + t1->dot++; + } + qsort(i0.ts, i0.nt, sizeof i0.ts[0], tcmpv); +} + +int +icmp(Item *a, Item *b) +{ + Term *ta, *tb, *ma, *mb; + int c; + + ta = a->ts; + tb = b->ts; + ma = ta+a->nt; + mb = tb+b->nt; + for (;;) { + if (ta==ma || ta->dot==0) + return -(tb<mb && tb->dot); + if (tb==mb || tb->dot==0) + return +(ta<ma && ta->dot); + if ((c=tcmp(ta++, tb++))) + return c; + } +} + +int +stadd(Item **pi) +{ + Item *i, *i1; + int lo, hi, mid, n, chg; + + /* http://www.iq0.com/duffgram/bsearch.c */ + i = *pi; + lo = 0; + hi = nst - 1; + if (hi<0 || icmp(i, st[hi])>0) + hi++; + else if (icmp(i, st[lo])<=0) + hi = lo; + else + while (hi-lo!=1) { + mid = (lo+hi)/2; + if (icmp(st[mid], i)<0) + lo = mid; + else + hi = mid; + } + if (hi<nst && icmp(st[hi], i)==0) { + chg = 0; + i1 = st[hi]; + for (n=0; n<i->nt; n++) + chg |= tsunion(&i1->ts[n].lk, &i->ts[n].lk); + i1->dirty |= chg; + *pi = i1; + return chg; + } else { + st = realloc(st, ++nst * sizeof st[0]); + if (!st) + die("out of memory"); + memmove(&st[hi+1], &st[hi], (nst-1 - hi) * sizeof st[0]); + i->gtbl = yalloc(nsy, sizeof i->gtbl[0]); + i->dirty = 1; + i1 = yalloc(1, sizeof *i1); + *i1 = *i; + *pi = st[hi] = i1; + return 1; + } +} + +void +stgen() +{ + Sym s; + Rule *r; + Item *i, *i1; + Term tini; + int n, chg; + + ini = &i0; + r = rfind(Sym0); + tini.rule = r; + tini.dot = 0; + tszero(&tini.lk); + SetBit(tini.lk.t, 0); + i0.nt = 0; + i0.ts[i0.nt++] = tini; + stadd(&ini); + do { + chg = 0; + for (n=0; n<nst; n++) { + i = st[n]; + if (!i->dirty) + continue; + i->dirty = 0; + iclose(i); + for (s=0; s<nsy; s++) { + igoto(i, s); + i1 = &i0; + if (!i1->nt) { + i->gtbl[s] = 0; + continue; + } + chg |= stadd(&i1); + i->gtbl[s] = i1; + } + } + } while (chg); +} + +int +resolve(Rule *r, Sym s, int st) +{ + if (!r->prec || !is[s].prec) { + conflict: + if (fgrm) + fprintf(fgrm, srs, st, is[s].name); + srconf++; + return ARight; + } + if (r->prec==is[s].prec) { + if (is[s].assoc == ANone) + goto conflict; + return is[s].assoc; + } else + if (r->prec<is[s].prec) + return ARight; + else + return ALeft; +} + +void +tblset(int *tbl, Item *i, Term *t) +{ + int act; + Sym s; + + s = t->rule->rhs[t->dot]; + if (s!=S) { + /* shift */ + if (s>=ntk) + return; + assert(i->gtbl[s]); + act = ARight; + if (tbl[s] && tbl[s] != i->gtbl[s]->id) { + assert(tbl[s]<=0); + act = resolve(&rs[Red(tbl[s])], s, i->id-1); + } + switch (act) { + case ARight: + tbl[s] = i->gtbl[s]->id; + break; + case ANonassoc: + tbl[s] = -1; + break; + } + } else + /* reduce */ + for (s=0; s<ntk; s++) { + if (!GetBit(t->lk.t, s)) + continue; + /* default to shift if conflict occurs */ + if (!tbl[s]) + act = ALeft; + else if (tbl[s]<0) { + if (fgrm) + fprintf(fgrm, rrs, i->id-1, is[s].name); + rrconf++; + act = ARight; + } else + act = resolve(t->rule, s, i->id-1); + switch (act) { + case ALeft: + tbl[s] = Red(t->rule-rs); + break; + case ANonassoc: + tbl[s] = -1; + break; + } + } +} + +void +setdef(Row *r, int w, int top) +{ + int n, m, x, cnt, def, max; + + max = 0; + def = -1; + r->ndef = 0; + for (n=0; n<w; n++) { + x = r->t[n]; + if (x==0) + r->ndef++; + if (x>=top || x==0) + continue; + cnt = 1; + for (m=n+1; m<w; m++) + if (r->t[m]==x) + cnt++; + if (cnt>max) { + def = x; + max = cnt; + } + } + r->def = def; + if (max!=0) + /* zero out the most frequent entry */ + for (n=0; n<w; n++) + if (r->t[n]==def) { + r->t[n] = 0; + r->ndef++; + } +} + +void +tblgen() +{ + Row *r; + Item *i; + int n, m; + + for (n=0; n<nst; n++) + st[n]->id = n+1; + as = yalloc(nst, sizeof as[0]); + gs = yalloc(nsy-MaxTk, sizeof gs[0]); + /* fill action table */ + for (n=0; n<nst; n++) { + r = &as[n]; + r->t = yalloc(ntk, sizeof r->t[0]); + for (i=st[n], m=0; m<i->nt; m++) + tblset(r->t, i, &i->ts[m]); + setdef(r, ntk, -1); + r->def = Red(r->def); /* Red(-1) == -1 */ + } + /* fill goto table */ + for (n=MaxTk; n<nsy; n++) { + r = &gs[n-MaxTk]; + r->t = yalloc(nst, sizeof r->t[0]); + for (m=0; m<nst; m++) + if (st[m]->gtbl[n]) + r->t[m] = st[m]->gtbl[n]->id; + setdef(r, nst, nst+1); + } +} + +int +prcmp(const void *a, const void *b) +{ + return (*(Row **)a)->ndef - (*(Row **)b)->ndef; +} + +void +actgen() +{ + Row **o, *r; + int n, m, t, dsp, nnt; + + actsz = 0; + o = yalloc(nst+nsy, sizeof o[0]); + act = yalloc(nst*nsy, sizeof act[0]); + chk = yalloc(nst*nsy, sizeof chk[0]); + adsp = yalloc(nst, sizeof adsp[0]); + for (n=0; n<nst*nsy; n++) + chk[n] = -1; + /* fill in actions */ + for (n=0; n<nst; n++) + o[n] = &as[n]; + qsort(o, nst, sizeof o[0], prcmp); + for (n=0; n<nst; n++) { + r = o[n]; + dsp = 0; + for (m=0; m<ntk && r->t[m]==0; m++) + dsp--; + retrya: + /* The invariant here is even + * trickier than it looks. + */ + for (t=0; t<ntk; t++) + if ((m=dsp+t)>=0 && chk[m]>=0) + if ((r->t[t] && (chk[m]!=t || act[m]!=r->t[t])) + || (!r->t[t] && chk[m]==t)) { + dsp++; + goto retrya; + } + adsp[r-as] = dsp; + for (t=0; t<ntk; t++) + if (r->t[t]) { + chk[dsp+t] = t; + act[dsp+t] = r->t[t]; + if (dsp+t>=actsz) + actsz = dsp+t+1; + } + } + /* fill in gotos */ + nnt = nsy-MaxTk; + gdsp = yalloc(nnt, sizeof gdsp[0]); + for (n=0; n<nnt; n++) + o[n] = &gs[n]; + qsort(o, nnt, sizeof o[0], prcmp); + for (n=0; n<nnt; n++) { + r = o[n]; + dsp = 0; + for (m=0; m<nst && r->t[m]==0; m++) + dsp--; + retryg: + for (t=m; t<nst; t++) + if (chk[dsp+t]>=0 && r->t[t]) { + dsp++; + goto retryg; + } + gdsp[r-gs] = dsp; + for (t=m; t<nst; t++) + if (r->t[t]) { + chk[dsp+t] = ntk+(r-gs); + act[dsp+t] = r->t[t]; + if (dsp+t>=actsz) + actsz = dsp+t+1; + } + } + free(o); +} + +void +aout(char *name, int *t, int n) +{ + int i; + + fprintf(fout, "short %s[] = {", name); + for (i=0; i<n; i++) { + if (i % 10 == 0) + fprintf(fout, "\n"); + fprintf(fout, "%4d", t[i]); + if (i != n-1) + fprintf(fout, ","); + } + fprintf(fout, "\n};\n"); +} + +void +tblout() +{ + int *o, n, m; + + fprintf(fout, "short yyini = %d;\n", ini->id-1); + fprintf(fout, "short yyntoks = %d;\n", ntk); + o = yalloc(nrl+nst+nsy, sizeof o[0]); + for (n=0; n<nrl; n++) + o[n] = slen(rs[n].rhs); + aout("yyr1", o, nrl); + for (n=0; n<nrl; n++) + o[n] = rs[n].lhs-MaxTk; + aout("yyr2", o, nrl); + for (n=0; n<nst; n++) + o[n] = as[n].def; + aout("yyadef", o, nst); + for (n=0; n<nsy-MaxTk; n++) { + o[n] = gs[n].def; + assert(o[n]>0 || o[n]==-1); + if (o[n]>0) + o[n]--; + } + aout("yygdef", o, nsy-MaxTk); + aout("yyadsp", adsp, nst); + aout("yygdsp", gdsp, nsy-MaxTk); + for (n=0; n<actsz; n++) + if (act[n]>=0) + act[n]--; + aout("yyact", act, actsz); + aout("yychk", chk, actsz); + for (n=0; n<128; n++) { + o[n] = 0; + for (m=0; m<ntk; m++) + if (is[m].name[0]=='\'') + if (is[m].name[1]==n) + assert(!o[n]), o[n] = m; + } + m = 128; + for (n=1; n<ntk; n++) { + if (is[n].name[0]=='\'') + continue; + fprintf(fout, "#define %s %d\n", is[n].name, m); + if (fhdr) + fprintf(fhdr, "#define %s %d\n", is[n].name, m); + o[m++] = n; + } + aout("yytrns", o, m); + if (fhdr) { + fputs("int yyparse(void);\n", fhdr); + fputs("#ifndef YYSTYPE\n", fhdr); + fputs("#define YYSTYPE int\n", fhdr); + fputs("#endif\n", fhdr); + fputs("extern YYSTYPE yylval;\n", fhdr); + fputs("#endif\n", fhdr); + } + free(o); +} + +void +stdump() +{ + Term *t; + Sym *s1; + int n, m, d, act; + Rule *r; + Row *ar; + + if (!fgrm) + return; + for (r=rs; r-rs<nrl; r++) { + fprintf(fgrm, "\n%03d: %s ->", (int)(r-rs), is[r->lhs].name); + for (s1=r->rhs; *s1!=S; s1++) + fprintf(fgrm, " %s", is[*s1].name); + } + fprintf(fgrm, "\n"); + for (m=0; m<nst; m++) { + fprintf(fgrm, "\nState %d:\n", m); + for (t=st[m]->ts; t-st[m]->ts<st[m]->nt; t++) { + r = t->rule; + d = t->dot; + if (d==0 && t!=st[m]->ts) + continue; + fprintf(fgrm, " %s ->", is[r->lhs].name); + for (s1=r->rhs; *s1!=S; s1++, d--) + fprintf(fgrm, " %s%s", d ? "" : ". ", is[*s1].name); + if (!d) + fprintf(fgrm, " ."); + fprintf(fgrm, "\n"); + } + fprintf(fgrm, "\n"); + ar = &as[m]; + for (n=0; n<ntk; n++) { + act = ar->t[n]; + if (!act) + continue; + if (act==-1) + fprintf(fgrm, " %s error (nonassoc)\n", is[n].name); + else if (act<0) + fprintf(fgrm, " %s reduce with rule %d\n", is[n].name, Red(act)); + else + fprintf(fgrm, " %s shift and go to %d\n", is[n].name, act-1); + } + if (ar->def != -1) + fprintf(fgrm, " * reduce with rule %d\n", ar->def); + } +} + +enum { + TIdnt, + TTokchr, /* 'c' */ + TPP, /* %% */ + TLL, /* %{ */ + TLangle, /* < */ + TRangle, /* > */ + TSemi, /* ; */ + TBar, /* | */ + TColon, /* : */ + TLBrack, /* { */ + TUnion, + TType, + TToken, + TRight, + TLeft, + TNonassoc, + TPrec, + TStart, + TEof +}; + +struct { + char *name; + int tok; +} words[] = { + { "%%", TPP }, + { "%union", TUnion }, + { "%type", TType }, + { "%token", TToken }, + { "%right", TRight }, + { "%left", TLeft }, + { "%nonassoc", TNonassoc }, + { "%prec", TPrec }, + { "%start", TStart }, + { 0, 0 } +}; + +char idnt[IdntSz]; + +int +istok(int c) +{ + return isalnum(c) || c=='_' || c=='%'; +} + +int +nexttk() +{ + int n; + char c, *p; + + while (isspace(c=fgetc(fin))) + if (c == '\n') + lineno++; + switch (c) { + case '<': + return TLangle; + case '>': + return TRangle; + case ';': + return TSemi; + case '|': + return TBar; + case ':': + return TColon; + case '{': + return TLBrack; + case EOF: + return TEof; + case '\'': + idnt[0] = '\''; + idnt[1] = fgetc(fin); + idnt[2] = '\''; + idnt[3] = 0; + if (fgetc(fin)!='\'') + die("syntax error, invalid char token"); + return TTokchr; + } + p = idnt; + while (istok(c)) { + *p++ = c; + if (p-idnt >= IdntSz-1) + die("identifier too long"); + c = fgetc(fin); + } + *p = 0; + if (strcmp(idnt, "%")==0) + if (c=='{') + return TLL; + ungetc(c, fin); + for (n=0; words[n].name; n++) + if (strcmp(idnt, words[n].name) == 0) + return words[n].tok; + return TIdnt; +} + +char * +cpycode() +{ + int c, nest, len, pos; + char *s; + + len = 64; + s = yalloc(len+1, 1); + s[0] = '{'; + pos = 1; + nest = 1; + + while (nest) { + c = fgetc(fin); + if (c == '{') + nest++; + if (c == '}') + nest--; + if (c == EOF) + die("syntax error, unclosed code block"); + if (c == '\n') + lineno++; + if (pos>=len) + if (!(s=realloc(s, len=2*len+1))) + die("out of memory"); + s[pos++] = c; + } + s[pos] = 0; + return s; +} + +int +gettype(char *type) +{ + int tk; + + tk = nexttk(); + if (tk==TLangle) { + if (nexttk()!=TIdnt) + die("syntax error, ident expected after <"); + strcpy(type, idnt); + if (nexttk()!=TRangle) + die("syntax error, unclosed <"); + return nexttk(); + } else { + type[0] = 0; + return tk; + } +} + +Sym +findsy(char *name, int add) +{ + int n; + + for (n=0; n<nsy; n++) { + if (n == ntk) { + if (name[0]=='\'') { + if (ntk>=MaxTk) + die("too many tokens"); + ntk++; + strcpy(is[n].name, name); + return n; + } + n = MaxTk; + } + if (strcmp(is[n].name, name)==0) + return n; + } + if (add) { + if (nsy>=MaxTk+MaxNt) + die("too many non-terminals"); + strcpy(is[nsy].name, name); + return nsy++; + } else + return nsy; +} + +void +getdecls() +{ + int tk, prec, p, a, c, c1, n; + Info *si; + char type[IdntSz], *s; + + strcpy(is[0].name, "$"); + ntk = 1; + strcpy(is[Sym0].name, "@start"); + nsy = MaxTk+1; + sstart = S; + prec = 0; + tk = nexttk(); + for (;;) + switch (tk) { + case TStart: + tk = nexttk(); + if (tk!=TIdnt) + die("syntax error, ident expected after %start"); + sstart = findsy(idnt, 1); + if (sstart<ntk) + die("%start cannot specify a token"); + tk = nexttk(); + break; + case TUnion: + tk = nexttk(); + if (tk!=TLBrack) + die("syntax error, { expected after %union"); + fprintf(fout, "#line %d \"%s\"\n", lineno, srca); + s = cpycode(); + fprintf(fout, "typedef union %s yyunion;\n", s); + fprintf(fout, "#define YYSTYPE yyunion\n"); + if (fhdr) { + fprintf(fhdr, "typedef union %s yyunion;\n", s); + fprintf(fhdr, "#define YYSTYPE yyunion\n"); + } + free(s); + doty = 1; + tk = nexttk(); + break; + case TLeft: + p = ++prec; + a = ALeft; + goto addtoks; + case TRight: + p = ++prec; + a = ARight; + goto addtoks; + case TNonassoc: + p = ++prec; + a = ANonassoc; + goto addtoks; + case TToken: + p = 0; + a = ANone; + addtoks: + tk = gettype(type); + while (tk==TIdnt || tk==TTokchr) { + si = 0; + n = findsy(idnt, 0); + if (n>=MaxTk && n<nsy) + die("non-terminal redeclared as token"); + if (n==nsy) { + if (ntk>=MaxTk) + die("too many tokens"); + n = ntk++; + } + si = &is[n]; + strcpy(si->name, idnt); + strcpy(si->type, type); + si->prec = p; + si->assoc = a; + tk = nexttk(); + } + break; + case TType: + tk = gettype(type); + if (type[0]==0) + die("syntax error, type expected"); + while (tk==TIdnt) { + si = 0; + n = findsy(idnt, 1); + if (n<ntk) + die("token redeclared as non-terminal"); + if (n==nsy) { + nsy++; + } + si = &is[n]; + strcpy(si->name, idnt); + strcpy(si->type, type); + tk = nexttk(); + } + break; + case TLL: + fprintf(fout, "#line %d \"%s\"\n", lineno, srca); + for (;;) { + c = fgetc(fin); + if (c == EOF) + die("syntax error, unclosed %{"); + if (c == '%') { + c1 = fgetc(fin); + if (c1 == '}') { + fputs("\n", fout); + break; + } + ungetc(c1, fin); + } + if (c == '\n') + lineno++; + fputc(c, fout); + } + tk = nexttk(); + break; + case TPP: + return; + case TEof: + die("syntax error, unfinished declarations"); + default: + die("syntax error, declaration expected"); + } +} + +void +getgram() +{ + extern char *retcode; + int tk; + Sym hd, *p, s; + Rule *r; + + for (;;) { + tk = nexttk(); + if (tk==TPP || tk==TEof) { + if (sstart==S) + die("syntax error, empty grammar"); + r = &rs[nrl++]; + r->lhs = Sym0; + r->rhs[0] = sstart; + r->rhs[1] = 0; + r->rhs[2] = S; + r->act = retcode; + qsort(rs, nrl, sizeof rs[0], rcmp); + return; + } + if (tk!=TIdnt) + die("syntax error, production rule expected"); + if (nexttk()!=TColon) + die("syntax error, colon expected after production's head"); + hd = findsy(idnt, 1); + if (sstart==S) + sstart = hd; + do { + if (nrl>=MaxRl-1) + die("too many rules"); + r = &rs[nrl++]; + r->lhs = hd; + r->act = 0; + p = r->rhs; + while ((tk=nexttk())==TIdnt || tk==TTokchr || tk==TPrec) { + if (tk==TPrec) { + tk = nexttk(); + if (tk!=TIdnt + || (s=findsy(idnt, 0))>=ntk) + die("token expected after %prec"); + r->prec = is[s].prec; + continue; + } + s = findsy(idnt, 1); + *p++ = s; + if (s<ntk && is[s].prec>0) + r->prec = is[s].prec; + if (p-r->rhs >= MaxRhs-1) + die("production rule too long"); + } + *p = S; + if (tk==TLBrack) { + r->actln = lineno; + r->act = cpycode(); + tk = nexttk(); + } + } while (tk==TBar); + if (tk!=TSemi) + die("syntax error, ; or | expected"); + } +} + +void +actout(Rule *r) +{ + long l; + int i, ar; + char c, *p, *ty, tya[IdntSz]; + + ar = slen(r->rhs); + p = r->act; + i = r->actln; + if (!p) + return; + while ((c=*p++)) + switch (c) { + case '\n': + i++; + default: + fputc(c, fout); + break; + case '$': + c = *p++; + if (c == '$') { + fprintf(fout, "yyval"); + if (doty) { + ty = is[r->lhs].type; + if (!ty[0]) { + lineno = i; + die("$$ has no type"); + } + fprintf(fout, ".%s", ty); + } + } + else if (c == '<') { + ty = tya; + while (istok(*p) && ty-tya<IdntSz-1) + *ty++ = *p++; + *ty = 0; + if (*p++!='>') { + lineno = i; + die("unclosed tag field"); + } + ty = tya; + c = *p++; + if (c == '$') { + fprintf(fout, "yyval.%s", ty); + } else { + if (!isdigit(c)) { + lineno = i; + die("number or $ expected afer tag field"); + } + goto readnum; + } + } + else if (isdigit(c)) { + ty = 0; + readnum: + l = strtol(p-1, &p, 10); + if (l > ar) { + lineno = i; + die("invalid $n"); + } + fprintf(fout, "ps[%d].val", (int)l); + if (doty) { + if (!ty && l>0) + ty = is[r->rhs[l-1]].type; + if (!ty || !ty[0]) { + lineno = i; + die("$n has no type"); + } + fprintf(fout, ".%s", ty); + } + } + } + fputs("\n", fout); +} + +void +codeout() +{ + extern char *code0[], *code1[]; + char **p; + Rule *r; + int n, c; + + for (p=code0; *p; p++) + fputs(*p, fout); + for (n=0; n<nrl; n++) { + fprintf(fout, "\tcase %d:\n", n); + r = &rs[n]; + fprintf(fout, "#line %d \"%s\"\n", r->actln, srca); + actout(r); + fputs("\t\tbreak;\n", fout); + } + for (p=code1; *p; p++) + fputs(*p, fout); + fprintf(fout, "#line %d \"%s\"\n", lineno, srca); + while ((c=fgetc(fin))!=EOF) + fputc(c, fout); +} + +void +init(int ac, char *av[]) +{ + int c, vf, df; + char *pref, buf[100], *opt; + + (void) ac; + pref = "y"; + vf = df = 0; + for (av++; av[0] && av[0][0]=='-'; av++) + for (opt = &av[0][1]; (c = *opt); opt++) + switch (c) { + case 'v': + vf = 1; + break; + case 'd': + df = 1; + break; + case 'b': + if ((pref = *++av)) + break; + default: + usage: + fputs("usage: myacc [-vd] [-b file_prefix] grammar\n", stderr); + exit(1); + } + + if (!(srca = *av)) + goto usage; + fin = fopen(srca, "r"); + if (strlen(pref) + 10 > sizeof buf) + die("-b prefix too long"); + sprintf(buf, "%s.tab.c", pref); + fout = fopen(buf, "w"); + if (vf) { + sprintf(buf, "%s.output", pref); + fgrm = fopen(buf, "w"); + } + if (df) { + sprintf(buf, "%s.tab.h", pref); + fhdr = fopen(buf, "w"); + if (fhdr) { + fprintf(fhdr, "#ifndef Y_TAB_H_\n"); + fprintf(fhdr, "#define Y_TAB_H_\n"); + } + } + if (!fin || !fout || (!fgrm && vf) || (!fhdr && df)) + die("cannot open work files"); +} + +int +main(int ac, char *av[]) +{ + + init(ac, av); + getdecls(); + getgram(); + ginit(); + stgen(); + tblgen(); + stdump(); + actgen(); + tblout(); + codeout(); + + if (srconf) + fprintf(stderr, "%d shift/reduce conflicts\n", srconf); + if (rrconf) + fprintf(stderr, "%d reduce/reduce conflicts\n", rrconf); + + exit(0); +} + +/* Glorious macros. + |sed 's|.*|"&\\n",|' +*/ + +char *retcode = "\t\tyyval = ps[1].val; return 0;"; + +char *code0[] = { +"\n", +"#ifndef YYSTYPE\n", +"#define YYSTYPE int\n", +"#endif\n", +"YYSTYPE yylval;\n", +"\n", +"int\n", +"yyparse()\n", +"{\n", +" enum {\n", +" StackSize = 100,\n", +" ActSz = sizeof yyact / sizeof yyact[0],\n", +" };\n", +" struct {\n", +" YYSTYPE val;\n", +" int state;\n", +" } stk[StackSize], *ps;\n", +" int r, h, n, s, tk;\n", +" YYSTYPE yyval;\n", +"\n", +" ps = stk;\n", +" ps->state = s = yyini;\n", +" tk = -1;\n", +"loop:\n", +" n = yyadsp[s];\n", +" if (tk < 0 && n > -yyntoks)\n", +" tk = yytrns[yylex()];\n", +" n += tk;\n", +" if (n < 0 || n >= ActSz || yychk[n] != tk) {\n", +" r = yyadef[s];\n", +" if (r < 0)\n", +" return -1;\n", +" goto reduce;\n", +" }\n", +" n = yyact[n];\n", +" if (n == -1)\n", +" return -1;\n", +" if (n < 0) {\n", +" r = - (n+2);\n", +" goto reduce;\n", +" }\n", +" tk = -1;\n", +" yyval = yylval;\n", +"stack:\n", +" ps++;\n", +" if (ps-stk >= StackSize)\n", +" return -2;\n", +" s = n;\n", +" ps->state = s;\n", +" ps->val = yyval;\n", +" goto loop;\n", +"reduce:\n", +" ps -= yyr1[r];\n", +" h = yyr2[r];\n", +" s = ps->state;\n", +" n = yygdsp[h] + s;\n", +" if (n < 0 || n >= ActSz || yychk[n] != yyntoks+h)\n", +" n = yygdef[h];\n", +" else\n", +" n = yyact[n];\n", +" switch (r) {\n", +0 +}; + +char *code1[] = { +" }\n", +" goto stack;\n", +"}\n", +0 +}; |