summary refs log tree commit diff
path: root/minic/minic.y
diff options
context:
space:
mode:
Diffstat (limited to 'minic/minic.y')
-rw-r--r--minic/minic.y629
1 files changed, 629 insertions, 0 deletions
diff --git a/minic/minic.y b/minic/minic.y
new file mode 100644
index 0000000..00351e1
--- /dev/null
+++ b/minic/minic.y
@@ -0,0 +1,629 @@
+%{
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+enum {
+	NString = 16,
+	NVar = 256,
+};
+
+
+enum { /* minic types */
+	INT = 0,
+	LNG = 1,
+	PTR = 2,
+};
+
+#define IDIR(x) (((x) << 2) + PTR)
+#define DREF(x) ((x) >> 2)
+#define KIND(x) ((x) & 3)
+#define SIZE(x) (KIND(x) == INT ? 4 : 8)
+
+typedef struct Node Node;
+typedef struct Symb Symb;
+typedef struct Stmt Stmt;
+
+struct Node {
+	char op;
+	union {
+		int n;
+		char v[NString];
+	} u;
+	Node *l, *r;
+};
+
+struct Symb {
+	enum {
+		Con,
+		Tmp,
+		Var,
+	} t;
+	union {
+		int n;
+		char v[NString];
+	} u;
+	unsigned long ctyp;
+};
+
+struct Stmt {
+	enum {
+		If,
+		While,
+		Seq,
+		Expr,
+	} t;
+	void *p1, *p2, *p3;
+};
+
+int yylex(void), yyerror(char *);
+Symb expr(Node *), lval(Node *);
+
+FILE *of;
+int lbl, tmp;
+struct {
+	char v[NString];
+	unsigned ctyp;
+} varh[NVar];
+
+void
+die(char *s)
+{
+	fprintf(stderr, "error: %s\n", s);
+	exit(1);
+}
+
+void *
+alloc(size_t s)
+{
+	void *p;
+
+	p = malloc(s);
+	if (!p)
+		die("out of memory");
+	return p;
+}
+
+unsigned
+hash(char *s)
+{
+	unsigned h;
+
+	h = 42;
+	while (*s)
+		h += 11 * h + *s++;
+	return h % NVar;
+}
+
+void
+varclr()
+{
+	unsigned h;
+
+	for (h=0; h<NVar; h++)
+		varh[h].v[0] = 0;
+}
+
+void
+varadd(char *v, unsigned ctyp)
+{
+	unsigned h0, h;
+
+	h0 = hash(v);
+	h = h0;
+	do {
+		if (varh[h].v[0] == 0) {
+			strcpy(varh[h].v, v);
+			varh[h].ctyp = ctyp;
+			return;
+		}
+		if (strcmp(varh[h].v, v) == 0)
+			die("double definition");
+	} while(++h != h0);
+	die("too many variables");
+}
+
+unsigned
+varctyp(char *v)
+{
+	unsigned h0, h;
+
+	h0 = hash(v);
+	h = h0;
+	do {
+		if (strcmp(varh[h].v, v) == 0)
+			return varh[h].ctyp;
+	} while (++h != h0 && varh[h].v[0] != 0);
+	die("undeclared variable");
+	return -1;
+}
+
+char
+irtyp(unsigned ctyp)
+{
+	switch (KIND(ctyp)) {
+	default:
+		die("internal error");
+	case INT:
+		return 'w';
+	case PTR:
+	case LNG:
+		return 'l';
+	}
+}
+
+void
+psymb(Symb s)
+{
+	switch (s.t) {
+	case Tmp:
+		fprintf(of, "%%t%d", s.u.n);
+		break;
+	case Var:
+		fprintf(of, "%%%s", s.u.v);
+		break;
+	case Con:
+		fprintf(of, "%d", s.u.n);
+		break;
+	}
+}
+
+void
+sext(Symb *s)
+{
+	fprintf(of, "\t%%t%d =l sext ", tmp);
+	psymb(*s);
+	fprintf(of, "\n");
+	s->t = Tmp;
+	s->ctyp = LNG;
+	s->u.n = tmp++;
+}
+
+unsigned
+prom(int op, Symb *l, Symb *r)
+{
+	Symb *t;
+	int sz;
+
+	if (l->ctyp == r->ctyp && KIND(l->ctyp) != PTR)
+		return l->ctyp;
+
+	if (l->ctyp == LNG && r->ctyp == INT) {
+		sext(r);
+		return LNG;
+	}
+	if (l->ctyp == INT && r->ctyp == LNG) {
+		sext(l);
+		return LNG;
+	}
+
+	if (op == '+') {
+		if (KIND(r->ctyp) == PTR) {
+			t = l;
+			l = r;
+			r = t;
+		}
+		if (KIND(r->ctyp) == PTR)
+			die("pointers added");
+		goto Scale;
+	}
+
+	if (op == '-') {
+		if (KIND(l->ctyp) != PTR)
+			die("pointer substracted from integer");
+		if (KIND(r->ctyp) != PTR)
+			goto Scale;
+		if (l->ctyp != r->ctyp)
+			die("non-homogeneous pointers in substraction");
+		return LNG;
+	}
+
+Scale:
+	if (irtyp(r->ctyp) != 'l')
+		sext(r);
+	sz = SIZE(DREF(l->ctyp));
+	fprintf(of, "\t%%t%d =l mul %d, ", sz, tmp);
+	psymb(*r);
+	fprintf(of, "\n");
+	r->u.n = tmp++;
+	return l->ctyp;
+}
+
+Symb
+expr(Node *n)
+{
+	static char *otoa[] = {
+		['+'] = "add",
+		['-'] = "sub",
+		['*'] = "mul",
+		['/'] = "div",
+		['%'] = "rem",
+		['<'] = "cslt",  /* meeeeh, careful with pointers */
+		['l'] = "csle",
+		['e'] = "cseq",
+		['n'] = "csne",
+	};
+	Symb sr, s0, s1;
+
+	sr.t = Tmp;
+	sr.u.n = tmp++;
+
+	switch (n->op) {
+
+	case 'V':
+		sr.ctyp = varctyp(n->u.v);
+		fprintf(of, "\t");
+		psymb(sr);
+		fprintf(of, " =%c ", irtyp(sr.ctyp));
+		fprintf(of, "load %%%s\n", n->u.v);
+		break;
+
+	case 'N':
+		sr.t = Con;
+		sr.u.n = n->u.n;
+		sr.ctyp = INT;
+		break;
+
+	case '@':
+		s0 = expr(n->l);
+		if (KIND(s0.ctyp) != PTR)
+			die("dereference of a non-pointer");
+		sr.ctyp = DREF(s0.ctyp);
+		fprintf(of, "\t");
+		psymb(sr);
+		fprintf(of, " =%c load ", irtyp(sr.ctyp));
+		psymb(s0);
+		fprintf(of, "\n");
+		break;
+
+	case '&':
+		sr = lval(n->l);
+		sr.ctyp = IDIR(sr.ctyp);
+		break;
+
+	case '=':
+		s0 = expr(n->r);
+		s1 = lval(n->l);
+		sr = s0;
+		if (s1.ctyp == LNG &&  s0.ctyp == INT)
+			sext(&s0);
+		if (s1.ctyp != s0.ctyp)
+			die("invalid assignment");
+		fprintf(of, "\tstore%c ", irtyp(s1.ctyp));
+		goto Args;
+
+	case 'P':
+	case 'M':
+		die("unimplemented ++ and --");
+		break;
+
+	default:
+		s0 = expr(n->l);
+		s1 = expr(n->r);
+		sr.ctyp = prom(n->op, &s0, &s1);
+		if (strchr("ne<l", n->op))
+			sr.ctyp = INT;
+		fprintf(of, "\t");
+		psymb(sr);
+		fprintf(of, " =%c", irtyp(sr.ctyp));
+		fprintf(of, " %s ", otoa[(int)n->op]);
+	Args:
+		psymb(s0);
+		fprintf(of, ", ");
+		psymb(s1);
+		fprintf(of, "\n");
+		break;
+
+	}
+	if (n->op == '-'
+	&&  KIND(s0.ctyp) == PTR
+	&&  KIND(s1.ctyp) == PTR) {
+		fprintf(of, "\t%%t%d =l div ", tmp);
+		psymb(sr);
+		fprintf(of, ", %d\n", SIZE(DREF(s0.ctyp)));
+		sr.u.n = tmp++;
+	}
+	return sr;
+}
+
+Symb
+lval(Node *n)
+{
+	Symb sr;
+
+	switch (n->op) {
+	default:
+		die("invalid lvalue");
+	case 'V':
+		sr.t = Var;
+		sr.ctyp = varctyp(n->u.v);
+		strcpy(sr.u.v, n->u.v);
+		break;
+	case '@':
+		sr = expr(n->l);
+		if (KIND(sr.ctyp) != PTR)
+			die("dereference of a non-pointer");
+		sr.ctyp = DREF(sr.ctyp);
+		break;
+	}
+	return sr;
+}
+
+void
+stmt(Stmt *s)
+{
+	int l;
+	Symb x;
+Again:
+	if (!s)
+		return;
+
+	switch (s->t) {
+	case Expr:
+		expr(s->p1);
+		break;
+	case Seq:
+		stmt(s->p1);
+		s = s->p2;
+		goto Again;
+	case If:
+		x = expr(s->p1);
+		fprintf(of, "\tjnz ");  /* to be clean, a comparison to 0 should be inserted here */
+		psymb(x);
+		l = lbl;
+		lbl += 3;
+		fprintf(of, ", @l%d, @l%d\n", l, l+1);
+		fprintf(of, "@l%d\n", l);
+		stmt(s->p2);
+		if (s->p3)
+			fprintf(of, "\tjmp @l%d\n", l+2);
+		fprintf(of, "@l%d\n", l+1);
+		if (s->p3) {
+			stmt(s->p3);
+			fprintf(of, "@l%d\n", l+2);
+		}
+		break;
+	case While:
+		x = expr(s->p1);
+		l = lbl;
+		lbl += 3;
+		fprintf(of, "@l%d\n", l);
+		fprintf(of, "\tjnz ");                  /* ditto */
+		psymb(x);
+		fprintf(of, ", @l%d, @l%d\n", l+1, l+2);
+		fprintf(of, "@l%d\n", l+1);
+		stmt(s->p2);
+		fprintf(of, "\tjmp @l%d\n", l);
+		fprintf(of, "@l%d\n", l+2);
+		break;
+	}
+}
+
+Node *
+mknode(char op, Node *l, Node *r)
+{
+	Node *n;
+
+	n = alloc(sizeof *n);
+	n->op = op;
+	n->l = l;
+	n->r = r;
+	return n;
+}
+
+Node *
+mkidx(Node *a, Node *i)
+{
+	Node *n;
+
+	n = mknode('+', a, i);
+	n = mknode('@', n, 0);
+	return n;
+}
+
+Stmt *
+mkstmt(int t, void *p1, void *p2, void *p3)
+{
+	Stmt *s;
+
+	s = alloc(sizeof *s);
+	s->t = t;
+	s->p1 = p1;
+	s->p2 = p2;
+	s->p3 = p3;
+	return s;
+}
+
+%}
+
+%union {
+	Node *n;
+	Stmt *s;
+	unsigned u;
+}
+
+%token <n> NUM
+%token <n> IDENT
+%token PP MM LE GE
+
+%token TINT TLNG
+%token IF ELSE WHILE
+
+%right '='
+%left '+' '-'
+%left '*' '/' '%'
+%nonassoc '&'
+%left EQ NE
+%left '<' '>' LE GE
+%nonassoc PP MM
+%nonassoc '['
+
+%type <u> type
+%type <s> stmt stmts
+%type <n> expr
+
+%%
+
+prog: init '{' dcls stmts '}'
+{
+	stmt($4);
+	fprintf(of, "\tret\n");
+};
+
+init:
+{
+	varclr();
+	lbl = 0;
+	tmp = 0;
+	fprintf(of, "@l%d\n", lbl++);
+};
+
+dcls: | dcls type IDENT ';'
+{
+	int s;
+	char *v;
+
+	v = $3->u.v;
+	s = SIZE($2);
+	varadd(v, $2);
+	fprintf(of, "\t%%%s =l alloc%d %d\n", v, s, s);
+};
+
+type: type '*' { $$ = IDIR($1); }
+    | TINT     { $$ = INT; }
+    | TLNG     { $$ = LNG; }
+    ;
+
+stmt: ';'                            { $$ = 0; }
+    | '{' stmts '}'                  { $$ = $2; }
+    | expr ';'                       { $$ = mkstmt(Expr, $1, 0, 0); }
+    | WHILE '(' expr ')' stmt        { $$ = mkstmt(While, $3, $5, 0); }
+    | IF '(' expr ')' stmt ELSE stmt { $$ = mkstmt(If, $3, $5, $7); }
+    | IF '(' expr ')' stmt           { $$ = mkstmt(If, $3, $5, 0); }
+    ;
+
+stmts: stmts stmt { $$ = mkstmt(Seq, $1, $2, 0); }
+     |            { $$ = 0; }
+     ;
+
+expr: NUM
+    | IDENT
+    | expr '[' expr ']' { $$ = mkidx($1, $3); }
+    | '(' expr ')'      { $$ = $2; }
+    | '*' expr          { $$ = mknode('@', $2, 0); }
+    | '&' expr          { $$ = mknode('&', $2, 0); }
+    | expr '=' expr     { $$ = mknode('=', $1, $3); }
+    | expr '+' expr     { $$ = mknode('+', $1, $3); }
+    | expr '-' expr     { $$ = mknode('-', $1, $3); }
+    | expr '*' expr     { $$ = mknode('*', $1, $3); }
+    | expr '/' expr     { $$ = mknode('/', $1, $3); }
+    | expr '%' expr     { $$ = mknode('%', $1, $3); }
+    | expr '<' expr     { $$ = mknode('<', $1, $3); }
+    | expr '>' expr     { $$ = mknode('<', $3, $1); }
+    | expr LE expr      { $$ = mknode('l', $1, $3); }
+    | expr GE expr      { $$ = mknode('l', $3, $1); }
+    | expr EQ expr      { $$ = mknode('e', $1, $3); }
+    | expr NE expr      { $$ = mknode('n', $1, $3); }
+    | expr PP           { $$ = mknode('P', $1, 0); }
+    | expr MM           { $$ = mknode('M', $1, 0); }
+    ;
+
+%%
+
+int
+yylex()
+{
+	struct {
+		char *s;
+		int t;
+	} kwds[] = {
+		{ "int", TINT },
+		{ "long", TLNG },
+		{ "if", IF },
+		{ "else", ELSE },
+		{ "while", WHILE },
+		{ 0, 0 }
+	};
+	int i, c, c1, n, sgn;
+	char v[NString], *p;
+
+	do
+		c = getchar();
+	while (isspace(c));
+
+	if (c == EOF)
+		return 0;
+
+	if (isdigit(c) || c == '-') {
+		n = 0;
+		sgn = 1;
+		if (c == '-') {
+			sgn = -1;
+			c = getchar();
+			if (!isdigit(c)) {
+				ungetc(c, stdin);
+				return '-';
+			}
+		}
+		do {
+			n *= 10;
+			n += c-'0';
+			c = getchar();
+		} while (isdigit(c));
+		ungetc(c, stdin);
+		yylval.n = mknode('N', 0, 0);
+		yylval.n->u.n = n * sgn;
+		return NUM;
+	}
+
+	if (isalpha(c)) {
+		p = v;
+		do {
+			if (p == &v[NString-1])
+				die("ident too long");
+			*p++ = c;
+			c = getchar();
+		} while (isalpha(c));
+		*p = 0;
+		ungetc(c, stdin);
+		for (i=0; kwds[i].s; i++)
+			if (strcmp(v, kwds[i].s) == 0)
+				return kwds[i].t;
+		yylval.n = mknode('V', 0, 0);
+		strcpy(yylval.n->u.v, v);
+		return IDENT;
+	}
+
+	c1 = getchar();
+#define DI(a, b) a + b*256
+	switch (DI(c,c1)) {
+	case DI('!','='): return NE;
+	case DI('=','='): return EQ;
+	case DI('<','='): return LE;
+	case DI('>','='): return GE;
+	case DI('+','+'): return PP;
+	case DI('-','-'): return MM;
+	}
+#undef DI
+	ungetc(c1, stdin);
+
+	return c;
+}
+
+int
+yyerror(char *err)
+{
+	die("parse error");
+	return 0;
+}
+
+int
+main()
+{
+	of = stdout;
+	if (yyparse() != 0)
+		die("parse error");
+	return 0;
+}