summary refs log tree commit diff
path: root/amd64
diff options
context:
space:
mode:
author Quentin Carbonneaux <quentin@c9x.me> 2019-05-04 13:17:48 +0200
committer Quentin Carbonneaux <quentin@c9x.me> 2019-05-04 13:17:48 +0200
commit b1063d46e8c625a6aab4ee36d68e8514a6aa1493 (patch)
tree a4708e7eb45ef3ede4116fb746cc6e37cc4fa079 /amd64
parent c8ffe7262f28022854a504cb8260e0cfdd8c55ed (diff)
download roux-b1063d46e8c625a6aab4ee36d68e8514a6aa1493.tar.gz
emit only one epilog per function
Previously, each ret would lead to an
epilog.  This caused bloat for large
functions with multiple return points.
Diffstat (limited to 'amd64')
-rw-r--r-- amd64/emit.c 43
1 file changed, 25 insertions, 18 deletions
diff --git a/amd64/emit.c b/amd64/emit.c
index d4bd54c..f986cb8 100644
--- a/amd64/emit.c
+++ b/amd64/emit.c
@@ -537,7 +537,7 @@ amd64_emitfn(Fn *fn, FILE *f)
 	static int id0;
 	Blk *b, *s;
 	Ins *i, itmp;
-	int *r, c, o, n, lbl;
+	int *r, c, o, n, lbl, ret;
 	uint64_t fs;
 
 	fprintf(f, ".text\n");
@@ -566,7 +566,7 @@ amd64_emitfn(Fn *fn, FILE *f)
 			fs += 8;
 		}
 
-	for (lbl=0, b=fn->start; b; b=b->link) {
+	for (ret=lbl=0, b=fn->start; b; b=b->link) {
 		if (lbl || b->npred > 1)
 			fprintf(f, "%sbb%d:\n", gasloc, id0+b->id);
 		for (i=b->ins; i!=&b->ins[b->nins]; i++)
@@ -574,21 +574,11 @@ amd64_emitfn(Fn *fn, FILE *f)
 		lbl = 1;
 		switch (b->jmp.type) {
 		case Jret0:
-			if (fn->dynalloc)
-				fprintf(f,
-					"\tmovq %%rbp, %%rsp\n"
-					"\tsubq $%"PRIu64", %%rsp\n",
-					fs
-				);
-			for (r=&amd64_sysv_rclob[NCLR]; r>amd64_sysv_rclob;)
-				if (fn->reg & BIT(*--r)) {
-					itmp.arg[0] = TMP(*r);
-					emitf("popq %L0", &itmp, fn, f);
-				}
-			fprintf(f,
-				"\tleave\n"
-				"\tret\n"
-			);
+			if (b->link) {
+				ret++;
+				fprintf(f, "\tjmp %sbb%d\n",
+					gasloc, id0+fn->nblk);
+			}
 			break;
 		case Jjmp:
 		Jmp:
@@ -614,5 +604,22 @@ amd64_emitfn(Fn *fn, FILE *f)
 			die("unhandled jump %d", b->jmp.type);
 		}
 	}
-	id0 += fn->nblk;
+	if (ret)
+		fprintf(f, "%sbb%d:\n", gasloc, id0+fn->nblk);
+	if (fn->dynalloc)
+		fprintf(f,
+			"\tmovq %%rbp, %%rsp\n"
+			"\tsubq $%"PRIu64", %%rsp\n",
+			fs
+		);
+	for (r=&amd64_sysv_rclob[NCLR]; r>amd64_sysv_rclob;)
+		if (fn->reg & BIT(*--r)) {
+			itmp.arg[0] = TMP(*r);
+			emitf("popq %L0", &itmp, fn, f);
+		}
+	fprintf(f,
+		"\tleave\n"
+		"\tret\n"
+	);
+	id0 += fn->nblk + (ret>0);
 }