diff options
author | Quentin Carbonneaux <quentin@c9x.me> | 2021-12-05 21:25:00 +0100 |
---|---|---|
committer | Quentin Carbonneaux <quentin@c9x.me> | 2021-12-05 22:06:23 +0100 |
commit | 367c8215d99054892740ad74c690b106c45ebf60 (patch) | |
tree | 3ef0d974a740524e684b73ea26c59cafb4ca9416 | |
parent | bf153b359e9ce3ebef9bca899eb7ed5bd9045c11 (diff) | |
download | roux-367c8215d99054892740ad74c690b106c45ebf60.tar.gz |
arm64: fix slots with offset >32k
When slots are used with a large offset, the emitter generates invalid assembly code. That is caught later on by the assembler, but it prevents compilation of programs with large stack frames.

When a slot offset is too large to be expressed as a constant offset to x29 (the frame pointer), emitins() inserts a late Oaddr instruction that computes the slot address into x16 and replaces the large slot reference with x16.

This change also gave me the opportunity to refactor the save/restore logic for callee-save registers.

This fixes the following Hare issue: https://todo.sr.ht/~sircmpwn/hare/387
-rw-r--r-- | arm64/emit.c | 88 |
1 files changed, 41 insertions, 47 deletions
```diff
diff --git a/arm64/emit.c b/arm64/emit.c
index 752b455..bd0ebdc 100644
--- a/arm64/emit.c
+++ b/arm64/emit.c
@@ -228,7 +228,7 @@ emitf(char *s, Ins *i, E *e)
 				fprintf(e->f, "[%s]", rname(r.val, Kl));
 				break;
 			case RSlot:
-				fprintf(e->f, "[sp, %"PRIu64"]", slot(r.val, e));
+				fprintf(e->f, "[x29, %"PRIu64"]", slot(r.val, e));
 				break;
 			}
 			break;
@@ -276,6 +276,26 @@ loadcon(Con *c, int r, int k, FILE *f)
 	}
 }
 
+static void emitins(Ins *, E *);
+
+static void
+fixarg(Ref *pr, E *e)
+{
+	Ins *i;
+	Ref r;
+	uint64_t s;
+
+	r = *pr;
+	if (rtype(r) == RSlot) {
+		s = slot(r.val, e);
+		if (s > 32760) {
+			i = &(Ins){Oaddr, Kl, TMP(IP0), {r}};
+			emitins(i, e);
+			*pr = TMP(IP0);
+		}
+	}
+}
+
 static void
 emitins(Ins *i, E *e)
 {
@@ -285,6 +305,10 @@ emitins(Ins *i, E *e)
 
 	switch (i->op) {
 	default:
+		if (isload(i->op))
+			fixarg(&i->arg[0], e);
+		if (isstore(i->op))
+			fixarg(&i->arg[1], e);
 	Table:
 		/* most instructions are just pulled out of
 		 * the table omap[], some special cases are
@@ -409,9 +433,9 @@ arm64_emitfn(Fn *fn, FILE *out)
 #undef X
 	};
 	static int id0;
-	int n, c, lbl, *r;
+	int s, n, c, lbl, *r;
 	uint64_t o;
-	Blk *b, *s;
+	Blk *b, *t;
 	Ins *i;
 	E *e;
 
@@ -457,28 +481,13 @@ arm64_emitfn(Fn *fn, FILE *out)
 			e->frame & 0xFFFF, e->frame >> 16
 		);
 	fputs("\tadd\tx29, sp, 0\n", e->f);
-	for (o=e->frame+16, r=arm64_rclob; *r>=0; r++)
+	s = (e->frame - e->padding) / 4;
+	for (r=arm64_rclob; *r>=0; r++)
 		if (e->fn->reg & BIT(*r)) {
-			if (o <= 32760)
-				fprintf(e->f,
-					"\tstr\t%s, [sp, %"PRIu64"]\n",
-					rname(*r, Kx), o -= 8
-				);
-			else if (o <= 65535)
-				fprintf(e->f,
-					"\tmov\tx16, #%"PRIu64"\n"
-					"\tstr\t%s, [sp, x16]\n",
-					o -= 8, rname(*r, Kx)
-				);
-			else {
-				o -= 8;
-				fprintf(e->f,
-					"\tmov\tx16, #%"PRIu64"\n"
-					"\tmovk\tx16, #%"PRIu64", lsl #16\n"
-					"\tstr\t%s, [sp, x16]\n",
-					o & 0xFFFF, o >> 16, rname(*r, Kx)
-				);
-			}
+			s -= 2;
+			i = &(Ins){.arg = {TMP(*r), SLOT(s)}};
+			i->op = *r >= V0 ? Ostored : Ostorel;
+			emitins(i, e);
 		}
 
 	for (lbl=0, b=e->fn->start; b; b=b->link) {
@@ -489,28 +498,13 @@ arm64_emitfn(Fn *fn, FILE *out)
 		lbl = 1;
 		switch (b->jmp.type) {
 		case Jret0:
-			for (o=e->frame+16, r=arm64_rclob; *r>=0; r++)
+			s = (e->frame - e->padding) / 4;
+			for (r=arm64_rclob; *r>=0; r++)
 				if (e->fn->reg & BIT(*r)) {
-					if (o <= 32760)
-						fprintf(e->f,
-							"\tldr\t%s, [sp, %"PRIu64"]\n",
-							rname(*r, Kx), o -= 8
-						);
-					else if (o <= 65535)
-						fprintf(e->f,
-							"\tmov\tx16, #%"PRIu64"\n"
-							"\tldr\t%s, [sp, x16]\n",
-							o -= 8, rname(*r, Kx)
-						);
-					else {
-						o -= 8;
-						fprintf(e->f,
-							"\tmov\tx16, #%"PRIu64"\n"
-							"\tmovk\tx16, #%"PRIu64", lsl #16\n"
-							"\tldr\t%s, [sp, x16]\n",
-							o & 0xFFFF, o >> 16, rname(*r, Kx)
-						);
-					}
+					s -= 2;
+					i = &(Ins){Oload, 0, TMP(*r), {SLOT(s)}};
+					i->cls = *r >= V0 ? Kd : Kl;
+					emitins(i, e);
 				}
 			o = e->frame + 16;
 			if (e->fn->vararg)
@@ -555,9 +549,9 @@ arm64_emitfn(Fn *fn, FILE *out)
 			if (c < 0 || c > NCmp)
 				die("unhandled jump %d", b->jmp.type);
 			if (b->link == b->s2) {
-				s = b->s1;
+				t = b->s1;
 				b->s1 = b->s2;
-				b->s2 = s;
+				b->s2 = t;
 			} else
 				c = cmpneg(c);
 			fprintf(e->f, "\tb%s\t.L%d\n", ctoa[c], id0+b->s2->id);
```