summary refs log tree commit diff
path: root/amd64/isel.c
diff options
context:
space:
mode:
author Quentin Carbonneaux <quentin@c9x.me> 2022-10-12 20:59:20 +0200
committer Quentin Carbonneaux <quentin@c9x.me> 2022-10-12 21:12:08 +0200
commit 8ecae922997c55f70cd9e19cbf947a520f7ecca3 (patch)
tree f75685ee79cc015883b2a60d7a5cbf6c52c751d5 /amd64/isel.c
parent 577e93fe6d729b63447faad471fd0f5f2296f667 (diff)
download roux-8ecae922997c55f70cd9e19cbf947a520f7ecca3.tar.gz
thread-local storage for amd64_apple
It is quite similar to arm64_apple.
Probably, the call that needs to be
generated also provides extra
invariants on top of the regular
ABI, but I have not checked that.

Clang generates code that is a bit
neater than qbe's because, on x86,
a load can be fused into a call
instruction! We do not bother with
supporting such fused calls since we
expect only sporadic use of the feature.

For reference, here is what clang
might output for a store to the
second entry of a thread-local
array of ints:

        movq    _x@TLVP(%rip), %rdi
        callq   *(%rdi)
        movl    %ecx, 4(%rax)
Diffstat (limited to 'amd64/isel.c')
-rw-r--r-- amd64/isel.c 25
1 files changed, 24 insertions, 1 deletions
diff --git a/amd64/isel.c b/amd64/isel.c
index a562441..8c89378 100644
--- a/amd64/isel.c
+++ b/amd64/isel.c
@@ -63,7 +63,8 @@ fixarg(Ref *r, int k, Ins *i, Fn *fn)
 {
 	char buf[32];
 	Addr a, *m;
-	Ref r0, r1;
+	Con cc, *c;
+	Ref r0, r1, r2, r3;
 	int s, n, op;
 
 	r1 = r0 = *r;
@@ -121,6 +122,28 @@ fixarg(Ref *r, int k, Ins *i, Fn *fn)
 			m->offset.type = CUndef;
 			m->base = r0;
 		}
+	} else if (T.apple && rtype(r0) == RCon
+	&& (c = &fn->con[r0.val])->type == CAddr
+	&& c->reloc == RelThr) {
+		r1 = newtmp("isel", Kl, fn);
+		if (c->bits.i) {
+			r2 = newtmp("isel", Kl, fn);
+			cc = (Con){.type = CBits};
+			cc.bits.i = c->bits.i;
+			r3 = newcon(&cc, fn);
+			emit(Oadd, Kl, r1, r2, r3);
+		} else
+			r2 = r1;
+		emit(Ocopy, Kl, r2, TMP(RAX), R);
+		r2 = newtmp("isel", Kl, fn);
+		r3 = newtmp("isel", Kl, fn);
+		emit(Ocall, 0, R, r3, CALL(17));
+		emit(Ocopy, Kl, TMP(RDI), r2, R);
+		emit(Oload, Kl, r3, r2, R);
+		cc = *c;
+		cc.bits.i = 0;
+		r3 = newcon(&cc, fn);
+		emit(Oload, Kl, r2, r3, R);
 	}
 	*r = r1;
 }