summary refs log tree commit diff
path: root/emit.c
diff options
context:
space:
mode:
author Quentin Carbonneaux <quentin@c9x.me> 2022-10-03 10:40:39 +0200
committer Quentin Carbonneaux <quentin@c9x.me> 2022-10-12 21:11:41 +0200
commit 577e93fe6d729b63447faad471fd0f5f2296f667 (patch)
tree 411f67778f6ced6d6d2b2f45cc9e739d8a004d31 /emit.c
parent b03a8970d7b73959397f0ca5c8f2a532c1905e5d (diff)
download roux-577e93fe6d729b63447faad471fd0f5f2296f667.tar.gz
thread-local storage for arm64_apple
It is documented nowhere how this is
supposed to work. It is also quite easy
to trigger assertion failures in the
linker when generating asm that differs
even slightly from clang's!

The best source of information is found
in LLVM's source code (AArch64ISelLowering.cpp).
I paste it here for future reference:

/// Darwin only has one TLS scheme which must be capable of dealing with the
/// fully general situation, in the worst case. This means:
///     + "extern __thread" declaration.
///     + Defined in a possibly unknown dynamic library.
///
/// The general system is that each __thread variable has a [3 x i64] descriptor
/// which contains information used by the runtime to calculate the address. The
/// only part of this the compiler needs to know about is the first xword, which
/// contains a function pointer that must be called with the address of the
/// entire descriptor in "x0".
///
/// Since this descriptor may be in a different unit, in general even the
/// descriptor must be accessed via an indirect load. The "ideal" code sequence
/// is:
///     adrp x0, _var@TLVPPAGE
///     ldr x0, [x0, _var@TLVPPAGEOFF]   ; x0 now contains address of descriptor
///     ldr x1, [x0]                     ; x1 contains 1st entry of descriptor,
///                                      ; the function pointer
///     blr x1                           ; Uses descriptor address in x0
///     ; Address of _var is now in x0.
///
/// If the address of _var's descriptor *is* known to the linker, then it can
/// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for
/// a slight efficiency gain.

The call 'blr x1' above is actually
special in that it trashes fewer registers
than the ABI would normally permit.
In QBE, I don't take advantage of this
and lower the call like a regular call.
We can revise this later on. Again, the
source for this information is LLVM's
source code:

// TLS calls preserve all registers except those that absolutely must be
// trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
// silly).
Diffstat (limited to 'emit.c')
-rw-r--r-- emit.c 28
1 files changed, 23 insertions, 5 deletions
diff --git a/emit.c b/emit.c
index 5e0f452..017c461 100644
--- a/emit.c
+++ b/emit.c
@@ -17,8 +17,27 @@ emitlnk(char *n, Lnk *l, int s, FILE *f)
 		[1][SecData] = ".section .tdata,\"awT\"",
 		[1][SecBss] = ".section .tbss,\"awT\"",
 	};
-	char *p;
-
+	char *pfx, *sfx;
+
+	pfx = n[0] == '"' ? "" : T.assym;
+	sfx = "";
+	if (T.apple && l->thread) {
+		l->sec = "__DATA";
+		l->secf = "__thread_data,thread_local_regular";
+		sfx = "$tlv$init";
+		fputs(
+			".section __DATA,__thread_vars,"
+			"thread_local_variables\n",
+			f
+		);
+		fprintf(f, "%s%s:\n", pfx, n);
+		fprintf(f,
+			"\t.quad __tlv_bootstrap\n"
+			"\t.quad 0\n"
+			"\t.quad %s%s%s\n\n",
+			pfx, n, sfx
+		);
+	}
 	if (l->sec) {
 		fprintf(f, ".section %s", l->sec);
 		if (l->secf)
@@ -28,10 +47,9 @@ emitlnk(char *n, Lnk *l, int s, FILE *f)
 	fputc('\n', f);
 	if (l->align)
 		fprintf(f, ".balign %d\n", l->align);
-	p = n[0] == '"' ? "" : T.assym;
 	if (l->export)
-		fprintf(f, ".globl %s%s\n", p, n);
-	fprintf(f, "%s%s:\n", p, n);
+		fprintf(f, ".globl %s%s\n", pfx, n);
+	fprintf(f, "%s%s%s:\n", pfx, n, sfx);
 }
 
 void