about | summary | refs | log | tree | commit | diff | stats
path: root/main/musl/fix-TLS-layout-of-TLS-variant-I-when-there-is-gap-above-TP.patch
blob: ce82a6fd286dd973774434a0d021a02052d1bd42 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
From 610c5a8524c3d6cd3ac5a5f1231422e7648a3791 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Sat, 2 Jun 2018 01:52:01 +0200
Subject: fix TLS layout of TLS variant I when there is a gap above TP

In TLS variant I the TLS is above TP (or above a fixed offset from TP)
but on some targets there is a reserved gap above TP before TLS starts.

This matters for the local-exec tls access model when the offsets of
TLS variables from the TP are hard coded by the linker into the
executable, so the libc must compute these offsets the same way as the
linker.  The tls offset of the main module has to be

	alignup(GAP_ABOVE_TP, main_tls_align).

If there is no TLS in the main module then the gap can be ignored
since musl does not use it and the tls access models of shared
libraries are not affected.

The previous setup only worked if (tls_align & -GAP_ABOVE_TP) == 0
(i.e. TLS did not require large alignment) because the gap was
treated as a fixed offset from TP.  Now the TP points at the end
of the pthread struct (which is aligned) and there is a gap above
it (which may also need alignment).

The fix required changing TP_ADJ and __pthread_self on affected
targets (aarch64, arm and sh) and in the tlsdesc asm the offset to
access the dtv changed too.

Patch-Source: https://git.musl-libc.org/cgit/musl/commit/?id=610c5a8524c3d6cd3ac5a5f1231422e7648a3791
See-Also: https://github.com/rust-lang/rust/issues/48967
---
 arch/aarch64/pthread_arch.h   |  5 +++--
 arch/aarch64/reloc.h          |  2 +-
 arch/arm/pthread_arch.h       |  7 ++++---
 arch/arm/reloc.h              |  2 +-
 arch/mips/pthread_arch.h      |  1 +
 arch/mips64/pthread_arch.h    |  1 +
 arch/mipsn32/pthread_arch.h   |  1 +
 arch/or1k/pthread_arch.h      |  1 +
 arch/powerpc/pthread_arch.h   |  1 +
 arch/powerpc64/pthread_arch.h |  1 +
 arch/sh/pthread_arch.h        |  5 +++--
 arch/sh/reloc.h               |  2 +-
 ldso/dynlink.c                |  5 +++--
 src/env/__init_tls.c          | 10 ++++++++--
 src/ldso/aarch64/tlsdesc.s    |  5 ++---
 15 files changed, 32 insertions(+), 17 deletions(-)

diff --git a/arch/aarch64/pthread_arch.h b/arch/aarch64/pthread_arch.h
index b2e2d8f..e8499d8 100644
--- a/arch/aarch64/pthread_arch.h
+++ b/arch/aarch64/pthread_arch.h
@@ -2,10 +2,11 @@ static inline struct pthread *__pthread_self()
 {
 	char *self;
 	__asm__ __volatile__ ("mrs %0,tpidr_el0" : "=r"(self));
-	return (void*)(self + 16 - sizeof(struct pthread));
+	return (void*)(self - sizeof(struct pthread));
 }
 
 #define TLS_ABOVE_TP
-#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) - 16)
+#define GAP_ABOVE_TP 16
+#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread))
 
 #define MC_PC pc
diff --git a/arch/aarch64/reloc.h b/arch/aarch64/reloc.h
index 51b66e2..40cf0b2 100644
--- a/arch/aarch64/reloc.h
+++ b/arch/aarch64/reloc.h
@@ -10,7 +10,7 @@
 
 #define NO_LEGACY_INITFINI
 
-#define TPOFF_K 16
+#define TPOFF_K 0
 
 #define REL_SYMBOLIC    R_AARCH64_ABS64
 #define REL_GOT         R_AARCH64_GLOB_DAT
diff --git a/arch/arm/pthread_arch.h b/arch/arm/pthread_arch.h
index 6657e19..8f2ae8f 100644
--- a/arch/arm/pthread_arch.h
+++ b/arch/arm/pthread_arch.h
@@ -5,7 +5,7 @@ static inline pthread_t __pthread_self()
 {
 	char *p;
 	__asm__ __volatile__ ( "mrc p15,0,%0,c13,c0,3" : "=r"(p) );
-	return (void *)(p+8-sizeof(struct pthread));
+	return (void *)(p-sizeof(struct pthread));
 }
 
 #else
@@ -21,12 +21,13 @@ static inline pthread_t __pthread_self()
 	extern uintptr_t __attribute__((__visibility__("hidden"))) __a_gettp_ptr;
 	register uintptr_t p __asm__("r0");
 	__asm__ __volatile__ ( BLX " %1" : "=r"(p) : "r"(__a_gettp_ptr) : "cc", "lr" );
-	return (void *)(p+8-sizeof(struct pthread));
+	return (void *)(p-sizeof(struct pthread));
 }
 
 #endif
 
 #define TLS_ABOVE_TP
-#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) - 8)
+#define GAP_ABOVE_TP 8
+#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread))
 
 #define MC_PC arm_pc
diff --git a/arch/arm/reloc.h b/arch/arm/reloc.h
index b175711..4b00bf6 100644
--- a/arch/arm/reloc.h
+++ b/arch/arm/reloc.h
@@ -16,7 +16,7 @@
 
 #define NO_LEGACY_INITFINI
 
-#define TPOFF_K 8
+#define TPOFF_K 0
 
 #define REL_SYMBOLIC    R_ARM_ABS32
 #define REL_GOT         R_ARM_GLOB_DAT
diff --git a/arch/mips/pthread_arch.h b/arch/mips/pthread_arch.h
index e581265..5fea15a 100644
--- a/arch/mips/pthread_arch.h
+++ b/arch/mips/pthread_arch.h
@@ -11,6 +11,7 @@ static inline struct pthread *__pthread_self()
 }
 
 #define TLS_ABOVE_TP
+#define GAP_ABOVE_TP 0
 #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000)
 
 #define DTP_OFFSET 0x8000
diff --git a/arch/mips64/pthread_arch.h b/arch/mips64/pthread_arch.h
index e581265..5fea15a 100644
--- a/arch/mips64/pthread_arch.h
+++ b/arch/mips64/pthread_arch.h
@@ -11,6 +11,7 @@ static inline struct pthread *__pthread_self()
 }
 
 #define TLS_ABOVE_TP
+#define GAP_ABOVE_TP 0
 #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000)
 
 #define DTP_OFFSET 0x8000
diff --git a/arch/mipsn32/pthread_arch.h b/arch/mipsn32/pthread_arch.h
index e581265..5fea15a 100644
--- a/arch/mipsn32/pthread_arch.h
+++ b/arch/mipsn32/pthread_arch.h
@@ -11,6 +11,7 @@ static inline struct pthread *__pthread_self()
 }
 
 #define TLS_ABOVE_TP
+#define GAP_ABOVE_TP 0
 #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000)
 
 #define DTP_OFFSET 0x8000
diff --git a/arch/or1k/pthread_arch.h b/arch/or1k/pthread_arch.h
index 7decd76..521b9c5 100644
--- a/arch/or1k/pthread_arch.h
+++ b/arch/or1k/pthread_arch.h
@@ -12,6 +12,7 @@ static inline struct pthread *__pthread_self()
 }
 
 #define TLS_ABOVE_TP
+#define GAP_ABOVE_TP 0
 #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread))
 
 #define MC_PC regs.pc
diff --git a/arch/powerpc/pthread_arch.h b/arch/powerpc/pthread_arch.h
index 7c5c4fa..79e5a09 100644
--- a/arch/powerpc/pthread_arch.h
+++ b/arch/powerpc/pthread_arch.h
@@ -11,6 +11,7 @@ static inline struct pthread *__pthread_self()
 }
                         
 #define TLS_ABOVE_TP
+#define GAP_ABOVE_TP 0
 #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000)
 
 #define DTP_OFFSET 0x8000
diff --git a/arch/powerpc64/pthread_arch.h b/arch/powerpc64/pthread_arch.h
index 2f976fe..37b75e2 100644
--- a/arch/powerpc64/pthread_arch.h
+++ b/arch/powerpc64/pthread_arch.h
@@ -6,6 +6,7 @@ static inline struct pthread *__pthread_self()
 }
 
 #define TLS_ABOVE_TP
+#define GAP_ABOVE_TP 0
 #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000)
 
 #define DTP_OFFSET 0x8000
diff --git a/arch/sh/pthread_arch.h b/arch/sh/pthread_arch.h
index 2756e7e..41fefac 100644
--- a/arch/sh/pthread_arch.h
+++ b/arch/sh/pthread_arch.h
@@ -2,10 +2,11 @@ static inline struct pthread *__pthread_self()
 {
 	char *self;
 	__asm__ __volatile__ ("stc gbr,%0" : "=r" (self) );
-	return (struct pthread *) (self + 8 - sizeof(struct pthread));
+	return (struct pthread *) (self - sizeof(struct pthread));
 }
 
 #define TLS_ABOVE_TP
-#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) - 8)
+#define GAP_ABOVE_TP 8
+#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread))
 
 #define MC_PC sc_pc
diff --git a/arch/sh/reloc.h b/arch/sh/reloc.h
index 0238ce0..a1f16cb 100644
--- a/arch/sh/reloc.h
+++ b/arch/sh/reloc.h
@@ -20,7 +20,7 @@
 
 #define LDSO_ARCH "sh" ENDIAN_SUFFIX FP_SUFFIX ABI_SUFFIX
 
-#define TPOFF_K 8
+#define TPOFF_K 0
 
 #define REL_SYMBOLIC    R_SH_DIR32
 #define REL_OFFSET      R_SH_REL32
diff --git a/ldso/dynlink.c b/ldso/dynlink.c
index cea5f45..c6216b7 100644
--- a/ldso/dynlink.c
+++ b/ldso/dynlink.c
@@ -1594,8 +1594,9 @@ _Noreturn void __dls3(size_t *sp)
 		libc.tls_head = tls_tail = &app.tls;
 		app.tls_id = tls_cnt = 1;
 #ifdef TLS_ABOVE_TP
-		app.tls.offset = 0;
-		tls_offset = app.tls.size
+		app.tls.offset = GAP_ABOVE_TP;
+		app.tls.offset += -GAP_ABOVE_TP & (app.tls.align-1);
+		tls_offset = app.tls.offset + app.tls.size
 			+ ( -((uintptr_t)app.tls.image + app.tls.size)
 			& (app.tls.align-1) );
 #else
diff --git a/src/env/__init_tls.c b/src/env/__init_tls.c
index 1c5d98a..31d324a 100644
--- a/src/env/__init_tls.c
+++ b/src/env/__init_tls.c
@@ -104,13 +104,19 @@ static void static_init_tls(size_t *aux)
 
 	main_tls.size += (-main_tls.size - (uintptr_t)main_tls.image)
 		& (main_tls.align-1);
-	if (main_tls.align < MIN_TLS_ALIGN) main_tls.align = MIN_TLS_ALIGN;
-#ifndef TLS_ABOVE_TP
+#ifdef TLS_ABOVE_TP
+	main_tls.offset = GAP_ABOVE_TP;
+	main_tls.offset += -GAP_ABOVE_TP & (main_tls.align-1);
+#else
 	main_tls.offset = main_tls.size;
 #endif
+	if (main_tls.align < MIN_TLS_ALIGN) main_tls.align = MIN_TLS_ALIGN;
 
 	libc.tls_align = main_tls.align;
 	libc.tls_size = 2*sizeof(void *) + sizeof(struct pthread)
+#ifdef TLS_ABOVE_TP
+		+ main_tls.offset
+#endif
 		+ main_tls.size + main_tls.align
 		+ MIN_TLS_ALIGN-1 & -MIN_TLS_ALIGN;
 
diff --git a/src/ldso/aarch64/tlsdesc.s b/src/ldso/aarch64/tlsdesc.s
index 8ed5c26..8e4004d 100644
--- a/src/ldso/aarch64/tlsdesc.s
+++ b/src/ldso/aarch64/tlsdesc.s
@@ -14,7 +14,7 @@ __tlsdesc_static:
 // size_t __tlsdesc_dynamic(size_t *a)
 // {
 // 	struct {size_t modidx,off;} *p = (void*)a[1];
-// 	size_t *dtv = *(size_t**)(tp + 16 - 8);
+// 	size_t *dtv = *(size_t**)(tp - 8);
 // 	if (p->modidx <= dtv[0])
 // 		return dtv[p->modidx] + p->off - tp;
 // 	return __tls_get_new(p) - tp;
@@ -28,8 +28,7 @@ __tlsdesc_dynamic:
 	mrs x1,tpidr_el0      // tp
 	ldr x0,[x0,#8]        // p
 	ldr x2,[x0]           // p->modidx
-	add x3,x1,#8
-	ldr x3,[x3]           // dtv
+	ldr x3,[x1,#-8]       // dtv
 	ldr x4,[x3]           // dtv[0]
 	cmp x2,x4
 	b.hi 1f
-- 
cgit v0.11.2