// src/runtime/race_arm64.s
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build race

#include "go_asm.h"
#include "funcdata.h"
#include "textflag.h"
#include "tls_arm64.h"
#include "cgo/abi_arm64.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and scheduler.
// Third, in the long term it will allow removing the cyclic runtime/race
// dependency on cmd/go.

// A brief recap of the arm64 calling convention.
// Arguments are passed in R0...R7, the rest is on stack.
// Callee-saved registers are: R19...R28.
// Temporary registers are: R9...R15.
// SP must be 16-byte aligned.

// When calling racecalladdr, R9 is the call target address.

// The race ctx, ThreadState *thr below, is passed in R0 and loaded in racecalladdr.

// Darwin may return an unaligned thread pointer. Align it. (See tls_arm64.s.)
// No-op on other OSes.
#ifdef TLS_darwin
#define TP_ALIGN	AND	$~7, R0
#else
#define TP_ALIGN
#endif

// Load g from TLS. (See tls_arm64.s.)
// Clobbers R0 and R11.
#define load_g \
	MRS_TPIDR_R0 \
	TP_ALIGN \
	MOVD	runtime·tls_g(SB), R11 \
	MOVD	(R0)(R11), g

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make caller's PC ineffective.
TEXT	runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R1	// addr
	MOVD	LR, R2	// caller pc (return address)
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_read(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// func runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_read_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make caller's PC ineffective.
TEXT	runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R1	// addr
	MOVD	LR, R2	// caller pc (return address)
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_write(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// func runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_write_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make caller's PC ineffective.
TEXT	runtime·racereadrange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVD	R1, R2	// size (move first: R1 is overwritten by addr next)
	MOVD	R0, R1	// addr
	MOVD	LR, R3	// caller pc (return address)
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// func runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start, tsan wants return address.
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make caller's PC ineffective.
TEXT	runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVD	R1, R2	// size (move first: R1 is overwritten by addr next)
	MOVD	R0, R1	// addr
	MOVD	LR, R3	// caller pc (return address)
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// func runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start, tsan wants return address.
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// If addr (R1) is out of range, do nothing.
// Otherwise, setup goroutine context and invoke racecall. Other arguments already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// ThreadState *thr, first C argument
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R1
	BLT	data	// addr < arenastart: try the data range
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R1
	BLT	call	// arenastart <= addr < arenaend: instrument
data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R1
	BLT	ret	// addr below both ranges: ignore
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R1
	BGT	ret	// addr above data range: ignore
call:
	JMP	racecall<>(SB)
ret:
	RET

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R9	// callpc
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter
// R9 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// goroutine racectx
	MOVD	R9, R1	// pc
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVD	$__tsan_func_enter(SB), R9
	BL	racecall<>(SB)
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit<ABIInternal>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// race context
	// void __tsan_func_exit(ThreadState *thr);
	MOVD	$__tsan_func_exit(SB), R9
	JMP	racecall<>(SB)

// Atomic operations for sync/atomic package.
// R3 = addr of arguments passed to this function, it can
// be fetched at 40(RSP) in racecallatomic after two times BL
// R0, R1, R2 set in racecallatomic

// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVD	$__tsan_go_atomic32_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVD	$__tsan_go_atomic64_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

// Unsigned/uintptr/pointer variants are tail calls to the
// same-width signed implementations above.
TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVD	$__tsan_go_atomic32_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVD	$__tsan_go_atomic64_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

// Unsigned/uintptr variants are tail calls to the
// same-width signed implementations above.
TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

// Unsigned/uintptr variants are tail calls to the
// same-width signed implementations above.
TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

// Add
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	// tsan implements fetch_add (returns the old value); Go's Add
	// returns the new value, so add the delta to the stored result.
	MOVW	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVW	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVW	R0, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	// Same fetch_add -> add_fetch conversion as AddInt32, 64-bit wide.
	MOVD	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVD	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVD	R0, ret+16(FP)
	RET

// Unsigned/uintptr variants are tail calls to the
// same-width signed implementations above.
TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17
	GO_ARGS
	MOVD	$__tsan_go_atomic32_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25
	GO_ARGS
	MOVD	$__tsan_go_atomic64_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

// Unsigned/uintptr variants are tail calls to the
// same-width signed implementations above.
TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// R9 = addr of target function
TEXT	racecallatomic<>(SB), NOSPLIT, $0
	// Set up these registers
	// R0 = *ThreadState
	// R1 = caller pc
	// R2 = pc
	// R3 = addr of incoming arg list

	// Trigger SIGSEGV early.
	MOVD	40(RSP), R3	// 1st arg is addr. after two times BL, get it at 40(RSP)
	MOVB	(R3), R13	// segv here if addr is bad
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ok
racecallatomic_data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ignore
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R3
	BGE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3	// arguments
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause crash.
	MOVD	R9, R21	// remember the original function (R21 is callee-saved)
	MOVD	$__tsan_go_ignore_sync_begin(SB), R9
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	MOVD	R21, R9	// restore the original function
	// Call the atomic function.
	// racecall will call LLVM race code which might clobber R28 (g)
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3	// arguments
	BL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVD	$__tsan_go_ignore_sync_end(SB), R9
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	RET

// func runtime·racecall(void(*f)(...), ...)
// Calls C function f from race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVD	fn+0(FP), R9	// target C function
	MOVD	arg0+8(FP), R0
	MOVD	arg1+16(FP), R1
	MOVD	arg2+24(FP), R2
	MOVD	arg3+32(FP), R3
	JMP	racecall<>(SB)

// Switches SP to g0 stack and calls (R9). Arguments already set.
// Clobbers R19, R20.
TEXT	racecall<>(SB), NOSPLIT|NOFRAME, $0-0
	MOVD	g_m(g), R10
	// Switch to g0 stack.
	MOVD	RSP, R19	// callee-saved, preserved across the CALL
	MOVD	R30, R20	// callee-saved, preserved across the CALL
	MOVD	m_g0(R10), R11
	CMP	R11, g
	BEQ	call	// already on g0
	MOVD	(g_sched+gobuf_sp)(R11), R12
	MOVD	R12, RSP
call:
	BL	R9
	MOVD	R19, RSP	// restore the original stack pointer
	JMP	(R20)	// return to the saved LR

// C->Go callback thunk that allows to call runtime·racesymbolize from C code.
// Direct Go->C race call has only switched SP, finish g->g0 switch by setting correct g.
// The overall effect of Go->C->Go call chain is similar to that of mcall.
// R0 contains command code. R1 contains command-specific context.
// See racecallback for command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0
	// Handle command raceGetProcCmd (0) here.
	// First, code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so will
	// benefit from this fast path.
	CBNZ	R0, rest
	MOVD	g, R13	// save current g; restored before returning
#ifdef TLS_darwin
	MOVD	R27, R12 // save R27 a.k.a. REGTMP (callee-save in C). load_g clobbers it
#endif
	load_g
#ifdef TLS_darwin
	MOVD	R12, R27
#endif
	MOVD	g_m(g), R0
	MOVD	m_p(R0), R0
	MOVD	p_raceprocctx(R0), R0
	MOVD	R0, (R1)	// *ctx = p.raceprocctx
	MOVD	R13, g	// restore g
	JMP	(LR)
rest:
	// Save callee-saved registers (Go code won't respect that).
	// 8(RSP) and 16(RSP) are for args passed through racecallback
	SUB	$176, RSP
	MOVD	LR, 0(RSP)

	SAVE_R19_TO_R28(8*3)
	SAVE_F8_TO_F15(8*13)
	MOVD	R29, (8*21)(RSP)
	// Set g = g0.
	// load_g will clobber R0, Save R0
	MOVD	R0, R13
	load_g
	// restore R0
	MOVD	R13, R0
	MOVD	g_m(g), R13
	MOVD	m_g0(R13), R14
	CMP	R14, g
	BEQ	noswitch	// branch if already on g0
	MOVD	R14, g

	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)

	// All registers are smashed after Go code, reload.
	MOVD	g_m(g), R13
	MOVD	m_curg(R13), g	// g = m->curg
ret:
	// Restore callee-saved registers.
	MOVD	0(RSP), LR
	MOVD	(8*21)(RSP), R29
	RESTORE_F8_TO_F15(8*13)
	RESTORE_R19_TO_R28(8*3)
	ADD	$176, RSP
	JMP	(LR)

noswitch:
	// already on g0
	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)
	JMP	ret

#ifndef TLSG_IS_VARIABLE
// tls_g, g value for each thread in TLS
GLOBL runtime·tls_g+0(SB), TLSBSS+DUPOK, $8
#endif
