Source file
src/runtime/os_linux.go
1
2
3
4
5 package runtime
6
7 import (
8 "internal/abi"
9 "internal/goarch"
10 "internal/runtime/atomic"
11 "internal/runtime/syscall"
12 "unsafe"
13 )
14
15
16
17
// sigPerThreadSyscall is the signal that
// syscall_runtime_doAllThreadsSyscall sends (through signalM) to every
// other thread to ask it to run the pending per-thread system call.
// It is the signal number one past _SIGRTMIN.
const sigPerThreadSyscall = _SIGRTMIN + 1
19
// mOS holds the Linux-specific fields of an m (one per OS thread).
type mOS struct {
	// profileTimer is the id obtained from timer_create for this thread's
	// CPU profiling timer. It is meaningful only while profileTimerValid
	// is true: setThreadCPUProfiler stores true after the timer has been
	// armed and stores false before deleting it.
	profileTimer      int32
	profileTimerValid atomic.Bool

	// needPerThreadSyscall is set to 1 by
	// syscall_runtime_doAllThreadsSyscall to request that this thread run
	// the pending call, and reset to 0 by runPerThreadSyscall when done.
	needPerThreadSyscall atomic.Uint8

	// vgetrandomState is a handle that mdestroy hands back through
	// vgetrandomPutState; zero means no state is held.
	vgetrandomState uintptr

	// NOTE(review): waitsema is not referenced in this file; presumably
	// the semaphore word used by the shared lock/note code - confirm.
	waitsema uint32
}
41
42
// futex is declared without a Go body; its implementation lives outside
// this file. Callers here pass _FUTEX_WAIT_PRIVATE or _FUTEX_WAKE_PRIVATE
// as op, and futexwakeup treats a negative result as failure.
// NOTE(review): presumably a thin wrapper over the futex(2) system call
// with the same argument order - confirm against the assembly.
func futex(addr unsafe.Pointer, op int32, val uint32, ts, addr2 unsafe.Pointer, val3 uint32) int32
44
45
46
47
48
49
50
51
52
53
// Operation codes passed as op to futex. The private flag (128) is OR-ed
// into the base operation numbers: 0 for wait, 1 for wake.
// NOTE(review): presumably these mirror FUTEX_PRIVATE_FLAG, FUTEX_WAIT and
// FUTEX_WAKE from the Linux futex interface - confirm against the kernel
// headers.
const (
	_FUTEX_PRIVATE_FLAG = 128
	_FUTEX_WAIT_PRIVATE = 0 | _FUTEX_PRIVATE_FLAG
	_FUTEX_WAKE_PRIVATE = 1 | _FUTEX_PRIVATE_FLAG
)
59
60
61
62
63
64
65
66
67
68 func futexsleep(addr *uint32, val uint32, ns int64) {
69
70
71
72
73
74 if ns < 0 {
75 futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, nil, nil, 0)
76 return
77 }
78
79 var ts timespec
80 ts.setNsec(ns)
81 futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, unsafe.Pointer(&ts), nil, 0)
82 }
83
84
85
86
87 func futexwakeup(addr *uint32, cnt uint32) {
88 ret := futex(unsafe.Pointer(addr), _FUTEX_WAKE_PRIVATE, cnt, nil, nil, 0)
89 if ret >= 0 {
90 return
91 }
92
93
94
95
96 systemstack(func() {
97 print("futexwakeup addr=", addr, " returned ", ret, "\n")
98 })
99
100 *(*int32)(unsafe.Pointer(uintptr(0x1006))) = 0x1006
101 }
102
103 func getproccount() int32 {
104
105
106
107
108
109
110
111 const maxCPUs = 64 * 1024
112 var buf [maxCPUs / 8]byte
113 r := sched_getaffinity(0, unsafe.Sizeof(buf), &buf[0])
114 if r < 0 {
115 return 1
116 }
117 n := int32(0)
118 for _, v := range buf[:r] {
119 for v != 0 {
120 n += int32(v & 1)
121 v >>= 1
122 }
123 }
124 if n == 0 {
125 n = 1
126 }
127 return n
128 }
129
130
// Flag bits for the flags argument of clone.
// NOTE(review): the values presumably mirror the CLONE_* constants of the
// Linux clone(2) interface - confirm against the kernel headers.
const (
	_CLONE_VM             = 0x100
	_CLONE_FS             = 0x200
	_CLONE_FILES          = 0x400
	_CLONE_SIGHAND        = 0x800
	_CLONE_PTRACE         = 0x2000
	_CLONE_VFORK          = 0x4000
	_CLONE_PARENT         = 0x8000
	_CLONE_THREAD         = 0x10000
	_CLONE_NEWNS          = 0x20000
	_CLONE_SYSVSEM        = 0x40000
	_CLONE_SETTLS         = 0x80000
	_CLONE_PARENT_SETTID  = 0x100000
	_CLONE_CHILD_CLEARTID = 0x200000
	_CLONE_UNTRACED       = 0x800000
	_CLONE_CHILD_SETTID   = 0x1000000
	_CLONE_STOPPED        = 0x2000000
	_CLONE_NEWUTS         = 0x4000000
	_CLONE_NEWIPC         = 0x8000000

	// cloneFlags is the flag set that newosproc and newosproc0 pass to
	// clone for every thread created in this file: the VM, FS, FILES,
	// SIGHAND, SYSVSEM and THREAD bits.
	cloneFlags = _CLONE_VM |
		_CLONE_FS |
		_CLONE_FILES |
		_CLONE_SIGHAND |
		_CLONE_SYSVSEM |
		_CLONE_THREAD
)
165
166
// clone is declared without a Go body; its implementation lives outside
// this file. Callers here pass the top of the new thread's stack as stk,
// the m and its g0 as mp and gp (both nil in newosproc0), and the entry
// point as fn. A negative result indicates failure; newosproc negates it
// to obtain the errno.
func clone(flags int32, stk, mp, gp, fn unsafe.Pointer) int32
168
169
170
171
// newosproc creates a new OS thread for mp with clone. The thread is given
// the high end of mp.g0's stack as its stack pointer and mstart as its
// entry point. If thread creation ultimately fails, the errno is printed
// and the runtime throws.
func newosproc(mp *m) {
	stk := unsafe.Pointer(mp.g0.stack.hi)

	// Debugging aid: never executed unless the condition is edited.
	if false {
		print("newosproc stk=", stk, " m=", mp, " g=", mp.g0, " clone=", abi.FuncPCABI0(clone), " id=", mp.id, " ostk=", &mp, "\n")
	}

	// Install sigset_all as the signal mask for the duration of the clone
	// call and restore the previous mask afterwards.
	// NOTE(review): presumably so the new thread starts with every signal
	// blocked until it has initialised itself - confirm.
	var oset sigset
	sigprocmask(_SIG_SETMASK, &sigset_all, &oset)
	ret := retryOnEAGAIN(func() int32 {
		r := clone(cloneFlags, stk, unsafe.Pointer(mp), unsafe.Pointer(mp.g0), unsafe.Pointer(abi.FuncPCABI0(mstart)))
		// Translate clone's result for retryOnEAGAIN: 0 on success,
		// otherwise the positive errno (clone reports it negated).
		if r >= 0 {
			return 0
		}
		return -r
	})
	sigprocmask(_SIG_SETMASK, &oset, nil)

	if ret != 0 {
		print("runtime: failed to create new OS thread (have ", mcount(), " already; errno=", ret, ")\n")
		if ret == _EAGAIN {
			println("runtime: may need to increase max user processes (ulimit -u)")
		}
		throw("newosproc")
	}
}
204
205
206
207
208 func newosproc0(stacksize uintptr, fn unsafe.Pointer) {
209 stack := sysAlloc(stacksize, &memstats.stacks_sys)
210 if stack == nil {
211 writeErrStr(failallocatestack)
212 exit(1)
213 }
214 ret := clone(cloneFlags, unsafe.Pointer(uintptr(stack)+stacksize), nil, nil, fn)
215 if ret < 0 {
216 writeErrStr(failthreadcreate)
217 exit(1)
218 }
219 }
220
// Tags of auxiliary-vector entries. In this file sysauxv stops at _AT_NULL
// and acts on _AT_RANDOM, _AT_PAGESZ and _AT_SECURE; every tag is also
// forwarded to archauxv and vdsoauxv.
// NOTE(review): the numeric values presumably match the AT_* constants of
// the Linux ELF auxiliary vector - confirm against the kernel headers.
const (
	_AT_NULL     = 0
	_AT_PAGESZ   = 6
	_AT_PLATFORM = 15
	_AT_HWCAP    = 16
	_AT_SECURE   = 23
	_AT_RANDOM   = 25
	_AT_HWCAP2   = 26
)
230
// procAuxv is the NUL-terminated path that sysargs opens as a fallback
// source for the auxiliary vector.
var procAuxv = []byte("/proc/self/auxv\x00")

// addrspace_vec is the one-byte result buffer that sysargs hands to
// mincore while probing for the page size.
var addrspace_vec [1]byte

// mincore is declared without a Go body; its implementation lives outside
// this file. sysargs treats a zero result as success.
func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32

// auxvreadbuf receives the contents of /proc/self/auxv when sysargs falls
// back to reading that file. It holds 128 words, i.e. at most 64 tag/value
// pairs including the terminator.
var auxvreadbuf [128]uintptr
238
// sysargs locates the auxiliary vector and feeds it to sysauxv.
//
// It first looks at the words that follow argv in memory. If that yields
// no pairs it reads /proc/self/auxv instead. If that file cannot be
// opened either, it falls back to probing for the physical page size with
// mincore and leaves auxv unset.
func sysargs(argc int32, argv **byte) {
	// Start just past the argc argument pointers and the nil after them.
	n := argc + 1

	// Skip every non-nil pointer that follows (presumably the environment
	// block - confirm) up to its nil terminator.
	for argv_index(argv, n) != nil {
		n++
	}

	// Step over that terminator.
	n++

	// The words from argv+n onward are interpreted as the auxiliary vector.
	// The huge array type only provides something sliceable; the real
	// length is whatever sysauxv finds before _AT_NULL.
	auxvp := (*[1 << 28]uintptr)(add(unsafe.Pointer(argv), uintptr(n)*goarch.PtrSize))

	if pairs := sysauxv(auxvp[:]); pairs != 0 {
		// Cap the slice at the entries actually parsed.
		auxv = auxvp[: pairs*2 : pairs*2]
		return
	}

	// No usable vector after argv: fall back to /proc/self/auxv.
	fd := open(&procAuxv[0], 0 /* O_RDONLY */, 0)
	if fd < 0 {
		// The file is unavailable too. Map a scratch region and call
		// mincore at increasing power-of-two offsets starting at 4 KiB;
		// the first offset mincore accepts is taken as the page size.
		// NOTE(review): relies on mincore rejecting addresses that are
		// not page-aligned - confirm.
		const size = 256 << 10
		p, err := mmap(nil, size, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
		if err != 0 {
			return
		}
		var n uintptr // shadows the outer n; here it is the probe offset
		for n = 4 << 10; n < size; n <<= 1 {
			err := mincore(unsafe.Pointer(uintptr(p)+n), 1, &addrspace_vec[0])
			if err == 0 {
				physPageSize = n
				break
			}
		}
		if physPageSize == 0 {
			// No probe succeeded; use the size of the whole region.
			physPageSize = size
		}
		munmap(p, size)
		return
	}

	n = read(fd, noescape(unsafe.Pointer(&auxvreadbuf[0])), int32(unsafe.Sizeof(auxvreadbuf)))
	closefd(fd)
	if n < 0 {
		return
	}

	// Force a terminator into the last pair so sysauxv always stops inside
	// the buffer, even if the file was longer than the buffer.
	auxvreadbuf[len(auxvreadbuf)-2] = _AT_NULL
	pairs := sysauxv(auxvreadbuf[:])
	auxv = auxvreadbuf[: pairs*2 : pairs*2]
}
296
297
// secureMode records whether the auxiliary vector carried an _AT_SECURE
// entry whose value is 1. It is written by sysauxv.
var secureMode bool
299
300 func sysauxv(auxv []uintptr) (pairs int) {
301
302
303 var i int
304 for ; auxv[i] != _AT_NULL; i += 2 {
305 tag, val := auxv[i], auxv[i+1]
306 switch tag {
307 case _AT_RANDOM:
308
309
310
311
312
313
314 startupRand = (*[16]byte)(unsafe.Pointer(val))[:]
315
316 case _AT_PAGESZ:
317 physPageSize = val
318
319 case _AT_SECURE:
320 secureMode = val == 1
321 }
322
323 archauxv(tag, val)
324 vdsoauxv(tag, val)
325 }
326 return i / 2
327 }
328
// sysTHPSizePath is the NUL-terminated sysfs path that getHugePageSize
// reads to obtain the transparent huge page size.
var sysTHPSizePath = []byte("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size\x00")
330
331 func getHugePageSize() uintptr {
332 var numbuf [20]byte
333 fd := open(&sysTHPSizePath[0], 0 , 0)
334 if fd < 0 {
335 return 0
336 }
337 ptr := noescape(unsafe.Pointer(&numbuf[0]))
338 n := read(fd, ptr, int32(len(numbuf)))
339 closefd(fd)
340 if n <= 0 {
341 return 0
342 }
343 n--
344 v, ok := atoi(slicebytetostringtmp((*byte)(ptr), int(n)))
345 if !ok || v < 0 {
346 v = 0
347 }
348 if v&(v-1) != 0 {
349
350 return 0
351 }
352 return uintptr(v)
353 }
354
// osinit performs the Linux-specific initialisation visible in this file:
// it records the CPU count in ncpu and the huge page size in
// physHugePageSize, then runs osArchInit and vgetrandomInit.
func osinit() {
	ncpu = getproccount()
	physHugePageSize = getHugePageSize()
	osArchInit()
	vgetrandomInit()
}
361
// urandom_dev is the NUL-terminated path of the random device opened by
// readRandom.
var urandom_dev = []byte("/dev/urandom\x00")
363
364 func readRandom(r []byte) int {
365
366
367 fd := open(&urandom_dev[0], 0 , 0)
368 n := read(fd, unsafe.Pointer(&r[0]), int32(len(r)))
369 closefd(fd)
370 return int(n)
371 }
372
// goenvs delegates to goenvs_unix; Linux adds nothing of its own here.
func goenvs() {
	goenvs_unix()
}
376
377
378
379
380
381
382
// libpreinit calls initsig with its argument set to true.
// NOTE(review): presumably the early initialisation hook used when the
// runtime is built as a library or archive, with true selecting
// pre-initialisation mode in initsig - confirm.
func libpreinit() {
	initsig(true)
}
386
387
388
// mpreinit allocates the gsignal goroutine for mp with malg(32 * 1024)
// and points that goroutine back at mp.
// NOTE(review): the malg argument is presumably the stack size in bytes
// (32 KiB) - confirm.
func mpreinit(mp *m) {
	mp.gsignal = malg(32 * 1024)
	mp.gsignal.m = mp
}
393
// gettid is declared without a Go body; its implementation lives outside
// this file. minit stores its result as the calling m's procid.
func gettid() uint32
395
396
397
// minit initialises the current m: it runs minitSignals and then records
// the value returned by gettid as the m's procid. Other code in this file
// (signalM, setThreadCPUProfiler, syscall_runtime_doAllThreadsSyscall)
// uses procid as the thread id and treats zero as "not yet set".
func minit() {
	minitSignals()

	getg().m.procid = uint64(gettid())
}
406
407
408
409
// unminit reverses minit for the current m: it runs unminitSignals and
// clears procid.
func unminit() {
	unminitSignals()
	getg().m.procid = 0
}
414
415
416
417 func mdestroy(mp *m) {
418 if mp.vgetrandomState != 0 {
419 vgetrandomPutState(mp.vgetrandomState)
420 mp.vgetrandomState = 0
421 }
422 }
423
424
425
426
427
// The following functions are declared without Go bodies; their
// implementations live outside this file. They are only used by address
// in setsig: sigreturn__sigaction is installed as sa_restorer on 386 and
// amd64, and sigtramp (or cgoSigtramp when iscgo is set) is installed as
// the handler in place of sighandler.
func sigreturn__sigaction()
func sigtramp()
func cgoSigtramp()
431
432
// The functions below are declared without Go bodies; their
// implementations live outside this file.
// NOTE(review): each presumably wraps the Linux system call of the same
// name - confirm against the assembly.

// sigaltstack takes a new and an old stackt; it returns nothing.
func sigaltstack(new, old *stackt)

// setitimer takes a mode plus new and old itimerval; it returns nothing.
func setitimer(mode int32, new, old *itimerval)

// timer_create stores the new timer's id through timerid. Callers in this
// file treat a non-zero result as failure.
func timer_create(clockid int32, sevp *sigevent, timerid *int32) int32

// timer_settime arms timerid with new. Callers in this file treat a
// non-zero result as failure and print its negation as the errno.
func timer_settime(timerid int32, flags int32, new, old *itimerspec) int32

// timer_delete removes timerid. Callers in this file treat a non-zero
// result as failure and print its negation as the errno.
func timer_delete(timerid int32) int32

// rtsigprocmask is the primitive behind sigprocmask; size is the size in
// bytes of the sigset being passed.
func rtsigprocmask(how int32, new, old *sigset, size int32)
449
450
451
// sigprocmask forwards to rtsigprocmask, supplying the size of a sigset.
// unsafe.Sizeof is evaluated at compile time, so a nil new is not
// dereferenced here.
func sigprocmask(how int32, new, old *sigset) {
	rtsigprocmask(how, new, old, int32(unsafe.Sizeof(*new)))
}
455
// The functions below are declared without Go bodies; their
// implementations live outside this file.

// NOTE(review): raise and raiseproc are not called in this file;
// presumably they deliver sig to the current thread and to the whole
// process respectively - confirm.
func raise(sig uint32)
func raiseproc(sig uint32)

// sched_getaffinity fills buf (len bytes long) with an affinity mask.
// getproccount treats a negative result as failure and otherwise uses the
// result as the number of bytes of buf that are valid.
func sched_getaffinity(pid, len uintptr, buf *byte) int32

// osyield is used in this file to spin politely while waiting for other
// threads (see syscall_runtime_doAllThreadsSyscall).
func osyield()
462
463
// osyield_no_g simply calls osyield.
// NOTE(review): presumably exists as the variant that is safe to call
// without a g; on Linux the two are identical - confirm.
func osyield_no_g() {
	osyield()
}
467
// pipe2 is declared without a Go body; its implementation lives outside
// this file. It returns two descriptors (r, w) and an errno.
// NOTE(review): presumably wraps the pipe2 system call - confirm.
func pipe2(flags int32) (r, w int32, errno int32)
469
470
// fcntl performs the SYS_FCNTL system call on fd with the given cmd and
// arg. It returns the first result register and the errno from
// syscall.Syscall6, each truncated to int32.
func fcntl(fd, cmd, arg int32) (ret int32, errno int32) {
	res, _, e := syscall.Syscall6(syscall.SYS_FCNTL, uintptr(fd), uintptr(cmd), uintptr(arg), 0, 0, 0)
	ret, errno = int32(res), int32(e)
	return ret, errno
}
475
// NOTE(review): these constants are not referenced in this file;
// presumably they are the total sizes in bytes of the kernel's siginfo
// and sigevent structures, used to pad the Go definitions - confirm.
const (
	_si_max_size    = 128
	_sigev_max_size = 64
)
480
481
482
483 func setsig(i uint32, fn uintptr) {
484 var sa sigactiont
485 sa.sa_flags = _SA_SIGINFO | _SA_ONSTACK | _SA_RESTORER | _SA_RESTART
486 sigfillset(&sa.sa_mask)
487
488
489
490 if GOARCH == "386" || GOARCH == "amd64" {
491 sa.sa_restorer = abi.FuncPCABI0(sigreturn__sigaction)
492 }
493 if fn == abi.FuncPCABIInternal(sighandler) {
494 if iscgo {
495 fn = abi.FuncPCABI0(cgoSigtramp)
496 } else {
497 fn = abi.FuncPCABI0(sigtramp)
498 }
499 }
500 sa.sa_handler = fn
501 sigaction(i, &sa, nil)
502 }
503
504
505
506 func setsigstack(i uint32) {
507 var sa sigactiont
508 sigaction(i, nil, &sa)
509 if sa.sa_flags&_SA_ONSTACK != 0 {
510 return
511 }
512 sa.sa_flags |= _SA_ONSTACK
513 sigaction(i, &sa, nil)
514 }
515
516
517
// getsig returns the handler address currently installed for signal i,
// obtained by querying the action with sigaction without changing it.
func getsig(i uint32) uintptr {
	var sa sigactiont
	sigaction(i, nil, &sa)
	return sa.sa_handler
}
523
524
525
526
// setSignalstackSP stores sp into s.ss_sp. The store goes through a
// *uintptr cast of the field's address rather than a plain assignment.
// NOTE(review): presumably because ss_sp's declared type is not uintptr
// on every architecture - confirm against the stackt definitions.
func setSignalstackSP(s *stackt, sp uintptr) {
	*(*uintptr)(unsafe.Pointer(&s.ss_sp)) = sp
}
530
531
// fixsigcode is intentionally empty on Linux: the signal code in c is left
// as delivered.
func (c *sigctxt) fixsigcode(sig uint32) {
}
534
535
536
537
538 func sysSigaction(sig uint32, new, old *sigactiont) {
539 if rt_sigaction(uintptr(sig), new, old, unsafe.Sizeof(sigactiont{}.sa_mask)) != 0 {
540
541
542
543
544
545
546
547
548
549
550
551 if sig != 32 && sig != 33 && sig != 64 {
552
553 systemstack(func() {
554 throw("sigaction failed")
555 })
556 }
557 }
558 }
559
560
561
562
// The functions below are declared without Go bodies; their
// implementations live outside this file.

// rt_sigaction is the primitive behind sysSigaction, which treats a
// non-zero result as failure. size is the size in bytes of the signal
// mask inside sigactiont.
func rt_sigaction(sig uintptr, new, old *sigactiont, size uintptr) int32

// getpid and tgkill are used together by signalM to send a signal to one
// specific thread of this process.
func getpid() int
func tgkill(tgid, tid, sig int)
567
568
// signalM sends signal sig to the thread backing mp, identified by
// mp.procid, within the current process (getpid).
func signalM(mp *m, sig int) {
	tgkill(getpid(), int(mp.procid), sig)
}
572
573
574
575
576
577
578
579
580 func validSIGPROF(mp *m, c *sigctxt) bool {
581 code := int32(c.sigcode())
582 setitimer := code == _SI_KERNEL
583 timer_create := code == _SI_TIMER
584
585 if !(setitimer || timer_create) {
586
587
588
589 return true
590 }
591
592 if mp == nil {
593
594
595
596
597
598
599
600
601
602
603
604
605 return setitimer
606 }
607
608
609
610 if mp.profileTimerValid.Load() {
611
612
613
614
615
616 return timer_create
617 }
618
619
620 return setitimer
621 }
622
// setProcessCPUProfiler forwards hz to setProcessCPUProfilerTimer.
func setProcessCPUProfiler(hz int32) {
	setProcessCPUProfilerTimer(hz)
}
626
// setThreadCPUProfiler configures CPU profiling for the calling thread at
// hz samples per second; hz == 0 turns it off.
//
// Any existing per-thread timer is deleted first. For a non-zero hz, a
// new timer on _CLOCK_THREAD_CPUTIME_ID is created that delivers _SIGPROF
// to this thread. If timer_create fails the function returns quietly with
// profileTimerValid left false; a failure of timer_delete or
// timer_settime is fatal.
func setThreadCPUProfiler(hz int32) {
	mp := getg().m
	mp.profilehz = hz

	// Tear down the previous timer, if any. The valid flag is cleared
	// before the timer is deleted.
	if mp.profileTimerValid.Load() {
		timerid := mp.profileTimer
		mp.profileTimerValid.Store(false)
		mp.profileTimer = 0

		ret := timer_delete(timerid)
		if ret != 0 {
			print("runtime: failed to disable profiling timer; timer_delete(", timerid, ") errno=", -ret, "\n")
			throw("timer_delete")
		}
	}

	if hz == 0 {
		// Profiling is being disabled; nothing more to set up.
		return
	}

	// The timer repeats every 1e9/hz nanoseconds. Its first expiry is
	// 1 + cheaprandn(1e9/hz) nanoseconds away.
	// NOTE(review): cheaprandn(n) presumably returns a value in [0, n), so
	// the first expiry is randomised within one period - confirm.
	spec := new(itimerspec)
	spec.it_value.setNsec(1 + int64(cheaprandn(uint32(1e9/hz))))
	spec.it_interval.setNsec(1e9 / int64(hz))

	// Ask for _SIGPROF to be delivered to this specific thread (procid).
	var timerid int32
	var sevp sigevent
	sevp.notify = _SIGEV_THREAD_ID
	sevp.signo = _SIGPROF
	sevp.sigev_notify_thread_id = int32(mp.procid)
	ret := timer_create(_CLOCK_THREAD_CPUTIME_ID, &sevp, &timerid)
	if ret != 0 {
		// Creation failed: leave profileTimerValid false so that
		// validSIGPROF keeps accepting the _SI_KERNEL source for this
		// thread.
		return
	}

	ret = timer_settime(timerid, 0, spec, nil)
	if ret != 0 {
		print("runtime: failed to configure profiling timer; timer_settime(", timerid,
			", 0, {interval: {",
			spec.it_interval.tv_sec, "s + ", spec.it_interval.tv_nsec, "ns} value: {",
			spec.it_value.tv_sec, "s + ", spec.it_value.tv_nsec, "ns}}, nil) errno=", -ret, "\n")
		throw("timer_settime")
	}

	// Publish the id before marking it valid.
	mp.profileTimer = timerid
	mp.profileTimerValid.Store(true)
}
696
697
698
// perThreadSyscallArgs describes one system call to be replayed on every
// thread: the trap number, its six arguments, and the results (r1, r2)
// that the initiating thread obtained and that every other thread is
// expected to reproduce.
type perThreadSyscallArgs struct {
	trap uintptr
	a1   uintptr
	a2   uintptr
	a3   uintptr
	a4   uintptr
	a5   uintptr
	a6   uintptr
	r1   uintptr
	r2   uintptr
}
710
711
712
713
714
715
// perThreadSyscall holds the call currently being replayed.
// syscall_runtime_doAllThreadsSyscall fills it in before signalling the
// other threads and zeroes it once all of them have finished;
// runPerThreadSyscall reads it on each signalled thread.
var perThreadSyscall perThreadSyscallArgs
717
718
719
720
721
722
723
724
725
// syscall_runtime_doAllThreadsSyscall executes the system call given by
// trap and a1..a6 on the calling thread and, if that succeeds, has every
// other thread in allm execute the same call. It returns the results of
// the calling thread's invocation.
//
// If the first call fails, no other thread is asked to run it and the
// error is returned. A thread whose replay fails or yields different
// results aborts the program (see runPerThreadSyscall). The function
// panics when iscgo is set.
// NOTE(review): presumably reached from package syscall via linkname, as
// the name suggests - confirm.
func syscall_runtime_doAllThreadsSyscall(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err uintptr) {
	if iscgo {
		// Not supported together with cgo.
		panic("doAllThreadsSyscall not supported with cgo enabled")
	}

	// Stop the world for the duration of the operation.
	stw := stopTheWorld(stwAllThreadsSyscall)

	// Hold allocmLock for the duration.
	// NOTE(review): presumably this keeps new ms from being allocated,
	// so allm stays stable while it is walked below - confirm.
	allocmLock.lock()

	// Pin this goroutine to its m until the matching releasem.
	acquirem()

	// Perform the call on this thread first; its results become the
	// reference for every other thread.
	r1, r2, errno := syscall.Syscall6(trap, a1, a2, a3, a4, a5, a6)
	if GOARCH == "ppc64" || GOARCH == "ppc64le" {
		// r2 is not compared on these architectures; force it to zero
		// (runPerThreadSyscall does the same).
		r2 = 0
	}
	if errno != 0 {
		// The call failed here, so do not replay it elsewhere. Undo the
		// setup in reverse order and report the error.
		releasem(getg().m)
		allocmLock.unlock()
		startTheWorld(stw)
		return r1, r2, errno
	}

	// Publish the call and its expected results for the other threads.
	perThreadSyscall = perThreadSyscallArgs{
		trap: trap,
		a1:   a1,
		a2:   a2,
		a3:   a3,
		a4:   a4,
		a5:   a5,
		a6:   a6,
		r1:   r1,
		r2:   r2,
	}

	// Wait until every m has a non-zero procid (set in minit), so that
	// each one can be signalled by thread id.
	for mp := allm; mp != nil; mp = mp.alllink {
		for atomic.Load64(&mp.procid) == 0 {
			// Thread has not recorded its id yet.
			osyield()
		}
	}

	// Flag and signal every thread except this one.
	gp := getg()
	tid := gp.m.procid
	for mp := allm; mp != nil; mp = mp.alllink {
		if atomic.Load64(&mp.procid) == tid {
			// This thread already ran the call above.
			continue
		}
		mp.needPerThreadSyscall.Store(1)
		signalM(mp, sigPerThreadSyscall)
	}

	// Wait for each signalled thread to clear its flag.
	for mp := allm; mp != nil; mp = mp.alllink {
		if mp.procid == tid {
			continue
		}
		for mp.needPerThreadSyscall.Load() != 0 {
			osyield()
		}
	}

	// All threads are done; clear the published call.
	perThreadSyscall = perThreadSyscallArgs{}

	releasem(getg().m)
	allocmLock.unlock()
	startTheWorld(stw)

	return r1, r2, errno
}
872
873
874
875
876
877
878
// runPerThreadSyscall runs the call published in perThreadSyscall on the
// current thread, if this thread's needPerThreadSyscall flag is set, and
// clears the flag afterwards. It does nothing when the flag is clear.
//
// The call must succeed and return exactly the r1 and r2 recorded by the
// initiating thread; otherwise the details are printed and the program is
// terminated with fatal.
func runPerThreadSyscall() {
	gp := getg()
	if gp.m.needPerThreadSyscall.Load() == 0 {
		return
	}

	// Work from a local copy of the published call.
	args := perThreadSyscall
	r1, r2, errno := syscall.Syscall6(args.trap, args.a1, args.a2, args.a3, args.a4, args.a5, args.a6)
	if GOARCH == "ppc64" || GOARCH == "ppc64le" {
		// r2 is not compared on these architectures; the initiator
		// zeroes it as well.
		r2 = 0
	}
	if errno != 0 || r1 != args.r1 || r2 != args.r2 {
		print("trap:", args.trap, ", a123456=[", args.a1, ",", args.a2, ",", args.a3, ",", args.a4, ",", args.a5, ",", args.a6, "]\n")
		print("results: got {r1=", r1, ",r2=", r2, ",errno=", errno, "}, want {r1=", args.r1, ",r2=", args.r2, ",errno=0}\n")
		fatal("AllThreadsSyscall6 results differ between threads; runtime corrupted")
	}

	// Tell the initiator that this thread is done.
	gp.m.needPerThreadSyscall.Store(0)
}
899
// Signal code values compared against sigctxt.sigcode by sigFromUser
// (_SI_USER, _SI_TKILL) and sigFromSeccomp (_SYS_SECCOMP).
// NOTE(review): presumably these match the kernel's SI_USER, SI_TKILL and
// SYS_SECCOMP si_code values - confirm against the kernel headers.
const (
	_SI_USER     = 0
	_SI_TKILL    = -6
	_SYS_SECCOMP = 1
)
905
906
907
908
909
910 func (c *sigctxt) sigFromUser() bool {
911 code := int32(c.sigcode())
912 return code == _SI_USER || code == _SI_TKILL
913 }
914
915
916
917
918 func (c *sigctxt) sigFromSeccomp() bool {
919 code := int32(c.sigcode())
920 return code == _SYS_SECCOMP
921 }
922
923
// mprotect performs the SYS_MPROTECT system call on the n bytes starting
// at addr with protection prot. It returns the first result register and
// the errno from syscall.Syscall6, each truncated to int32.
func mprotect(addr unsafe.Pointer, n uintptr, prot int32) (ret int32, errno int32) {
	res, _, e := syscall.Syscall6(syscall.SYS_MPROTECT, uintptr(addr), n, uintptr(prot), 0, 0, 0)
	ret, errno = int32(res), int32(e)
	return ret, errno
}
928
View as plain text