Source file src/os/pidfd_linux.go

     1  // Copyright 2023 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Support for pidfd was added during the course of a few Linux releases:
     6  //  v5.1: pidfd_send_signal syscall;
     7  //  v5.2: CLONE_PIDFD flag for clone syscall;
     8  //  v5.3: pidfd_open syscall, clone3 syscall;
     9  //  v5.4: P_PIDFD idtype support for waitid syscall;
    10  //  v5.6: pidfd_getfd syscall.
    11  //
    12  // N.B. Alternative Linux implementations may not follow this ordering. e.g.,
    13  // QEMU user mode 7.2 added pidfd_open, but CLONE_PIDFD was not added until
    14  // 8.0.
    15  
    16  package os
    17  
    18  import (
    19  	"errors"
    20  	"internal/syscall/unix"
    21  	"runtime"
    22  	"sync"
    23  	"syscall"
    24  	"unsafe"
    25  )
    26  
    27  // ensurePidfd initializes the PidFD field in sysAttr if it is not already set.
    28  // It returns the original or modified SysProcAttr struct and a flag indicating
    29  // whether the PidFD should be duplicated before using.
    30  func ensurePidfd(sysAttr *syscall.SysProcAttr) (*syscall.SysProcAttr, bool) {
    31  	if !pidfdWorks() {
    32  		return sysAttr, false
    33  	}
    34  
    35  	var pidfd int
    36  
    37  	if sysAttr == nil {
    38  		return &syscall.SysProcAttr{
    39  			PidFD: &pidfd,
    40  		}, false
    41  	}
    42  	if sysAttr.PidFD == nil {
    43  		newSys := *sysAttr // copy
    44  		newSys.PidFD = &pidfd
    45  		return &newSys, false
    46  	}
    47  
    48  	return sysAttr, true
    49  }
    50  
    51  // getPidfd returns the value of sysAttr.PidFD (or its duplicate if needDup is
    52  // set) and a flag indicating whether the value can be used.
    53  func getPidfd(sysAttr *syscall.SysProcAttr, needDup bool) (uintptr, bool) {
    54  	if !pidfdWorks() {
    55  		return 0, false
    56  	}
    57  
    58  	h := *sysAttr.PidFD
    59  	if needDup {
    60  		dupH, e := unix.Fcntl(h, syscall.F_DUPFD_CLOEXEC, 0)
    61  		if e != nil {
    62  			return 0, false
    63  		}
    64  		h = dupH
    65  	}
    66  	return uintptr(h), true
    67  }
    68  
    69  func pidfdFind(pid int) (uintptr, error) {
    70  	if !pidfdWorks() {
    71  		return 0, syscall.ENOSYS
    72  	}
    73  
    74  	h, err := unix.PidFDOpen(pid, 0)
    75  	if err != nil {
    76  		return 0, convertESRCH(err)
    77  	}
    78  	return h, nil
    79  }
    80  
// _P_PIDFD is used as idtype argument to waitid syscall. In this file it is
// always paired with a pidfd as the id argument, so that waitid operates on
// the process the pidfd refers to.
const _P_PIDFD = 3
    83  
    84  func (p *Process) pidfdWait() (*ProcessState, error) {
    85  	// When pidfd is used, there is no wait/kill race (described in CL 23967)
    86  	// because the PID recycle issue doesn't exist (IOW, pidfd, unlike PID,
    87  	// is guaranteed to refer to one particular process). Thus, there is no
    88  	// need for the workaround (blockUntilWaitable + sigMu) from pidWait.
    89  	//
    90  	// We _do_ need to be careful about reuse of the pidfd FD number when
    91  	// closing the pidfd. See handle for more details.
    92  	handle, status := p.handleTransientAcquire()
    93  	switch status {
    94  	case statusDone:
    95  		// Process already completed Wait, or was not found by
    96  		// pidfdFind. Return ECHILD for consistency with what the wait
    97  		// syscall would return.
    98  		return nil, NewSyscallError("wait", syscall.ECHILD)
    99  	case statusReleased:
   100  		return nil, syscall.EINVAL
   101  	}
   102  	defer p.handleTransientRelease()
   103  
   104  	var (
   105  		info   unix.SiginfoChild
   106  		rusage syscall.Rusage
   107  		e      syscall.Errno
   108  	)
   109  	for {
   110  		_, _, e = syscall.Syscall6(syscall.SYS_WAITID, _P_PIDFD, handle, uintptr(unsafe.Pointer(&info)), syscall.WEXITED, uintptr(unsafe.Pointer(&rusage)), 0)
   111  		if e != syscall.EINTR {
   112  			break
   113  		}
   114  	}
   115  	if e != 0 {
   116  		return nil, NewSyscallError("waitid", e)
   117  	}
   118  	// Release the Process' handle reference, in addition to the reference
   119  	// we took above.
   120  	p.handlePersistentRelease(statusDone)
   121  	return &ProcessState{
   122  		pid:    int(info.Pid),
   123  		status: info.WaitStatus(),
   124  		rusage: &rusage,
   125  	}, nil
   126  }
   127  
   128  func (p *Process) pidfdSendSignal(s syscall.Signal) error {
   129  	handle, status := p.handleTransientAcquire()
   130  	switch status {
   131  	case statusDone:
   132  		return ErrProcessDone
   133  	case statusReleased:
   134  		return errors.New("os: process already released")
   135  	}
   136  	defer p.handleTransientRelease()
   137  
   138  	return convertESRCH(unix.PidFDSendSignal(handle, s))
   139  }
   140  
   141  func pidfdWorks() bool {
   142  	return checkPidfdOnce() == nil
   143  }
   144  
   145  var checkPidfdOnce = sync.OnceValue(checkPidfd)
   146  
   147  // checkPidfd checks whether all required pidfd-related syscalls work. This
   148  // consists of pidfd_open and pidfd_send_signal syscalls, waitid syscall with
   149  // idtype of P_PIDFD, and clone(CLONE_PIDFD).
   150  //
   151  // Reasons for non-working pidfd syscalls include an older kernel and an
   152  // execution environment in which the above system calls are restricted by
   153  // seccomp or a similar technology.
   154  func checkPidfd() error {
   155  	// In Android version < 12, pidfd-related system calls are not allowed
   156  	// by seccomp and trigger the SIGSYS signal. See issue #69065.
   157  	if runtime.GOOS == "android" {
   158  		ignoreSIGSYS()
   159  		defer restoreSIGSYS()
   160  	}
   161  
   162  	// Get a pidfd of the current process (opening of "/proc/self" won't
   163  	// work for waitid).
   164  	fd, err := unix.PidFDOpen(syscall.Getpid(), 0)
   165  	if err != nil {
   166  		return NewSyscallError("pidfd_open", err)
   167  	}
   168  	defer syscall.Close(int(fd))
   169  
   170  	// Check waitid(P_PIDFD) works.
   171  	for {
   172  		_, _, err = syscall.Syscall6(syscall.SYS_WAITID, _P_PIDFD, fd, 0, syscall.WEXITED, 0, 0)
   173  		if err != syscall.EINTR {
   174  			break
   175  		}
   176  	}
   177  	// Expect ECHILD from waitid since we're not our own parent.
   178  	if err != syscall.ECHILD {
   179  		return NewSyscallError("pidfd_wait", err)
   180  	}
   181  
   182  	// Check pidfd_send_signal works (should be able to send 0 to itself).
   183  	if err := unix.PidFDSendSignal(fd, 0); err != nil {
   184  		return NewSyscallError("pidfd_send_signal", err)
   185  	}
   186  
   187  	// Verify that clone(CLONE_PIDFD) works.
   188  	//
   189  	// This shouldn't be necessary since pidfd_open was added in Linux 5.3,
   190  	// after CLONE_PIDFD in Linux 5.2, but some alternative Linux
   191  	// implementations may not adhere to this ordering.
   192  	if err := checkClonePidfd(); err != nil {
   193  		return err
   194  	}
   195  
   196  	return nil
   197  }
   198  
// checkClonePidfd is the final step of checkPidfd, which uses it to verify
// that clone(CLONE_PIDFD) works; a non-nil error is returned from checkPidfd
// unchanged.
//
// Provided by syscall.
//
//go:linkname checkClonePidfd
func checkClonePidfd() error
   203  
// ignoreSIGSYS is called by checkPidfd on Android before it probes the
// pidfd-related system calls, which seccomp may answer with SIGSYS there.
// NOTE(review): presumably it makes the runtime ignore SIGSYS until
// restoreSIGSYS is called; confirm against the runtime implementation.
//
// Provided by runtime.
//
//go:linkname ignoreSIGSYS
func ignoreSIGSYS()
   208  
// restoreSIGSYS is deferred by checkPidfd on Android right after the call to
// ignoreSIGSYS, so it runs once the pidfd probe has finished.
// NOTE(review): presumably it undoes the effect of ignoreSIGSYS and is, like
// ignoreSIGSYS, provided by runtime; confirm against the runtime
// implementation.
//
//go:linkname restoreSIGSYS
func restoreSIGSYS()
   211  

View as plain text