Source file src/cmd/vendor/golang.org/x/telemetry/start.go

     1  // Copyright 2024 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package telemetry
     6  
     7  import (
     8  	"fmt"
     9  	"log"
    10  	"os"
    11  	"os/exec"
    12  	"path/filepath"
    13  	"sync"
    14  	"time"
    15  
    16  	"golang.org/x/sync/errgroup"
    17  	"golang.org/x/telemetry/counter"
    18  	"golang.org/x/telemetry/internal/crashmonitor"
    19  	"golang.org/x/telemetry/internal/telemetry"
    20  	"golang.org/x/telemetry/internal/upload"
    21  )
    22  
// Config controls the behavior of [Start].
type Config struct {
	// ReportCrashes, if set, will enable crash reporting.
	// ReportCrashes uses the [debug.SetCrashOutput] mechanism, which is a
	// process-wide resource.
	// Do not make other calls to that function within your application.
	// ReportCrashes is non-functional unless the program is built with go1.23+.
	ReportCrashes bool

	// Upload causes this program to periodically upload approved counters
	// from the local telemetry database to telemetry.go.dev.
	//
	// This option has no effect unless the user has given consent
	// to enable data collection, for example by running
	// cmd/gotelemetry or affirming the gopls dialog.
	//
	// (This feature is expected to be used only by gopls.
	// Longer term, the go command may become the sole program
	// responsible for uploading.)
	Upload bool

	// TelemetryDir, if set, will specify an alternate telemetry
	// directory to write data to. If not set, it uses the default
	// directory.
	// This field is intended to be used for isolating testing environments.
	TelemetryDir string

	// UploadStartTime, if set, overrides the time used as the upload start time,
	// which is the time used by the upload logic to determine whether counter
	// file data should be uploaded. Only counter files that have expired before
	// the start time are considered for upload.
	//
	// This field can be used to simulate a future upload that collects recently
	// modified counters.
	UploadStartTime time.Time

	// UploadURL, if set, overrides the URL used to receive uploaded reports. If
	// unset, this URL defaults to https://telemetry.go.dev/upload.
	UploadURL string
}
    63  
    64  // Start initializes telemetry using the specified configuration.
    65  //
    66  // Start opens the local telemetry database so that counter increment
    67  // operations are durably recorded in the local file system.
    68  //
    69  // If [Config.Upload] is set, and the user has opted in to telemetry
    70  // uploading, this process may attempt to upload approved counters
    71  // to telemetry.go.dev.
    72  //
    73  // If [Config.ReportCrashes] is set, any fatal crash will be
    74  // recorded by incrementing a counter named for the stack of the
    75  // first running goroutine in the traceback.
    76  //
    77  // If either of these flags is set, Start re-executes the current
    78  // executable as a child process, in a special mode in which it
    79  // acts as a telemetry sidecar for the parent process (the application).
    80  // In that mode, the call to Start will never return, so Start must
    81  // be called immediately within main, even before such things as
    82  // inspecting the command line. The application should avoid expensive
    83  // steps or external side effects in init functions, as they will
    84  // be executed twice (parent and child).
    85  //
    86  // Start returns a StartResult, which may be awaited via [StartResult.Wait] to
    87  // wait for all work done by Start to complete.
    88  func Start(config Config) *StartResult {
    89  	switch v := os.Getenv(telemetryChildVar); v {
    90  	case "":
    91  		// The subprocess started by parent has GO_TELEMETRY_CHILD=1.
    92  		return parent(config)
    93  	case "1":
    94  		child(config) // child will exit the process when it's done.
    95  	case "2":
    96  		// Do nothing: this was executed directly or indirectly by a child.
    97  	default:
    98  		log.Fatalf("unexpected value for %q: %q", telemetryChildVar, v)
    99  	}
   100  
   101  	return &StartResult{}
   102  }
   103  
   104  // MaybeChild executes the telemetry child logic if the calling program is
   105  // the telemetry child process, and does nothing otherwise. It is meant to be
   106  // called as the first thing in a program that uses telemetry.Start but cannot
   107  // call telemetry.Start immediately when it starts.
   108  func MaybeChild(config Config) {
   109  	if v := os.Getenv(telemetryChildVar); v == "1" {
   110  		child(config) // child will exit the process when it's done.
   111  	}
   112  	// other values of the telemetryChildVar environment variable
   113  	// will be handled by telemetry.Start.
   114  }
   115  
   116  // A StartResult is a handle to the result of a call to [Start]. Call
   117  // [StartResult.Wait] to wait for the completion of all work done on behalf of
   118  // Start.
   119  type StartResult struct {
   120  	wg sync.WaitGroup
   121  }
   122  
   123  // Wait waits for the completion of all work initiated by [Start].
   124  func (res *StartResult) Wait() {
   125  	if res == nil {
   126  		return
   127  	}
   128  	res.wg.Wait()
   129  }
   130  
// daemonize is a hook that startChild applies to the child command before
// starting it. The default defined here does nothing.
// NOTE(review): presumably replaced on some platforms to detach the child
// from the parent's process group or session — confirm against the
// platform-specific files.
var daemonize = func(cmd *exec.Cmd) {}
   132  
   133  // If telemetryChildVar is set to "1" in the environment, this is the telemetry
   134  // child.
   135  //
   136  // If telemetryChildVar is set to "2", this is a child of the child, and no
   137  // further forking should occur.
   138  const telemetryChildVar = "GO_TELEMETRY_CHILD"
   139  
   140  // If telemetryUploadVar is set to "1" in the environment, the upload token has been
   141  // acquired by the parent, and the child should attempt an upload.
   142  const telemetryUploadVar = "GO_TELEMETRY_CHILD_UPLOAD"
   143  
   144  func parent(config Config) *StartResult {
   145  	if config.TelemetryDir != "" {
   146  		telemetry.Default = telemetry.NewDir(config.TelemetryDir)
   147  	}
   148  	result := new(StartResult)
   149  
   150  	mode, _ := telemetry.Default.Mode()
   151  	if mode == "off" {
   152  		// Telemetry is turned off. Crash reporting doesn't work without telemetry
   153  		// at least set to "local". The upload process runs in both "on" and "local" modes.
   154  		// In local mode the upload process builds local reports but does not do the upload.
   155  		return result
   156  	}
   157  
   158  	counter.Open()
   159  
   160  	if _, err := os.Stat(telemetry.Default.LocalDir()); err != nil {
   161  		// There was a problem statting LocalDir, which is needed for both
   162  		// crash monitoring and counter uploading. Most likely, there was an
   163  		// error creating telemetry.LocalDir in the counter.Open call above.
   164  		// Don't start the child.
   165  		return result
   166  	}
   167  
   168  	childShouldUpload := config.Upload && acquireUploadToken()
   169  	reportCrashes := config.ReportCrashes && crashmonitor.Supported()
   170  
   171  	if reportCrashes || childShouldUpload {
   172  		startChild(reportCrashes, childShouldUpload, result)
   173  	}
   174  
   175  	return result
   176  }
   177  
// startChild launches the telemetry sidecar by re-executing the current
// executable with telemetryChildVar=1 added to its environment (plus
// telemetryUploadVar=1 when upload is set).
//
// If reportCrashes is set, a pipe to the child's stdin is created and handed
// to crashmonitor.Parent once the child has started. On success, result's
// wait group is incremented and a goroutine waits for the child to exit.
// Any failure along the way is logged and startChild returns without
// tracking a child in result.
func startChild(reportCrashes, upload bool, result *StartResult) {
	// This process is the application (parent).
	// Fork+exec the telemetry child.
	exe, err := os.Executable()
	if err != nil {
		// There was an error getting os.Executable. It's possible
		// for this to happen on AIX if os.Args[0] is not an absolute
		// path and we can't find os.Args[0] in PATH.
		log.Printf("failed to start telemetry sidecar: os.Executable: %v", err)
		return
	}
	cmd := exec.Command(exe, "** telemetry **") // this unused arg is just for ps(1)
	daemonize(cmd)
	// The child inherits the parent's environment, plus the role marker
	// (and the upload marker, if the parent holds the upload token).
	cmd.Env = append(os.Environ(), telemetryChildVar+"=1")
	if upload {
		cmd.Env = append(cmd.Env, telemetryUploadVar+"=1")
	}
	// Run the child from the local telemetry directory.
	cmd.Dir = telemetry.Default.LocalDir()

	// The child process must write to a log file, not
	// the stderr file it inherited from the parent, as
	// the child may outlive the parent but should not prolong
	// the life of any pipes created (by the grandparent)
	// to gather the output of the parent.
	//
	// By default, we discard the child process's stderr,
	// but in line with the uploader, log to a file in debug
	// only if that directory was created by the user.
	fd, err := os.Stat(telemetry.Default.DebugDir())
	if err != nil {
		// A missing debug directory is the normal case and is not an error;
		// any other stat failure aborts starting the child.
		if !os.IsNotExist(err) {
			log.Printf("failed to stat debug directory: %v", err)
			return
		}
	} else if fd.IsDir() {
		// local/debug exists and is a directory. Set stderr to a log file path
		// in local/debug.
		childLogPath := filepath.Join(telemetry.Default.DebugDir(), "sidecar.log")
		childLog, err := os.OpenFile(childLogPath, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0600)
		if err != nil {
			log.Printf("opening sidecar log file for child: %v", err)
			return
		}
		// Close this process's handle to the log file when startChild returns.
		defer childLog.Close()
		cmd.Stderr = childLog
	}

	// The stdin pipe must be requested before cmd.Start; it is passed to
	// crashmonitor.Parent only after the child has started successfully.
	var crashOutputFile *os.File
	if reportCrashes {
		pipe, err := cmd.StdinPipe()
		if err != nil {
			log.Printf("StdinPipe: %v", err)
			return
		}

		crashOutputFile = pipe.(*os.File) // (this conversion is safe)
	}

	if err := cmd.Start(); err != nil {
		// The child couldn't be started. Log the failure.
		log.Printf("can't start telemetry child process: %v", err)
		return
	}
	if reportCrashes {
		crashmonitor.Parent(crashOutputFile)
	}
	// Track the child in result so that StartResult.Wait blocks until the
	// child has exited and been reaped.
	result.wg.Add(1)
	go func() {
		cmd.Wait() // Release resources if cmd happens not to outlive this process.
		result.wg.Done()
	}()
}
   250  
   251  func child(config Config) {
   252  	log.SetPrefix(fmt.Sprintf("telemetry-sidecar (pid %v): ", os.Getpid()))
   253  
   254  	if config.TelemetryDir != "" {
   255  		telemetry.Default = telemetry.NewDir(config.TelemetryDir)
   256  	}
   257  
   258  	// golang/go#67211: be sure to set telemetryChildVar before running the
   259  	// child, because the child itself invokes the go command to download the
   260  	// upload config. If the telemetryChildVar variable is still set to "1",
   261  	// that delegated go command may think that it is itself a telemetry
   262  	// child.
   263  	//
   264  	// On the other hand, if telemetryChildVar were simply unset, then the
   265  	// delegated go commands would fork themselves recursively. Short-circuit
   266  	// this recursion.
   267  	os.Setenv(telemetryChildVar, "2")
   268  	upload := os.Getenv(telemetryUploadVar) == "1"
   269  
   270  	reportCrashes := config.ReportCrashes && crashmonitor.Supported()
   271  	uploadStartTime := config.UploadStartTime
   272  	uploadURL := config.UploadURL
   273  
   274  	// Start crashmonitoring and uploading depending on what's requested
   275  	// and wait for the longer running child to complete before exiting:
   276  	// if we collected a crash before the upload finished, wait for the
   277  	// upload to finish before exiting
   278  	var g errgroup.Group
   279  
   280  	if reportCrashes {
   281  		g.Go(func() error {
   282  			crashmonitor.Child()
   283  			return nil
   284  		})
   285  	}
   286  	if upload {
   287  		g.Go(func() error {
   288  			uploaderChild(uploadStartTime, uploadURL)
   289  			return nil
   290  		})
   291  	}
   292  	g.Wait()
   293  
   294  	os.Exit(0)
   295  }
   296  
   297  func uploaderChild(asof time.Time, uploadURL string) {
   298  	if err := upload.Run(upload.RunConfig{
   299  		UploadURL: uploadURL,
   300  		LogWriter: os.Stderr,
   301  		StartTime: asof,
   302  	}); err != nil {
   303  		log.Printf("upload failed: %v", err)
   304  	}
   305  }
   306  
   307  // acquireUploadToken acquires a token permitting the caller to upload.
   308  // To limit the frequency of uploads, only one token is issue per
   309  // machine per time period.
   310  // The boolean indicates whether the token was acquired.
   311  func acquireUploadToken() bool {
   312  	if telemetry.Default.LocalDir() == "" {
   313  		// The telemetry dir wasn't initialized properly, probably because
   314  		// os.UserConfigDir did not complete successfully. In that case
   315  		// there are no counters to upload, so we should just do nothing.
   316  		return false
   317  	}
   318  	tokenfile := filepath.Join(telemetry.Default.LocalDir(), "upload.token")
   319  	const period = 24 * time.Hour
   320  
   321  	// A process acquires a token by successfully creating a
   322  	// well-known file. If the file already exists and has an
   323  	// mtime age less then than the period, the process does
   324  	// not acquire the token. If the file is older than the
   325  	// period, the process is allowed to remove the file and
   326  	// try to re-create it.
   327  	fi, err := os.Stat(tokenfile)
   328  	if err == nil {
   329  		if time.Since(fi.ModTime()) < period {
   330  			return false
   331  		}
   332  		// There's a possible race here where two processes check the
   333  		// token file and see that it's older than the period, then the
   334  		// first one removes it and creates another, and then a second one
   335  		// removes the newly created file and creates yet another
   336  		// file. Then both processes would act as though they had the token.
   337  		// This is very rare, but it's also okay because we're only grabbing
   338  		// the token to do rate limiting, not for correctness.
   339  		_ = os.Remove(tokenfile)
   340  	} else if !os.IsNotExist(err) {
   341  		log.Printf("error acquiring upload taken: statting token file: %v", err)
   342  		return false
   343  	}
   344  
   345  	f, err := os.OpenFile(tokenfile, os.O_CREATE|os.O_EXCL, 0666)
   346  	if err != nil {
   347  		if os.IsExist(err) {
   348  			return false
   349  		}
   350  		log.Printf("error acquiring upload token: creating token file: %v", err)
   351  		return false
   352  	}
   353  	_ = f.Close()
   354  	return true
   355  }
   356  

View as plain text