Source file
src/testing/benchmark.go
1
2
3
4
5 package testing
6
7 import (
8 "context"
9 "flag"
10 "fmt"
11 "internal/sysinfo"
12 "io"
13 "math"
14 "os"
15 "runtime"
16 "slices"
17 "strconv"
18 "strings"
19 "sync"
20 "sync/atomic"
21 "time"
22 "unicode"
23 )
24
25 func initBenchmarkFlags() {
26 matchBenchmarks = flag.String("test.bench", "", "run only benchmarks matching `regexp`")
27 benchmarkMemory = flag.Bool("test.benchmem", false, "print memory allocations for benchmarks")
28 flag.Var(&benchTime, "test.benchtime", "run each benchmark for duration `d` or N times if `d` is of the form Nx")
29 }
30
31 var (
32 matchBenchmarks *string
33 benchmarkMemory *bool
34
35 benchTime = durationOrCountFlag{d: 1 * time.Second}
36 )
37
// durationOrCountFlag is a flag value that holds either a duration
// (for example "1.5s") or an iteration count written as "Nx"
// (for example "100x").
type durationOrCountFlag struct {
	// d is the duration; it is used when n is zero.
	d time.Duration
	// n is the iteration count; a positive value selects count mode.
	n int
	// allowZero reports whether Set accepts a zero count or duration.
	allowZero bool
}

// String formats the flag value: "Nx" when a positive count is set,
// otherwise the duration in time.Duration notation.
func (f *durationOrCountFlag) String() string {
	if f.n > 0 {
		return fmt.Sprintf("%dx", f.n)
	}
	return f.d.String()
}

// Set parses s as either an "Nx" iteration count or a duration and
// stores the result in f. Negative values are always rejected; zero is
// rejected unless f.allowZero is set. On error f is left unchanged.
func (f *durationOrCountFlag) Set(s string) error {
	if strings.HasSuffix(s, "x") {
		n, err := strconv.ParseInt(s[:len(s)-1], 10, 0)
		if err != nil || n < 0 || (!f.allowZero && n == 0) {
			return fmt.Errorf("invalid count")
		}
		// Carry allowZero over: it is configuration, not part of the
		// parsed value, and must survive repeated Set calls.
		*f = durationOrCountFlag{n: int(n), allowZero: f.allowZero}
		return nil
	}
	d, err := time.ParseDuration(s)
	if err != nil || d < 0 || (!f.allowZero && d == 0) {
		return fmt.Errorf("invalid duration")
	}
	*f = durationOrCountFlag{d: d, allowZero: f.allowZero}
	return nil
}
67
68
var (
	// benchmarkLock is the package-wide lock held by runN for the duration
	// of a benchmark run. Run releases it while a sub-benchmark executes
	// and reacquires it afterwards.
	benchmarkLock sync.Mutex

	// memStats is the shared buffer that the timer methods pass to
	// runtime.ReadMemStats when sampling allocation counters.
	memStats runtime.MemStats
)
73
74
75
76 type InternalBenchmark struct {
77 Name string
78 F func(b *B)
79 }
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94 type B struct {
95 common
96 importPath string
97 bstate *benchState
98 N int
99 previousN int
100 previousDuration time.Duration
101 benchFunc func(b *B)
102 benchTime durationOrCountFlag
103 bytes int64
104 missingBytes bool
105 timerOn bool
106 showAllocResult bool
107 result BenchmarkResult
108 parallelism int
109
110 startAllocs uint64
111 startBytes uint64
112
113 netAllocs uint64
114 netBytes uint64
115
116 extra map[string]float64
117
118
119
120 loopN int
121 }
122
123
124
125
126 func (b *B) StartTimer() {
127 if !b.timerOn {
128 runtime.ReadMemStats(&memStats)
129 b.startAllocs = memStats.Mallocs
130 b.startBytes = memStats.TotalAlloc
131 b.start = highPrecisionTimeNow()
132 b.timerOn = true
133 }
134 }
135
136
137
138 func (b *B) StopTimer() {
139 if b.timerOn {
140 b.duration += highPrecisionTimeSince(b.start)
141 runtime.ReadMemStats(&memStats)
142 b.netAllocs += memStats.Mallocs - b.startAllocs
143 b.netBytes += memStats.TotalAlloc - b.startBytes
144 b.timerOn = false
145 }
146 }
147
148
149
150
151 func (b *B) ResetTimer() {
152 if b.extra == nil {
153
154
155 b.extra = make(map[string]float64, 16)
156 } else {
157 clear(b.extra)
158 }
159 if b.timerOn {
160 runtime.ReadMemStats(&memStats)
161 b.startAllocs = memStats.Mallocs
162 b.startBytes = memStats.TotalAlloc
163 b.start = highPrecisionTimeNow()
164 }
165 b.duration = 0
166 b.netAllocs = 0
167 b.netBytes = 0
168 }
169
170
171
172 func (b *B) SetBytes(n int64) { b.bytes = n }
173
174
175
176
177 func (b *B) ReportAllocs() {
178 b.showAllocResult = true
179 }
180
181
182 func (b *B) runN(n int) {
183 benchmarkLock.Lock()
184 defer benchmarkLock.Unlock()
185 ctx, cancelCtx := context.WithCancel(context.Background())
186 defer func() {
187 b.runCleanup(normalPanic)
188 b.checkRaces()
189 }()
190
191
192 runtime.GC()
193 b.resetRaces()
194 b.N = n
195 b.loopN = 0
196 b.ctx = ctx
197 b.cancelCtx = cancelCtx
198
199 b.parallelism = 1
200 b.ResetTimer()
201 b.StartTimer()
202 b.benchFunc(b)
203 b.StopTimer()
204 b.previousN = n
205 b.previousDuration = b.duration
206 }
207
208
209
210 func (b *B) run1() bool {
211 if bstate := b.bstate; bstate != nil {
212
213 if n := len(b.name) + bstate.extLen + 1; n > bstate.maxLen {
214 bstate.maxLen = n + 8
215 }
216 }
217 go func() {
218
219
220 defer func() {
221 b.signal <- true
222 }()
223
224 b.runN(1)
225 }()
226 <-b.signal
227 if b.failed {
228 fmt.Fprintf(b.w, "%s--- FAIL: %s\n%s", b.chatty.prefix(), b.name, b.output)
229 return false
230 }
231
232
233 b.mu.RLock()
234 finished := b.finished
235 b.mu.RUnlock()
236 if b.hasSub.Load() || finished {
237 tag := "BENCH"
238 if b.skipped {
239 tag = "SKIP"
240 }
241 if b.chatty != nil && (len(b.output) > 0 || finished) {
242 b.trimOutput()
243 fmt.Fprintf(b.w, "%s--- %s: %s\n%s", b.chatty.prefix(), tag, b.name, b.output)
244 }
245 return false
246 }
247 return true
248 }
249
250 var labelsOnce sync.Once
251
252
253
254 func (b *B) run() {
255 labelsOnce.Do(func() {
256 fmt.Fprintf(b.w, "goos: %s\n", runtime.GOOS)
257 fmt.Fprintf(b.w, "goarch: %s\n", runtime.GOARCH)
258 if b.importPath != "" {
259 fmt.Fprintf(b.w, "pkg: %s\n", b.importPath)
260 }
261 if cpu := sysinfo.CPUName(); cpu != "" {
262 fmt.Fprintf(b.w, "cpu: %s\n", cpu)
263 }
264 })
265 if b.bstate != nil {
266
267 b.bstate.processBench(b)
268 } else {
269
270 b.doBench()
271 }
272 }
273
274 func (b *B) doBench() BenchmarkResult {
275 go b.launch()
276 <-b.signal
277 return b.result
278 }
279
// predictN estimates how many iterations are needed to run for goalns
// nanoseconds, given that prevIters iterations took prevns nanoseconds.
// last is the iteration count of the previous attempt.
//
// The estimate is padded by 20%, limited to 100x growth over last,
// forced to exceed last by at least one, and capped at 1e9.
func predictN(goalns int64, prevIters int64, prevns int64, last int64) int {
	if prevns == 0 {
		// Round up to avoid dividing by zero.
		prevns = 1
	}

	// Multiply before dividing: for very fast benchmarks prevIters is
	// close to prevns, and dividing first would truncate to 0 or 1.
	var n int64
	if goalns > 0 && prevIters > 0 && goalns > math.MaxInt64/prevIters {
		// The int64 product would overflow (long -benchtime combined with
		// many prior iterations). A wrapped product would collapse the
		// prediction to last+1, so estimate in floating point instead.
		// Any estimate of 1e9 or more ends up at the same final value, so
		// saturating there keeps the conversion in range.
		est := float64(goalns) * float64(prevIters) / float64(prevns)
		switch {
		case est >= 1e9:
			n = 1e9
		case est > 0:
			n = int64(est)
		}
	} else {
		n = goalns * prevIters / prevns
	}

	// Run more iterations than predicted by a factor of 1.2.
	n += n / 5
	// Don't grow too fast in case the previous timing was off.
	n = min(n, 100*last)
	// Be sure to run at least one more iteration than last time.
	n = max(n, last+1)
	// Don't run more than 1e9 iterations.
	n = min(n, 1e9)
	return int(n)
}
302
303
304
305
306
307 func (b *B) launch() {
308
309
310 defer func() {
311 b.signal <- true
312 }()
313
314
315
316 if b.loopN == 0 {
317
318 if b.benchTime.n > 0 {
319
320
321
322 if b.benchTime.n > 1 {
323 b.runN(b.benchTime.n)
324 }
325 } else {
326 d := b.benchTime.d
327 for n := int64(1); !b.failed && b.duration < d && n < 1e9; {
328 last := n
329
330 goalns := d.Nanoseconds()
331 prevIters := int64(b.N)
332 n = int64(predictN(goalns, prevIters, b.duration.Nanoseconds(), last))
333 b.runN(int(n))
334 }
335 }
336 }
337 b.result = BenchmarkResult{b.N, b.duration, b.bytes, b.netAllocs, b.netBytes, b.extra}
338 }
339
340
341
342
343 func (b *B) Elapsed() time.Duration {
344 d := b.duration
345 if b.timerOn {
346 d += highPrecisionTimeSince(b.start)
347 }
348 return d
349 }
350
351
352
353
354
355
356
357
358
359
360 func (b *B) ReportMetric(n float64, unit string) {
361 if unit == "" {
362 panic("metric unit must not be empty")
363 }
364 if strings.IndexFunc(unit, unicode.IsSpace) >= 0 {
365 panic("metric unit must not contain whitespace")
366 }
367 b.extra[unit] = n
368 }
369
370 func (b *B) stopOrScaleBLoop() bool {
371 timeElapsed := highPrecisionTimeSince(b.start)
372 if timeElapsed >= b.benchTime.d {
373
374 b.StopTimer()
375 return false
376 }
377
378 goalns := b.benchTime.d.Nanoseconds()
379 prevIters := int64(b.N)
380 b.N = predictN(goalns, prevIters, timeElapsed.Nanoseconds(), prevIters)
381 b.loopN++
382 return true
383 }
384
385 func (b *B) loopSlowPath() bool {
386 if b.loopN == 0 {
387
388
389
390 b.N = 1
391 b.loopN = 1
392 b.ResetTimer()
393 return true
394 }
395
396 if b.benchTime.n > 0 {
397 if b.N < b.benchTime.n {
398 b.N = b.benchTime.n
399 b.loopN++
400 return true
401 }
402 b.StopTimer()
403 return false
404 }
405
406 return b.stopOrScaleBLoop()
407 }
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442 func (b *B) Loop() bool {
443 if b.loopN != 0 && b.loopN < b.N {
444 b.loopN++
445 return true
446 }
447 return b.loopSlowPath()
448 }
449
450
// BenchmarkResult contains the results of a benchmark run.
type BenchmarkResult struct {
	N         int           // The number of iterations.
	T         time.Duration // The total time taken.
	Bytes     int64         // Bytes processed in one iteration.
	MemAllocs uint64        // The total number of memory allocations.
	MemBytes  uint64        // The total number of bytes allocated.

	// Extra records additional metrics reported by ReportMetric.
	Extra map[string]float64
}

// NsPerOp returns the "ns/op" metric: the value in Extra if one was
// reported, otherwise T/N, or 0 when N is not positive.
func (r BenchmarkResult) NsPerOp() int64 {
	if v, ok := r.Extra["ns/op"]; ok {
		return int64(v)
	}
	if r.N <= 0 {
		return 0
	}
	return r.T.Nanoseconds() / int64(r.N)
}

// mbPerSec returns the "MB/s" metric: the value in Extra if one was
// reported, otherwise Bytes*N/1e6 divided by T in seconds, or 0 when any
// of Bytes, T or N is not positive.
func (r BenchmarkResult) mbPerSec() float64 {
	if v, ok := r.Extra["MB/s"]; ok {
		return v
	}
	if r.Bytes <= 0 || r.T <= 0 || r.N <= 0 {
		return 0
	}
	return (float64(r.Bytes) * float64(r.N) / 1e6) / r.T.Seconds()
}

// AllocsPerOp returns the "allocs/op" metric: the value in Extra if one
// was reported, otherwise MemAllocs/N, or 0 when N is not positive.
func (r BenchmarkResult) AllocsPerOp() int64 {
	if v, ok := r.Extra["allocs/op"]; ok {
		return int64(v)
	}
	if r.N <= 0 {
		return 0
	}
	return int64(r.MemAllocs) / int64(r.N)
}

// AllocedBytesPerOp returns the "B/op" metric: the value in Extra if one
// was reported, otherwise MemBytes/N, or 0 when N is not positive.
func (r BenchmarkResult) AllocedBytesPerOp() int64 {
	if v, ok := r.Extra["B/op"]; ok {
		return int64(v)
	}
	if r.N <= 0 {
		return 0
	}
	return int64(r.MemBytes) / int64(r.N)
}

// String returns a tab-separated summary of the result: the iteration
// count, then ns/op and MB/s when non-zero, then any extra metrics in
// sorted order. Memory metrics are not included; see MemString.
func (r BenchmarkResult) String() string {
	buf := new(strings.Builder)
	fmt.Fprintf(buf, "%8d", r.N)

	// Get ns/op as a float. With no iterations there is nothing to
	// divide by: leave ns at zero (as NsPerOp does) so the column is
	// omitted instead of printing NaN or Inf.
	ns, ok := r.Extra["ns/op"]
	if !ok && r.N > 0 {
		ns = float64(r.T.Nanoseconds()) / float64(r.N)
	}
	if ns != 0 {
		buf.WriteByte('\t')
		prettyPrint(buf, ns, "ns/op")
	}

	if mbs := r.mbPerSec(); mbs != 0 {
		fmt.Fprintf(buf, "\t%7.2f MB/s", mbs)
	}

	// Print extra metrics that are not represented in the standard
	// metrics, in a deterministic order.
	var extraKeys []string
	for k := range r.Extra {
		switch k {
		case "ns/op", "MB/s", "B/op", "allocs/op":
			// Built-in metrics are reported elsewhere.
			continue
		}
		extraKeys = append(extraKeys, k)
	}
	slices.Sort(extraKeys)
	for _, k := range extraKeys {
		buf.WriteByte('\t')
		prettyPrint(buf, r.Extra[k], k)
	}
	return buf.String()
}

// prettyPrint writes x followed by a space and unit to w. Every format
// reserves ten columns for the integer part, so decimal points line up;
// smaller magnitudes get more fractional digits so that about four
// significant figures are shown.
func prettyPrint(w io.Writer, x float64, unit string) {
	var format string
	switch y := math.Abs(x); {
	case y == 0 || y >= 999.95:
		format = "%10.0f %s"
	case y >= 99.995:
		format = "%12.1f %s"
	case y >= 9.9995:
		format = "%13.2f %s"
	case y >= 0.99995:
		format = "%14.3f %s"
	case y >= 0.099995:
		format = "%15.4f %s"
	case y >= 0.0099995:
		format = "%16.5f %s"
	case y >= 0.00099995:
		format = "%17.6f %s"
	default:
		format = "%18.7f %s"
	}
	fmt.Fprintf(w, format, x, unit)
}

// MemString returns r.AllocedBytesPerOp and r.AllocsPerOp formatted as
// "B/op" and "allocs/op" columns.
func (r BenchmarkResult) MemString() string {
	return fmt.Sprintf("%8d B/op\t%8d allocs/op",
		r.AllocedBytesPerOp(), r.AllocsPerOp())
}
584
585
// benchmarkName returns the display name of a benchmark run with n
// procs: name itself when n is 1, otherwise name with "-n" appended.
func benchmarkName(name string, n int) string {
	if n == 1 {
		return name
	}
	return fmt.Sprintf("%s-%d", name, n)
}
592
593 type benchState struct {
594 match *matcher
595
596 maxLen int
597 extLen int
598 }
599
600
601
602 func RunBenchmarks(matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) {
603 runBenchmarks("", matchString, benchmarks)
604 }
605
606 func runBenchmarks(importPath string, matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) bool {
607
608 if len(*matchBenchmarks) == 0 {
609 return true
610 }
611
612 maxprocs := 1
613 for _, procs := range cpuList {
614 if procs > maxprocs {
615 maxprocs = procs
616 }
617 }
618 bstate := &benchState{
619 match: newMatcher(matchString, *matchBenchmarks, "-test.bench", *skip),
620 extLen: len(benchmarkName("", maxprocs)),
621 }
622 var bs []InternalBenchmark
623 for _, Benchmark := range benchmarks {
624 if _, matched, _ := bstate.match.fullName(nil, Benchmark.Name); matched {
625 bs = append(bs, Benchmark)
626 benchName := benchmarkName(Benchmark.Name, maxprocs)
627 if l := len(benchName) + bstate.extLen + 1; l > bstate.maxLen {
628 bstate.maxLen = l
629 }
630 }
631 }
632 main := &B{
633 common: common{
634 name: "Main",
635 w: os.Stdout,
636 bench: true,
637 },
638 importPath: importPath,
639 benchFunc: func(b *B) {
640 for _, Benchmark := range bs {
641 b.Run(Benchmark.Name, Benchmark.F)
642 }
643 },
644 benchTime: benchTime,
645 bstate: bstate,
646 }
647 if Verbose() {
648 main.chatty = newChattyPrinter(main.w)
649 }
650 main.runN(1)
651 return !main.failed
652 }
653
654
655 func (s *benchState) processBench(b *B) {
656 for i, procs := range cpuList {
657 for j := uint(0); j < *count; j++ {
658 runtime.GOMAXPROCS(procs)
659 benchName := benchmarkName(b.name, procs)
660
661
662 if b.chatty == nil {
663 fmt.Fprintf(b.w, "%-*s\t", s.maxLen, benchName)
664 }
665
666 if i > 0 || j > 0 {
667 b = &B{
668 common: common{
669 signal: make(chan bool),
670 name: b.name,
671 w: b.w,
672 chatty: b.chatty,
673 bench: true,
674 },
675 benchFunc: b.benchFunc,
676 benchTime: b.benchTime,
677 }
678 b.run1()
679 }
680 r := b.doBench()
681 if b.failed {
682
683
684
685 fmt.Fprintf(b.w, "%s--- FAIL: %s\n%s", b.chatty.prefix(), benchName, b.output)
686 continue
687 }
688 results := r.String()
689 if b.chatty != nil {
690 fmt.Fprintf(b.w, "%-*s\t", s.maxLen, benchName)
691 }
692 if *benchmarkMemory || b.showAllocResult {
693 results += "\t" + r.MemString()
694 }
695 fmt.Fprintln(b.w, results)
696
697
698 if len(b.output) > 0 {
699 b.trimOutput()
700 fmt.Fprintf(b.w, "%s--- BENCH: %s\n%s", b.chatty.prefix(), benchName, b.output)
701 }
702 if p := runtime.GOMAXPROCS(-1); p != procs {
703 fmt.Fprintf(os.Stderr, "testing: %s left GOMAXPROCS set to %d\n", benchName, p)
704 }
705 if b.chatty != nil && b.chatty.json {
706 b.chatty.Updatef("", "=== NAME %s\n", "")
707 }
708 }
709 }
710 }
711
712
713
714
// hideStdoutForTesting, when true, stops Run from printing the names of
// the benchmarks it starts in chatty mode. It defaults to false.
var hideStdoutForTesting bool
716
717
718
719
720
721
722 func (b *B) Run(name string, f func(b *B)) bool {
723
724
725 b.hasSub.Store(true)
726 benchmarkLock.Unlock()
727 defer benchmarkLock.Lock()
728
729 benchName, ok, partial := b.name, true, false
730 if b.bstate != nil {
731 benchName, ok, partial = b.bstate.match.fullName(&b.common, name)
732 }
733 if !ok {
734 return true
735 }
736 var pc [maxStackLen]uintptr
737 n := runtime.Callers(2, pc[:])
738 sub := &B{
739 common: common{
740 signal: make(chan bool),
741 name: benchName,
742 parent: &b.common,
743 level: b.level + 1,
744 creator: pc[:n],
745 w: b.w,
746 chatty: b.chatty,
747 bench: true,
748 },
749 importPath: b.importPath,
750 benchFunc: f,
751 benchTime: b.benchTime,
752 bstate: b.bstate,
753 }
754 if partial {
755
756
757 sub.hasSub.Store(true)
758 }
759
760 if b.chatty != nil {
761 labelsOnce.Do(func() {
762 fmt.Printf("goos: %s\n", runtime.GOOS)
763 fmt.Printf("goarch: %s\n", runtime.GOARCH)
764 if b.importPath != "" {
765 fmt.Printf("pkg: %s\n", b.importPath)
766 }
767 if cpu := sysinfo.CPUName(); cpu != "" {
768 fmt.Printf("cpu: %s\n", cpu)
769 }
770 })
771
772 if !hideStdoutForTesting {
773 if b.chatty.json {
774 b.chatty.Updatef(benchName, "=== RUN %s\n", benchName)
775 }
776 fmt.Println(benchName)
777 }
778 }
779
780 if sub.run1() {
781 sub.run()
782 }
783 b.add(sub.result)
784 return !sub.failed
785 }
786
787
788
789
790 func (b *B) add(other BenchmarkResult) {
791 r := &b.result
792
793
794 r.N = 1
795 r.T += time.Duration(other.NsPerOp())
796 if other.Bytes == 0 {
797
798
799 b.missingBytes = true
800 r.Bytes = 0
801 }
802 if !b.missingBytes {
803 r.Bytes += other.Bytes
804 }
805 r.MemAllocs += uint64(other.AllocsPerOp())
806 r.MemBytes += uint64(other.AllocedBytesPerOp())
807 }
808
809
810 func (b *B) trimOutput() {
811
812
813
814 const maxNewlines = 10
815 for nlCount, j := 0, 0; j < len(b.output); j++ {
816 if b.output[j] == '\n' {
817 nlCount++
818 if nlCount >= maxNewlines {
819 b.output = append(b.output[:j], "\n\t... [output truncated]\n"...)
820 break
821 }
822 }
823 }
824 }
825
826
// A PB is used by RunParallel for running parallel benchmarks. Each
// worker goroutine gets its own PB; all of them share one global
// iteration counter.
type PB struct {
	globalN *atomic.Uint64 // shared between all worker goroutines iteration counter
	grain   uint64         // acquire that many iterations from globalN at once
	cache   uint64         // local cache of acquired iterations
	bN      uint64         // total number of iterations to execute (b.N)
}

// Next reports whether there are more iterations to execute. It hands
// out iterations from the local cache and refills the cache from the
// shared counter, one grain at a time, when it runs dry.
func (pb *PB) Next() bool {
	if pb.cache == 0 {
		claimed := pb.globalN.Add(pb.grain)
		limit := pb.bN + pb.grain
		switch {
		case claimed <= pb.bN:
			// The whole grain lies within the budget.
			pb.cache = pb.grain
		case claimed < limit:
			// The grain straddles the end of the budget: keep only the
			// part that lies within it.
			pb.cache = limit - claimed
		default:
			// The budget was exhausted before this grain.
			return false
		}
	}
	pb.cache--
	return true
}
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863 func (b *B) RunParallel(body func(*PB)) {
864 if b.N == 0 {
865 return
866 }
867
868
869
870 grain := uint64(0)
871 if b.previousN > 0 && b.previousDuration > 0 {
872 grain = 1e5 * uint64(b.previousN) / uint64(b.previousDuration)
873 }
874 if grain < 1 {
875 grain = 1
876 }
877
878
879 if grain > 1e4 {
880 grain = 1e4
881 }
882
883 var n atomic.Uint64
884 numProcs := b.parallelism * runtime.GOMAXPROCS(0)
885 var wg sync.WaitGroup
886 wg.Add(numProcs)
887 for p := 0; p < numProcs; p++ {
888 go func() {
889 defer wg.Done()
890 pb := &PB{
891 globalN: &n,
892 grain: grain,
893 bN: uint64(b.N),
894 }
895 body(pb)
896 }()
897 }
898 wg.Wait()
899 if n.Load() <= uint64(b.N) && !b.Failed() {
900 b.Fatal("RunParallel: body exited without pb.Next() == false")
901 }
902 }
903
904
905
906
907 func (b *B) SetParallelism(p int) {
908 if p >= 1 {
909 b.parallelism = p
910 }
911 }
912
913
914
915
916
917
918
919
920
921 func Benchmark(f func(b *B)) BenchmarkResult {
922 b := &B{
923 common: common{
924 signal: make(chan bool),
925 w: discard{},
926 },
927 benchFunc: f,
928 benchTime: benchTime,
929 }
930 if b.run1() {
931 b.run()
932 }
933 return b.result
934 }
935
// discard is a writer that throws away everything written to it.
type discard struct{}

// Write reports all of b as written without storing it; it never fails.
func (discard) Write(b []byte) (n int, err error) {
	n = len(b)
	return n, nil
}
939
View as plain text