// fgprof is a sampling Go profiler that allows you to analyze On-CPU as well
// as [Off-CPU](http://www.brendangregg.com/offcpuanalysis.html) (e.g. I/O)
// time together.
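//
// A minimal usage sketch (assuming the canonical import path
// github.com/felixge/fgprof and that FormatFolded is one of the Format values
// defined elsewhere in this package; the error returned by stop is ignored for
// brevity):
//
//	stop := fgprof.Start(os.Stdout, fgprof.FormatFolded)
//	defer stop()
//	// ... run the workload you want to profile ...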
package fgprof

import (
	"io"
	"runtime"
	"strings"
	"time"
)

// Start begins profiling the goroutines of the program and returns a function
// that needs to be invoked by the caller to stop the profiling and write the
// results to w using the given format.
func Start(w io.Writer, format Format) func() error {
	// Go's CPU profiler uses 100hz, but 99hz might be less likely to result in
	// accidental synchronization with the program we're profiling.
	const hz = 99
	ticker := time.NewTicker(time.Second / hz)
	stopCh := make(chan struct{})

	prof := &profiler{}
	stackCounts := stackCounter{}

	go func() {
		defer ticker.Stop()

		for {
			select {
			case <-ticker.C:
				stacks := prof.GoroutineProfile()
				stackCounts.Update(stacks)
			case <-stopCh:
				return
			}
		}
	}()

	return func() error {
		stopCh <- struct{}{}
		return writeFormat(w, stackCounts.HumanMap(prof.SelfFrame()), format, hz)
	}
}
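
// A hedged sketch of how the function returned by Start might be used to serve
// a profile over HTTP for a fixed duration. FormatPprof is assumed to be one
// of the Format values defined elsewhere in this package, and the route name
// is illustrative only:
//
//	http.HandleFunc("/debug/fgprof", func(w http.ResponseWriter, r *http.Request) {
//		stop := Start(w, FormatPprof)
//		time.Sleep(30 * time.Second)
//		if err := stop(); err != nil {
//			http.Error(w, err.Error(), http.StatusInternalServerError)
//		}
//	})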

// profiler provides a convenient and performant way to access
// runtime.GoroutineProfile().
type profiler struct {
	stacks    []runtime.StackRecord
	selfFrame *runtime.Frame
}

// GoroutineProfile returns the stacks of all goroutines currently managed by
// the scheduler. This includes both goroutines that are currently running
// (On-CPU) as well as waiting (Off-CPU).
func (p *profiler) GoroutineProfile() []runtime.StackRecord {
	if p.selfFrame == nil {
		// Determine the runtime.Frame of this func so we can hide it from our
		// profiling output.
		rpc := make([]uintptr, 1)
		n := runtime.Callers(1, rpc)
		if n < 1 {
			panic("could not determine selfFrame")
		}
		selfFrame, _ := runtime.CallersFrames(rpc).Next()
		p.selfFrame = &selfFrame
	}

	// We don't know how many goroutines exist, so we have to grow p.stacks
	// dynamically. We overshoot by 10% since it's possible that more goroutines
	// are launched in between two calls to GoroutineProfile. Once p.stacks
	// reaches the maximum number of goroutines used by the program, it will get
	// reused indefinitely, eliminating the extra GoroutineProfile call and
	// allocation needed to grow it.
	//
	// TODO(fg) There might be workloads where it would be nice to shrink
	// p.stacks dynamically as well, but let's not over-engineer this until we
	// understand those cases better.
	for {
		n, ok := runtime.GoroutineProfile(p.stacks)
		if !ok {
			p.stacks = make([]runtime.StackRecord, int(float64(n)*1.1))
		} else {
			return p.stacks[0:n]
		}
	}
}

// SelfFrame returns the runtime.Frame of the profiler's own GoroutineProfile
// method, which callers can use to exclude the profiler's goroutine from
// their output.
func (p *profiler) SelfFrame() *runtime.Frame {
	return p.selfFrame
}

// stringStackCounter counts how often each unique stack has been observed,
// keyed by the ';'-joined names of its functions, ordered from root to leaf.
type stringStackCounter map[string]int

func (s stringStackCounter) Update(p []runtime.StackRecord) {
	for _, pp := range p {
		frames := runtime.CallersFrames(pp.Stack())

		var stack []string
		for {
			frame, more := frames.Next()
			stack = append([]string{frame.Function}, stack...)
			if !more {
				break
			}
		}
		key := strings.Join(stack, ";")
		s[key]++
	}
}

// stackCounter counts how often each unique stack has been observed, keyed by
// its raw program counters.
type stackCounter map[[32]uintptr]int

func (s stackCounter) Update(p []runtime.StackRecord) {
	for _, pp := range p {
		s[pp.Stack0]++
	}
}

// HumanMap converts the raw stack counts into a map keyed by the ';'-joined
// function names of each stack (root to leaf). Stacks containing the exclude
// frame, i.e. the profiler's own goroutine, are skipped.
//
// TODO(fg) Create a better interface that avoids the pprof output having to
// split the stacks using the `;` separator.
func (s stackCounter) HumanMap(exclude *runtime.Frame) map[string]int {
	m := map[string]int{}
outer:
	for stack0, count := range s {
		frames := runtime.CallersFrames((&runtime.StackRecord{Stack0: stack0}).Stack())

		var stack []string
		for {
			frame, more := frames.Next()
			if frame.Entry == exclude.Entry {
				continue outer
			}
			stack = append([]string{frame.Function}, stack...)
			if !more {
				break
			}
		}
		key := strings.Join(stack, ";")
		m[key] = count
	}
	return m
}
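
// For illustration only (the function names and counts below are made up), the
// map returned by HumanMap might look like this, with each key holding a
// ';'-joined stack from root to leaf and each value holding the number of
// samples in which that stack was observed:
//
//	map[string]int{
//		"runtime.main;main.main;main.work;net/http.(*Client).Do": 122,
//		"runtime.main;main.main;time.Sleep":                      883,
//	}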