1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94
|
package command
import (
"context"
"fmt"
"time"
"github.com/kelseyhightower/envconfig"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"gitlab.com/gitlab-org/gitaly/v16/internal/structerr"
"gitlab.com/gitlab-org/gitaly/v16/internal/tracing"
"gitlab.com/gitlab-org/gitaly/v16/proto/go/gitalypb"
"google.golang.org/protobuf/types/known/durationpb"
)
var (
spawnTokens chan struct{}
spawnConfig SpawnConfig
spawnTimeoutCount = promauto.NewCounter(
prometheus.CounterOpts{
Name: "gitaly_spawn_timeouts_total",
Help: "Number of process spawn timeouts",
},
)
)
// SpawnConfig holds configuration for command spawning timeouts and parallelism.
// The package-level instance is populated in init() from environment variables
// carrying the GITALY_COMMAND_SPAWN prefix.
type SpawnConfig struct {
// Timeout is the maximum time a caller waits for a spawn token before the
// spawn is failed. The default value (10 seconds) is very high. Spawning
// should take milliseconds or less. If we hit 10 seconds, something is
// wrong, and failing the request will create breathing room. Can be
// overridden at process start with the GITALY_COMMAND_SPAWN_TIMEOUT
// environment variable.
Timeout time.Duration `split_words:"true" default:"10s"`
// MaxParallel limits the number of goroutines that can spawn a
// process at the same time. These parallel spawns will contend for a
// single lock (syscall.ForkLock) in exec.Cmd.Start(). Can be overridden at
// process start with the GITALY_COMMAND_SPAWN_MAX_PARALLEL environment
// variable.
//
// Note that this does not limit the total number of child processes that
// can be attached to Gitaly at the same time. It only limits the rate at
// which we can create new child processes.
MaxParallel int `split_words:"true" default:"10"`
}
// init loads the spawn configuration from the environment and allocates the
// token channel that bounds concurrent process spawns.
func init() {
	// Combined with the split_words struct tags this yields the
	// GITALY_COMMAND_SPAWN_TIMEOUT and GITALY_COMMAND_SPAWN_MAX_PARALLEL
	// environment variables.
	const envPrefix = "gitaly_command_spawn"

	// MustProcess panics if the environment cannot be parsed into spawnConfig.
	envconfig.MustProcess(envPrefix, &spawnConfig)

	// The channel capacity is the number of tokens that can be held at once.
	// NOTE(review): a MaxParallel of 0 produces an unbuffered channel, so no
	// token could ever be acquired — confirm whether that should be rejected.
	spawnTokens = make(chan struct{}, spawnConfig.MaxParallel)
}
// getSpawnToken acquires one of the spawnConfig.MaxParallel spawn tokens,
// waiting for at most spawnConfig.Timeout or until ctx is done.
//
// On success it returns a putToken function that releases the token; the
// caller must invoke it exactly once when the spawn has finished. If the
// timeout elapses first, the spawn timeout counter is incremented and a
// ResourceExhausted error carrying a gitalypb.LimitError detail is returned.
// If ctx is done first, ctx.Err() is returned.
func getSpawnToken(ctx context.Context) (putToken func(), err error) {
	// Go has a global lock (syscall.ForkLock) for spawning new processes.
	// This select statement is a safety valve to prevent lots of Gitaly
	// requests from piling up behind the ForkLock if forking for some reason
	// slows down. This has happened in real life, see
	// https://gitlab.com/gitlab-org/gitaly/issues/823.
	start := time.Now()

	span, ctx := tracing.StartSpanIfHasParent(ctx, "command.getSpawnToken", nil)
	defer span.Finish()

	// Use an explicit timer instead of time.After so that it is stopped as
	// soon as we return. With time.After, every call would leave a timer
	// armed for the whole timeout even when a token is available immediately.
	timeout := time.NewTimer(spawnConfig.Timeout)
	defer timeout.Stop()

	select {
	case spawnTokens <- struct{}{}:
		recordTime(ctx, start, "")

		return func() {
			<-spawnTokens
		}, nil
	case <-timeout.C:
		recordTime(ctx, start, "spawn token timeout")
		spawnTimeoutCount.Inc()

		msg := fmt.Sprintf("process spawn timed out after %v", spawnConfig.Timeout)

		return nil, structerr.NewResourceExhausted(msg).WithDetail(&gitalypb.LimitError{
			ErrorMessage: msg,
			RetryAfter:   durationpb.New(0),
		})
	case <-ctx.Done():
		return nil, ctx.Err()
	}
}
// recordTime reports how long the caller has been waiting for a spawn token
// since start to the command stats attached to ctx, if there are any. The wait
// is added to the "command.spawn_token_wait_ms" sum in milliseconds. A
// non-empty msg is additionally stored as "command.spawn_token_error" metadata.
func recordTime(ctx context.Context, start time.Time, msg string) {
	waited := time.Since(start)

	stats := StatsFromContext(ctx)
	if stats == nil {
		// No stats collector on this context, so there is nowhere to report to.
		return
	}

	stats.RecordSum("command.spawn_token_wait_ms", int(waited.Milliseconds()))

	if msg == "" {
		return
	}

	stats.RecordMetadata("command.spawn_token_error", msg)
}
|