File: retry.go

package info (click to toggle)
incus 6.0.5-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 24,392 kB
  • sloc: sh: 16,313; ansic: 3,121; python: 457; makefile: 337; ruby: 51; sql: 50; lisp: 6
file content (102 lines) | stat: -rw-r--r-- 2,402 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
package query

import (
	"context"
	"database/sql"
	"errors"
	"math"
	"math/rand/v2"
	"net/http"
	"strings"
	"time"

	"github.com/cowsql/go-cowsql/driver"
	"github.com/mattn/go-sqlite3"

	"github.com/lxc/incus/v6/shared/api"
	"github.com/lxc/incus/v6/shared/logger"
)

// maxRetries bounds the retry loop in Retry: the wrapped function is invoked
// at most this many times before the last error is returned to the caller.
const maxRetries = 250

// Retry wraps a function that interacts with the database, and retries it in
// case a transient error is hit.
//
// f is invoked up to maxRetries times, with a short randomized pause between
// attempts. The loop stops as soon as one of the following holds:
//   - f returns nil;
//   - the error matches context.Canceled;
//   - the error matches sql.ErrNoRows or is reported as http.StatusNotFound
//     by api.StatusErrorCheck;
//   - IsRetriableError reports the error as non-transient;
//   - the attempt budget is exhausted.
//
// The error from the last invocation of f is returned (nil on success).
//
// This should be typically used to wrap transactions.
func Retry(ctx context.Context, f func(ctx context.Context) error) error {
	var err error
	for i := range maxRetries {
		err = f(ctx)
		if err == nil {
			// The function succeeded, we're done here.
			break
		}

		if errors.Is(err, context.Canceled) {
			// The function was canceled, don't retry.
			break
		}

		// No point in re-trying or logging a no-row or not found error.
		if errors.Is(err, sql.ErrNoRows) || api.StatusErrorCheck(err, http.StatusNotFound) {
			break
		}

		// Process actual errors.
		if !IsRetriableError(err) {
			logger.Debug("Database error", logger.Ctx{"err": err})
			break
		}

		// Ranging over maxRetries yields 0..maxRetries-1, so the final
		// attempt has index maxRetries-1. Give up here rather than logging
		// "retrying" and sleeping for an attempt that will never happen.
		if i == maxRetries-1 {
			logger.Warn("Database error, giving up", logger.Ctx{"attempt": i, "err": err})
			break
		}

		logger.Debug("Database error, retrying", logger.Ctx{"attempt": i, "err": err})

		// Randomize the pause so concurrent retriers do not wake in lockstep.
		time.Sleep(jitterDeviation(0.8, 100*time.Millisecond))
	}

	return err
}

func jitterDeviation(factor float64, duration time.Duration) time.Duration {
	floor := int64(math.Floor(float64(duration) * (1 - factor)))
	ceil := int64(math.Ceil(float64(duration) * (1 + factor)))
	return time.Duration(rand.Int64N(ceil-floor) + floor)
}

// IsRetriableError returns true if the given error might be transient and the
// interaction can be safely retried.
func IsRetriableError(err error) bool {
	var dErr *driver.Error

	if errors.As(err, &dErr) && dErr.Code == driver.ErrBusy {
		return true
	}

	if errors.Is(err, sqlite3.ErrLocked) || errors.Is(err, sqlite3.ErrBusy) {
		return true
	}

	// Unwrap errors one at a time.
	for ; err != nil; err = errors.Unwrap(err) {
		if strings.Contains(err.Error(), "database is locked") {
			return true
		}

		if strings.Contains(err.Error(), "cannot start a transaction within a transaction") {
			return true
		}

		if strings.Contains(err.Error(), "bad connection") {
			return true
		}

		if strings.Contains(err.Error(), "checkpoint in progress") {
			return true
		}
	}

	return false
}