File: filecache.go

package info (click to toggle)
golang-golang-x-tools 1%3A0.5.0%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bookworm-backports
  • size: 16,592 kB
  • sloc: javascript: 2,011; asm: 1,635; sh: 192; yacc: 155; makefile: 52; ansic: 8
file content (272 lines) | stat: -rw-r--r-- 8,188 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// The filecache package provides a file-based shared durable blob cache.
//
// The cache is a machine-global mapping from (kind string, key
// [32]byte) to []byte, where kind is an identifier describing the
// namespace or purpose (e.g. "analysis"), and key is a SHA-256 digest
// of the recipe of the value. (It need not be the digest of the value
// itself, so you can query the cache without knowing what value the
// recipe would produce.)
//
// The space budget of the cache can be controlled by [SetBudget].
// Cache entries may be evicted at any time or in any order.
//
// The Get and Set operations are concurrency-safe.
package filecache

import (
	"crypto/sha256"
	"errors"
	"fmt"
	"io"
	"log"
	"math/rand"
	"os"
	"path/filepath"
	"sort"
	"sync"
	"sync/atomic"
	"time"

	"golang.org/x/tools/internal/robustio"
)

// Get retrieves from the cache and returns a newly allocated
// copy of the value most recently supplied to Set(kind, key),
// possibly by another process.
// Get returns ErrNotFound if the value was not found.
func Get(kind string, key [32]byte) ([]byte, error) {
	name := filename(kind, key)
	data, err := robustio.ReadFile(name)
	if err != nil {
		if errors.Is(err, os.ErrNotExist) {
			return nil, ErrNotFound
		}
		return nil, err
	}

	// Update file time for use by LRU eviction.
	// (This turns every read into a write operation.
	// If this is a performance problem, we should
	// touch the files aynchronously.)
	now := time.Now()
	if err := setFileTime(name, now, now); err != nil {
		return nil, fmt.Errorf("failed to update access time: %w", err)
	}

	return data, nil
}

// ErrNotFound is the distinguished error
// returned by Get when the key is not found.
var ErrNotFound = fmt.Errorf("not found")

// Set updates the value in the cache.
func Set(kind string, key [32]byte, value []byte) error {
	name := filename(kind, key)
	if err := os.MkdirAll(filepath.Dir(name), 0700); err != nil {
		return err
	}

	// The sequence below uses rename to achieve atomic cache
	// updates even with concurrent processes.
	var cause error
	for try := 0; try < 3; try++ {
		tmpname := fmt.Sprintf("%s.tmp.%d", name, rand.Int())
		tmp, err := os.OpenFile(tmpname, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0600)
		if err != nil {
			if os.IsExist(err) {
				// Create raced with another thread (or stale file).
				// Try again.
				cause = err
				continue
			}
			return err
		}

		_, err = tmp.Write(value)
		if closeErr := tmp.Close(); err == nil {
			err = closeErr // prefer error from write over close
		}
		if err != nil {
			os.Remove(tmp.Name()) // ignore error
			return err
		}

		err = robustio.Rename(tmp.Name(), name)
		if err == nil {
			return nil // success
		}
		cause = err

		// Rename raced with another thread. Try again.
		os.Remove(tmp.Name()) // ignore error
	}
	return cause
}

var budget int64 = 1e9 // 1GB

// SetBudget sets a soft limit on disk usage of the cache (in bytes)
// and returns the previous value. Supplying a negative value queries
// the current value without changing it.
//
// If two gopls processes have different budgets, the one with the
// lower budget will collect garbage more actively, but both will
// observe the effect.
func SetBudget(new int64) (old int64) {
	if new < 0 {
		return atomic.LoadInt64(&budget)
	}
	return atomic.SwapInt64(&budget, new)
}

// --- implementation ----

// filename returns the cache entry of the specified kind and key.
//
// A typical cache entry is a file name such as:
//
//	$HOME/Library/Caches / gopls / VVVVVVVV / kind / KK / KKKK...KKKK
//
// The portions separated by spaces are as follows:
// - The user's preferred cache directory; the default value varies by OS.
// - The constant "gopls".
// - The "version", 32 bits of the digest of the gopls executable.
// - The kind or purpose of this cache subtree (e.g. "analysis").
// - The first 8 bits of the key, to avoid huge directories.
// - The full 256 bits of the key.
//
// Once a file is written its contents are never modified, though it
// may be atomically replaced or removed.
//
// New versions of gopls are free to reorganize the contents of the
// version directory as needs evolve.  But all versions of gopls must
// in perpetuity treat the "gopls" directory in a common fashion.
//
// In particular, each gopls process attempts to garbage collect
// the entire gopls directory so that newer binaries can clean up
// after older ones: in the development cycle especially, new
// new versions may be created frequently.
func filename(kind string, key [32]byte) string {
	hex := fmt.Sprintf("%x", key)
	return filepath.Join(getCacheDir(), kind, hex[:2], hex)
}

// getCacheDir returns the persistent cache directory of all processes
// running this version of the gopls executable.
//
// It must incorporate the hash of the executable so that we needn't
// worry about incompatible changes to the file format or changes to
// the algorithm that produced the index.
func getCacheDir() string {
	cacheDirOnce.Do(func() {
		// Use user's preferred cache directory.
		userDir := os.Getenv("GOPLS_CACHE")
		if userDir == "" {
			var err error
			userDir, err = os.UserCacheDir()
			if err != nil {
				userDir = os.TempDir()
			}
		}
		goplsDir := filepath.Join(userDir, "gopls")

		// Start the garbage collector.
		go gc(goplsDir)

		// Compute the hash of this executable (~20ms) and create a subdirectory.
		hash, err := hashExecutable()
		if err != nil {
			log.Fatalf("can't hash gopls executable: %v", err)
		}
		// Use only 32 bits of the digest to avoid unwieldy filenames.
		// It's not an adversarial situation.
		cacheDir = filepath.Join(goplsDir, fmt.Sprintf("%x", hash[:4]))
		if err := os.MkdirAll(cacheDir, 0700); err != nil {
			log.Fatalf("can't create cache: %v", err)
		}
	})
	return cacheDir
}

var (
	cacheDirOnce sync.Once
	cacheDir     string // only accessed by getCacheDir
)

func hashExecutable() (hash [32]byte, err error) {
	exe, err := os.Executable()
	if err != nil {
		return hash, err
	}
	f, err := os.Open(exe)
	if err != nil {
		return hash, err
	}
	defer f.Close()
	h := sha256.New()
	if _, err := io.Copy(h, f); err != nil {
		return hash, fmt.Errorf("can't read executable: %w", err)
	}
	h.Sum(hash[:0])
	return hash, nil
}

// gc runs forever, periodically deleting files from the gopls
// directory until the space budget is no longer exceeded, and also
// deleting files older than the maximum age, regardless of budget.
//
// One gopls process may delete garbage created by a different gopls
// process, possibly running a different version of gopls, possibly
// running concurrently.
func gc(goplsDir string) {
	const period = 1 * time.Minute         // period between collections
	const statDelay = 1 * time.Millisecond // delay between stats to smooth out I/O
	const maxAge = 7 * 24 * time.Hour      // max time since last access before file is deleted

	for {
		// Enumerate all files in the cache.
		type item struct {
			path string
			stat os.FileInfo
		}
		var files []item
		var total int64 // bytes
		_ = filepath.Walk(goplsDir, func(path string, stat os.FileInfo, err error) error {
			// TODO(adonovan): opt: also collect empty directories,
			// as they typically occupy around 1KB.
			if err == nil && !stat.IsDir() {
				files = append(files, item{path, stat})
				total += stat.Size()
				time.Sleep(statDelay)
			}
			return nil
		})

		// Sort oldest files first.
		sort.Slice(files, func(i, j int) bool {
			return files[i].stat.ModTime().Before(files[j].stat.ModTime())
		})

		// Delete oldest files until we're under budget.
		// Unconditionally delete files we haven't used in ages.
		budget := atomic.LoadInt64(&budget)
		for _, file := range files {
			age := time.Since(file.stat.ModTime())
			if total > budget || age > maxAge {
				if false { // debugging
					log.Printf("deleting stale file %s (%dB, age %v)",
						file.path, file.stat.Size(), age)
				}
				os.Remove(filepath.Join(goplsDir, file.path)) // ignore error
				total -= file.stat.Size()
			}
		}

		time.Sleep(period)
	}
}