File: cache.go

package info (click to toggle)
snapd 2.74.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 81,428 kB
  • sloc: sh: 16,966; ansic: 16,788; python: 11,332; makefile: 1,897; exp: 190; awk: 58; xml: 22
file content (404 lines) | stat: -rw-r--r-- 12,428 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
// -*- Mode: Go; indent-tabs-mode: t -*-

/*
 * Copyright (C) 2017 Canonical Ltd
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 3 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

package store

import (
	"errors"
	"fmt"
	"io/fs"
	"os"
	"path/filepath"
	"sort"
	"sync"
	"syscall"
	"time"

	"github.com/snapcore/snapd/logger"
	"github.com/snapcore/snapd/osutil"
	"github.com/snapcore/snapd/strutil"
	"github.com/snapcore/snapd/strutil/quantity"
)

// DefaultCachePolicyCore is a recommended default policy for Core systems.
var DefaultCachePolicyCore = CachePolicy{
	// at most this many unreferenced items
	MaxItems: 5,
	// unreferenced items older than 30 days are removed
	MaxAge: 30 * 24 * time.Hour,
	// try to keep cache < 1GB
	MaxSizeBytes: 1 * 1024 * 1024 * 1024,
}

// DefaultCachePolicyClassic is a recommended default policy for classic
// systems.
var DefaultCachePolicyClassic = CachePolicy{
	// at most this many unreferenced items
	MaxItems: 5,
	// unreferenced items older than 30 days are removed
	MaxAge: 30 * 24 * time.Hour,
	// policy for classic systems has no size limit
}

// overridden in the unit tests
var osRemove = os.Remove

var ErrCleanupBusy = errors.New("cannot perform cache cleanup: cache is busy")

// downloadCache is the interface that a store download cache must provide
type downloadCache interface {
	// Get retrieves the given cacheKey content and puts it into targetPath. Returns
	// true if a cached file was moved to targetPath or if one was already there.
	Get(cacheKey, targetPath string) bool
	// Put adds a new file to the cache
	Put(cacheKey, sourcePath string) error
	// Get full path of the file in cache
	GetPath(cacheKey string) string
	// Best effort cleanup of outstanding cache items. Returns ErrCleanupBusy
	// when the cache is in use and cleanup should be retried at some later
	// time.
	Cleanup() error
}

// nullCache is cache that does not cache
type nullCache struct{}

func (cm *nullCache) Get(cacheKey, targetPath string) bool {
	return false
}
func (cm *nullCache) GetPath(cacheKey string) string {
	return ""
}
func (cm *nullCache) Put(cacheKey, sourcePath string) error { return nil }

func (cm *nullCache) Cleanup() error { return nil }

// entriesByMtime sorts by the mtime of files
type entriesByMtime []os.FileInfo

func (s entriesByMtime) Len() int           { return len(s) }
func (s entriesByMtime) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
func (s entriesByMtime) Less(i, j int) bool { return s[i].ModTime().Before(s[j].ModTime()) }

// cacheManager implements a downloadCache via content based hard linking
type CacheManager struct {
	// cleanupLock is used as a 'cleanup' synchronization point, where operations
	// for putting, getting files from the cache take a read cleanupLock, while the
	// actual cleanup operation takes the cleanupLock for writing
	cleanupLock sync.RWMutex
	cacheDir    string
	cachePolicy CachePolicy
}

// NewCacheManager returns a new CacheManager with the given cacheDir and the
// given cache policy. The idea behind it is the following algorithm:
//
//  1. When starting a download, check if it exists in $cacheDir
//  2. If found, update its mtime, hardlink into target location, and
//     return success
//  3. If not found, download the snap
//  4. On success, hardlink into $cacheDir/<digest>
//  5. Apply cache policy and remove items identified by the policy.
//
// The caching part is done here, the downloading happens in the store.go
// code.
func NewCacheManager(cacheDir string, policy CachePolicy) *CacheManager {
	return &CacheManager{
		cacheDir:    cacheDir,
		cachePolicy: policy,
	}
}

// GetPath returns the full path of the given content in the cache or empty
// string. The path may be removed at any time. The caller needs to ensure that
// they properly handle ErrNotExist when using returned path.
func (cm *CacheManager) GetPath(cacheKey string) string {
	cm.cleanupLock.RLock()
	defer cm.cleanupLock.RUnlock()

	if _, err := os.Stat(cm.path(cacheKey)); os.IsNotExist(err) {
		return ""
	}

	return cm.path(cacheKey)
}

// Get retrieves the given cacheKey content and puts it into targetPath. Returns
// true if a cached file was moved to targetPath or if one was already there.
func (cm *CacheManager) Get(cacheKey, targetPath string) bool {
	cm.cleanupLock.RLock()
	defer cm.cleanupLock.RUnlock()

	if err := os.Link(cm.path(cacheKey), targetPath); err != nil && !errors.Is(err, os.ErrExist) {
		return false
	}

	logger.Debugf("using cache for %s", targetPath)
	now := time.Now()
	// the modification time is updated on a best-effort basis
	_ = os.Chtimes(targetPath, now, now)
	return true
}

// Put adds a new file to the cache with the given cacheKey
func (cm *CacheManager) Put(cacheKey, sourcePath string) error {
	// always try to create the cache dir first or the following
	// osutil.IsWritable will always fail if the dir is missing
	_ = os.MkdirAll(cm.cacheDir, 0700)

	// happens on e.g. `snap download` which runs as the user
	if !osutil.IsWritable(cm.cacheDir) {
		return nil
	}

	err := func() error {
		cm.cleanupLock.RLock()
		defer cm.cleanupLock.RUnlock()

		err := os.Link(sourcePath, cm.path(cacheKey))
		if errors.Is(err, fs.ErrExist) {
			now := time.Now()
			return os.Chtimes(cm.path(cacheKey), now, now)
		}
		return err
	}()
	if err != nil {
		return err
	}

	return cm.opportunisticCleanup()
}

// count returns the number of items in the cache
func (cm *CacheManager) count() int {
	// TODO: Use something more effective than a list of all entries
	//       here. This will waste a lot of memory on large dirs.
	if l, err := os.ReadDir(cm.cacheDir); err == nil {
		return len(l)
	}
	return 0
}

// path returns the full path of the given content in the cache
func (cm *CacheManager) path(cacheKey string) string {
	return filepath.Join(cm.cacheDir, cacheKey)
}

// invokes Cleanup(), but ignores ErrCleanupBusy errors.
func (cm *CacheManager) opportunisticCleanup() error {
	if err := cm.Cleanup(); err != ErrCleanupBusy {
		return err
	}
	return nil
}

// Cleanup applies the cache policy to remove items. May return ErrCleanupBusy
// if the cleanup lock cannot be taken in which case the cleanup is skipped.
func (cm *CacheManager) Cleanup() error {
	// try to obtain exclusive lock on the cache
	if !cm.cleanupLock.TryLock() {
		return ErrCleanupBusy
	}
	defer cm.cleanupLock.Unlock()

	entries, err := os.ReadDir(cm.cacheDir)
	if err != nil {
		return err
	}

	removedCount, removedSize, err := cm.cachePolicy.Apply(entries, time.Now(), func(fi os.FileInfo) error {
		path := cm.path(fi.Name())
		logger.Debugf("removing %v", path)
		err := osRemove(path)
		if err != nil {
			if !os.IsNotExist(err) {
				// error here does not interrupt the cleanup, the cache policy
				// still tries to meet the targets
				logger.Noticef("cannot remove cache entry: %s", err)
				return err
			}
		}
		return nil
	})
	if err != nil {
		logger.Noticef("cannot apply downloads cache policy: %v", err)
	}

	logger.Noticef("removed %v entries/%s from downloads cache",
		removedCount, quantity.FormatAmount(removedSize, -1))

	return err
}

// hardLinkCount returns the number of hardlinks for the given path
func hardLinkCount(fi os.FileInfo) (uint64, error) {
	if stat, ok := fi.Sys().(*syscall.Stat_t); ok && stat != nil {
		return uint64(stat.Nlink), nil
	}
	return 0, fmt.Errorf("internal error: cannot read hardlink count from %s", fi.Name())
}

type CacheEntry struct {
	Info os.FileInfo
	// Candidate is true if the entry is a candidate for removal
	Candidate bool
	// Remove is true when entry would be removed according to the cache policy
	Remove bool
}

// StoreCacheStats contains some statistics about the store cache.
type StoreCacheStats struct {
	// TotalSize is a sum of sizes of all entries in the cache.
	TotalSize uint64
	// Entries in the cache, sorted by their modification time, starting from
	// oldest.
	Entries []CacheEntry
}

// Status returns statistics about the store cache.
func (cm *CacheManager) Stats() (*StoreCacheStats, error) {
	entries, err := os.ReadDir(cm.cacheDir)
	if err != nil {
		return nil, err
	}

	removeByName := map[string]bool{}
	_, _, err = cm.cachePolicy.Apply(entries, time.Now(), func(info os.FileInfo) error {
		removeByName[info.Name()] = true
		return nil
	})
	if err != nil {
		return nil, err
	}

	stats := StoreCacheStats{}

	for _, entry := range entries {
		fi, err := entry.Info()
		if err != nil {
			return nil, err
		}

		stats.TotalSize += uint64(fi.Size())

		stats.Entries = append(stats.Entries, CacheEntry{
			Info:      fi,
			Candidate: cm.cachePolicy.isCandidate(fi),
			Remove:    removeByName[fi.Name()],
		})
	}

	// TODO:GOVERSION: use slices.SortFunc
	sort.Slice(stats.Entries, func(i, j int) bool {
		return stats.Entries[i].Info.ModTime().Before(stats.Entries[j].Info.ModTime())
	})
	return &stats, nil
}

// CachePolicy defines the caching policy. Setting any of the limits to its zero
// value effectively disables it. A zero value (all fields in their default
// values) of CachePolicy means that no items would be dropped from cache,
// however places where it is used, such as Store.SetCachePolicy() may choose to
// disable all caching instead.
type CachePolicy struct {
	// MaxItems sets a target for maximum number of unique cache items.
	MaxItems int
	// MaxSizeBytes sets a target for maximum size of all unique items.
	MaxSizeBytes uint64
	// MaxAge sets a target for maximum age of unique cache items.
	MaxAge time.Duration
}

func (cp *CachePolicy) isCandidate(fi os.FileInfo) bool {
	n, err := hardLinkCount(fi)
	if err != nil {
		logger.Noticef("cannot inspect cache: %s", err)
	}

	// If the file is referenced in the filesystem somewhere else our copy
	// is "free" so skip it.
	return n <= 1
}

// Apply applies the cache policy for a given set of items and calls the
// provided drop callback to remove items from the cache.
//
// Internally, attempts to meet all targets defined in the cache policy, by
// processing unique cache items starting from oldest ones. Errors to drop items
// are collected and returned, but processing continues until targets are met or
// candidates list is exhausted.
func (cp *CachePolicy) Apply(entries []os.DirEntry, now time.Time, remove func(info os.FileInfo) error) (removedCount int, removedSize uint64, err error) {
	// most of the entries will have more than one hardlink, but a minority may
	// be referenced only from the cache and thus be a candidate for pruning
	candidates := make([]os.FileInfo, 0, len(entries)/5)
	candidatesSize := uint64(0)

	for _, entry := range entries {
		fi, err := entry.Info()
		if err != nil {
			return 0, 0, err
		}

		if cp.isCandidate(fi) {
			candidates = append(candidates, fi)
			candidatesSize += uint64(fi.Size())
		}
	}

	sort.Sort(entriesByMtime(candidates))

	if len(candidates) > 0 {
		logger.Debugf("store cache cleanup candidates %v total %v", len(candidates),
			quantity.FormatAmount(uint64(candidatesSize), -1))
		for _, c := range candidates {
			logger.Debugf("%s, size: %v, mod %s", c.Name(), quantity.FormatAmount(uint64(c.Size()), -1), c.ModTime())
		}
	}

	var lastErr error
	for _, c := range candidates {
		doRemove := false
		if cp.MaxAge != 0 && c.ModTime().Add(cp.MaxAge).Before(now) {
			doRemove = true
		}

		if !doRemove && cp.MaxItems != 0 && len(candidates)-removedCount > cp.MaxItems {
			doRemove = true
		}

		if !doRemove && cp.MaxSizeBytes != 0 && candidatesSize-removedSize > cp.MaxSizeBytes {
			doRemove = true
		}

		logger.Debugf("entry %v remove %v", c.Name(), doRemove)
		if doRemove {
			if err := remove(c); err != nil {
				lastErr = strutil.JoinErrors(lastErr, err)
			} else {
				// managed to drop the items, update the counts
				removedCount++
				removedSize += uint64(c.Size())
			}
		}
	}

	logger.Debugf("cache candidates to remove %v/%s", removedCount, quantity.FormatAmount(removedSize, -1))

	return removedCount, removedSize, lastErr
}