File: cache.go

package info (click to toggle)
singularity-container 4.0.3%2Bds1-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 21,672 kB
  • sloc: asm: 3,857; sh: 2,125; ansic: 1,677; awk: 414; makefile: 110; python: 99
file content (396 lines) | stat: -rw-r--r-- 12,330 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
// Copyright (c) 2018-2023, Sylabs Inc. All rights reserved.
// This software is licensed under a 3-clause BSD license. Please consult the
// LICENSE.md file distributed with the sources of this project regarding your
// rights to use or distribute this software.

// Package cache provides support for caching SIF, OCI, SHUB images and any OCI layers used to build them
package cache

import (
	"errors"
	"fmt"
	"io"
	"os"
	"path"
	"path/filepath"
	"strconv"
	"time"

	v1 "github.com/google/go-containerregistry/pkg/v1"
	"github.com/google/go-containerregistry/pkg/v1/empty"
	"github.com/google/go-containerregistry/pkg/v1/layout"
	"github.com/opencontainers/go-digest"
	"github.com/sylabs/singularity/v4/internal/pkg/util/fs"
	"github.com/sylabs/singularity/v4/pkg/syfs"
	"github.com/sylabs/singularity/v4/pkg/sylog"
)

// Sentinel errors returned by Handle methods:
//   - errInvalidCacheType is returned by GetFileCacheDir and GetOciCacheDir
//     when the requested cache type is not in the list they accept.
//   - errCacheDisabled is returned by GetOciCacheBlob and PutOciCacheBlob
//     when the handle has its cache disabled.
var (
	errInvalidCacheType = errors.New("invalid cache type")
	errCacheDisabled    = errors.New("cache is disabled")
)

const (
	// DirEnv specifies the environment variable which can set the directory
	// for image downloads to be cached in
	DirEnv = "SINGULARITY_CACHEDIR"
	// DisableEnv specifies the environment variable which disables the cache
	// when it is set to a value that strconv.ParseBool reads as true
	DisableEnv = "SINGULARITY_DISABLE_CACHE"
	// SubDirName specifies the name of the directory relative to the
	// ParentDir specified when the cache is created.
	// By default the cache will be placed at "~/.singularity/cache" which
	// will not clash with any 2.x cache directory.
	SubDirName = "cache"

	// LibraryCacheType specifies the cache holds SIF images pulled from the library
	LibraryCacheType = "library"
	// OciTempCacheType specifies the cache holds SIF images created from OCI sources
	OciTempCacheType = "oci-tmp"
	// ShubCacheType specifies the cache holds images pulled from Singularity Hub
	ShubCacheType = "shub"
	// OrasCacheType specifies the cache holds SIF images pulled from Oras sources
	OrasCacheType = "oras"
	// NetCacheType specifies the cache holds images pulled from http(s) internet sources
	NetCacheType = "net"
	// OciSifCacheType specifies the cache holds OCI-SIF conversions of OCI sources.
	OciSifCacheType = "oci-sif"

	// OciBlobCacheType specifies the cache holds OCI blobs (layers) pulled from OCI sources
	OciBlobCacheType = "blob"
)

var (
	// FileCacheTypes lists the file cache types, which store SIF or other single-file images named by their hash.
	FileCacheTypes = []string{
		LibraryCacheType,
		OciTempCacheType,
		ShubCacheType,
		OrasCacheType,
		NetCacheType,
		OciSifCacheType,
	}
	// OciCacheTypes lists the OCI layout cache types, which store OCI blob content in a single OCI layout directory.
	OciCacheTypes = []string{
		OciBlobCacheType,
	}
	// AllCacheTypes lists both file and OCI layout cache types.
	// NOTE(review): append only produces an independent slice here because the
	// FileCacheTypes literal above has no spare capacity; revisit this if
	// FileCacheTypes is ever constructed differently.
	AllCacheTypes = append(FileCacheTypes, OciCacheTypes...)
)

// Config describes the configuration requested when a new handle is created,
// as defined by the user through command flags and environment variables.
type Config struct {
	// ParentDir specifies the location where the user wants the cache to be created.
	// When empty, New falls back to the directory chosen by getCacheParentDir.
	ParentDir string
	// Disable specifies whether the user requests the cache to be disabled.
	Disable bool
}

// Handle is a structure representing the image cache, its location and subdirectories
type Handle struct {
	// parentDir is the parent of the cache root. This is the directory that is supplied
	// when initializing the cache
	parentDir string
	// rootDir is the cache root directory, and is inside parentDir. This is the
	// directory Singularity actually manages, i.e., that can safely be
	// deleted as opposed to the parent directory that is potentially managed
	// (passed in) by the user.
	// New leaves it empty when the cache ends up disabled.
	rootDir string
	// disabled is true when the cache is disabled. New sets it when requested
	// through DisableEnv or Config.Disable, or when the cache location cannot
	// be accessed or is not writable.
	disabled bool
}

// GetFileCacheDir returns the directory used for the given file cache type.
// It returns errInvalidCacheType when cacheType is not one of FileCacheTypes.
func (h *Handle) GetFileCacheDir(cacheType string) (cacheDir string, err error) {
	if stringInSlice(cacheType, FileCacheTypes) {
		return h.getCacheTypeDir(cacheType), nil
	}
	return "", errInvalidCacheType
}

// GetOciCacheDir returns the OCI layout directory used for the given OCI
// cache type. It returns errInvalidCacheType when cacheType is not one of
// OciCacheTypes.
func (h *Handle) GetOciCacheDir(cacheType string) (cacheDir string, err error) {
	if stringInSlice(cacheType, OciCacheTypes) {
		return h.getCacheTypeDir(cacheType), nil
	}
	return "", errInvalidCacheType
}

// GetOciCacheBlob looks up the blob identified by blobDigest in the OCI layout
// of the given OCI cache type and returns the reader produced by the layout.
//
// It returns errCacheDisabled when the cache is disabled, errInvalidCacheType
// when cacheType is not an OCI cache type, and otherwise any error from
// parsing the digest, opening the layout, or retrieving the blob.
func (h *Handle) GetOciCacheBlob(cacheType string, blobDigest digest.Digest) (io.ReadCloser, error) {
	if h.disabled {
		return nil, errCacheDisabled
	}

	ociDir, dirErr := h.GetOciCacheDir(cacheType)
	if dirErr != nil {
		return nil, dirErr
	}

	blobHash, hashErr := v1.NewHash(blobDigest.String())
	if hashErr != nil {
		return nil, hashErr
	}

	// Named cacheLayout so the imported layout package is not shadowed.
	cacheLayout, layoutErr := layout.FromPath(ociDir)
	if layoutErr != nil {
		return nil, layoutErr
	}

	return cacheLayout.Blob(blobHash)
}

// PutOciCacheBlob writes the content read from r into the OCI layout of the
// given OCI cache type, stored under blobDigest.
//
// It returns errCacheDisabled when the cache is disabled, errInvalidCacheType
// when cacheType is not an OCI cache type, and otherwise any error from
// parsing the digest, opening the layout, or writing the blob.
func (h *Handle) PutOciCacheBlob(cacheType string, blobDigest digest.Digest, r io.ReadCloser) (err error) {
	if h.disabled {
		return errCacheDisabled
	}

	ociDir, dirErr := h.GetOciCacheDir(cacheType)
	if dirErr != nil {
		return dirErr
	}

	blobHash, hashErr := v1.NewHash(blobDigest.String())
	if hashErr != nil {
		return hashErr
	}

	// Named cacheLayout so the imported layout package is not shadowed.
	cacheLayout, layoutErr := layout.FromPath(ociDir)
	if layoutErr != nil {
		return layoutErr
	}

	return cacheLayout.WriteBlob(blobHash, r)
}

// GetEntry returns a cache Entry for a specified file cache type and hash.
//
// When the cache is disabled it returns a nil Entry and a nil error, so
// callers must check the returned Entry before using it.
//
// If a file for the hash is already cached, the Entry has Exists set and Path
// pointing at it. Otherwise Exists is false and TmpPath names a freshly
// created, empty temporary file in the cache directory for the caller to
// populate. An error is returned if cacheType is not a file cache type, if
// the filesystem cannot be inspected or prepared, or if something other than
// a regular file occupies the entry's path.
func (h *Handle) GetEntry(cacheType string, hash string) (e *Entry, err error) {
	if h.disabled {
		return nil, nil
	}

	e = &Entry{}

	cacheDir, err := h.GetFileCacheDir(cacheType)
	if err != nil {
		// Wrap with %w so callers can still match the underlying cause.
		return nil, fmt.Errorf("cannot get '%s' cache directory: %w", cacheType, err)
	}

	e.Path = filepath.Join(cacheDir, hash)

	// If there is a directory it's from an older version of Singularity
	// We need to remove it as we work with single files per hash only now
	if fs.IsDir(e.Path) {
		sylog.Debugf("Removing old cache directory: %s", e.Path)
		rmErr := os.RemoveAll(e.Path)
		// Allow IsNotExist in case a concurrent process already removed it
		if rmErr != nil && !os.IsNotExist(rmErr) {
			return nil, fmt.Errorf("could not remove old cache directory '%s': %v", e.Path, rmErr)
		}
	}

	// If there is no existing file return an entry with a TmpPath for the caller
	// to use and then Finalize
	pathExists, err := fs.PathExists(e.Path)
	if err != nil {
		return nil, fmt.Errorf("could not check for cache entry '%s': %v", e.Path, err)
	}

	if !pathExists {
		e.Exists = false
		f, err := fs.MakeTmpFile(cacheDir, "tmp_", 0o700)
		if err != nil {
			return nil, err
		}
		tmpPath := f.Name()
		if err := f.Close(); err != nil {
			// The caller never learns about this temporary file, so remove it
			// here rather than leaving an orphan in the cache directory.
			if rmErr := os.Remove(tmpPath); rmErr != nil && !os.IsNotExist(rmErr) {
				sylog.Debugf("Could not remove temporary cache file %s: %v", tmpPath, rmErr)
			}
			return nil, err
		}
		e.TmpPath = tmpPath
		return e, nil
	}

	// Double check that there isn't something else weird there
	if !fs.IsFile(e.Path) {
		return nil, fmt.Errorf("path '%s' exists but is not a file", e.Path)
	}

	// It exists in the cache and it's a file. Caller can use the Path directly
	e.Exists = true
	return e, nil
}

// CleanCache removes entries from the cache directory of the given cache type.
//
// When days is >= 0, only entries whose modification time is at least that
// many days in the past are removed; a negative value removes every entry.
// When dryRun is true, the entries that would be removed are logged but
// nothing is deleted.
//
// It returns errCacheDisabled when the cache is disabled (the handle then has
// no root directory to clean), an error when the cache directory cannot be
// read, and an error counting the entries that could not be inspected or
// removed. A missing or empty cache directory is not an error.
func (h *Handle) CleanCache(cacheType string, dryRun bool, days int) (err error) {
	// A disabled handle has an empty rootDir, so the cache type directory
	// would resolve relative to the current working directory. Never delete
	// from there.
	if h.disabled {
		return errCacheDisabled
	}

	dir := h.getCacheTypeDir(cacheType)

	files, readErr := os.ReadDir(dir)
	// os.ReadDir may return entries together with an error. Only give up
	// immediately if nothing at all could be read for a reason other than the
	// directory not existing.
	if readErr != nil && !os.IsNotExist(readErr) && len(files) == 0 {
		return fmt.Errorf("could not read cache directory '%s': %w", dir, readErr)
	}
	if len(files) == 0 {
		sylog.Infof("No cached files to remove at %s", dir)
		return nil
	}

	// Loop-invariant age threshold, only consulted when days >= 0.
	minAge := time.Duration(days*24) * time.Hour

	errCount := 0
	for _, f := range files {
		if days >= 0 {
			fi, err := f.Info()
			if err != nil {
				sylog.Errorf("Could not get info for cache entry '%s': %v", f.Name(), err)
				errCount++
				continue
			}

			if time.Since(fi.ModTime()) < minAge {
				sylog.Debugf("Skipping %s: less that %d days old", f.Name(), days)
				continue
			}
		}

		sylog.Infof("Removing %s cache entry: %s", cacheType, f.Name())
		if !dryRun {
			// We RemoveAll in case the entry is a directory from Singularity <3.6
			if err := os.RemoveAll(filepath.Join(dir, f.Name())); err != nil {
				sylog.Errorf("Could not remove cache entry '%s': %v", f.Name(), err)
				errCount++
			}
		}
	}

	if errCount > 0 {
		return fmt.Errorf("failed to remove %d cache entries", errCount)
	}

	// Surface a partial directory read after the entries that were listed
	// have been processed.
	return readErr
}

// IsDisabled returns true if the cache is disabled, i.e. the handle's
// disabled flag was set when it was created by New.
func (h *Handle) IsDisabled() bool {
	return h.disabled
}

// getCacheTypeDir returns the directory for a specific cache type, i.e. the
// cacheType subdirectory of the cache root directory. The cache type is not
// validated here. New leaves rootDir empty on a handle whose cache is
// disabled, in which case the result is a relative path.
func (h *Handle) getCacheTypeDir(cacheType string) string {
	return path.Join(h.rootDir, cacheType)
}

// New initializes a cache within the directory specified in Config.ParentDir
// and returns a handle to it.
//
// The cache is disabled, and no directory is created, when DisableEnv parses
// as true or cfg.Disable is set. It is also disabled, with a warning, when the
// first existing parent of the cache location cannot be determined or is not
// writable. In those cases a handle reporting IsDisabled() == true is
// returned with a nil error.
//
// An error is returned when DisableEnv holds a value that is not a valid
// boolean, or when the cache root, a cache type directory, or an OCI layout
// cannot be initialized.
func New(cfg Config) (h *Handle, err error) {
	// strconv.ParseBool rejects the empty string, so an unset variable is
	// treated as "0".
	disableValue := os.Getenv(DisableEnv)
	if disableValue == "" {
		disableValue = "0"
	}
	envDisabled, err := strconv.ParseBool(disableValue)
	if err != nil {
		return nil, fmt.Errorf("failed to parse environment variable %s: %s", DisableEnv, err)
	}

	h = &Handle{}

	// Either source may disable the cache. A disabled handle is valid but
	// deliberately left uninitialized so that no directories get created.
	if envDisabled || cfg.Disable {
		h.disabled = true
		return h, nil
	}

	// cfg describes what was requested and must not be modified, so the
	// fallback is applied to the handle's own copy.
	h.parentDir = cfg.ParentDir
	if h.parentDir == "" {
		h.parentDir = getCacheParentDir()
	}

	// Without access to the parent of the cache directory the cache cannot
	// be used.
	existingParent, err := fs.FirstExistingParent(h.parentDir)
	if err != nil {
		sylog.Warningf("Cache disabled - cannot access parent directory of cache: %s.", err)
		h.disabled = true
		return h, nil
	}

	// The parent directory, or its first existing ancestor, has to be
	// writable for caching to work.
	if !fs.IsWritable(existingParent) {
		sylog.Warningf("Cache disabled - cache location %s is not writable.", existingParent)
		h.disabled = true
		return h, nil
	}

	// Set up the cache root first ...
	h.rootDir = path.Join(h.parentDir, SubDirName)
	if err = initCacheDir(h.rootDir); err != nil {
		return nil, fmt.Errorf("failed initializing cache root directory: %s", err)
	}

	// ... then one subdirectory per cache type, plus an OCI layout for the
	// OCI cache types.
	for _, cacheType := range AllCacheTypes {
		typeDir := h.getCacheTypeDir(cacheType)
		if err = initCacheDir(typeDir); err != nil {
			return nil, fmt.Errorf("failed initializing %s cache directory: %s", cacheType, err)
		}
		if !stringInSlice(cacheType, OciCacheTypes) {
			continue
		}
		if err = initLayout(typeDir); err != nil {
			return nil, fmt.Errorf("failed initializing %s cache oci layout: %s", cacheType, err)
		}
	}

	return h, nil
}

// getCacheParentDir figures out where the parent directory of the cache is.
//
// The value of the environment variable named by DirEnv is used when it is
// set to a non-empty string. Otherwise the directory returned by
// syfs.ConfigDir() is used as the default location.
func getCacheParentDir() string {
	// A user-supplied location always takes precedence.
	if envDir := os.Getenv(DirEnv); envDir != "" {
		return envDir
	}

	// Nothing was requested through the environment: use the default.
	sylog.Debugf("environment variable %s not set, using default image cache", DirEnv)
	return syfs.ConfigDir()
}

// initCacheDir makes sure dir exists as a directory with permission 0700.
//
// A missing directory is created (including parents) with mode 0700. An
// existing directory with any other permission bits is chmod'ed to 0700. An
// error is returned if dir cannot be inspected, created or chmod'ed, or if
// the path exists but is not a directory.
func initCacheDir(dir string) error {
	fi, err := os.Stat(dir)
	switch {
	case os.IsNotExist(err):
		sylog.Debugf("Creating cache directory: %s", dir)
		if err := fs.MkdirAll(dir, 0o700); err != nil {
			return fmt.Errorf("couldn't create cache directory %v: %v", dir, err)
		}
	case err != nil:
		return fmt.Errorf("unable to stat %s: %s", dir, err)
	case !fi.IsDir():
		// Something that is not a directory occupies the cache path. Report
		// it here instead of chmod'ing it and failing later in a less
		// obvious place.
		return fmt.Errorf("cache path %s exists but is not a directory", dir)
	case fi.Mode().Perm() != 0o700:
		// enforce permission on cache directory to prevent
		// potential information leak
		if err := os.Chmod(dir, 0o700); err != nil {
			return fmt.Errorf("couldn't enforce permission 0700 on %s: %s", dir, err)
		}
	}
	return nil
}

// initLayout makes sure dir holds an OCI layout, using the presence of an
// index.json file as the marker for an existing one.
//
// When index.json is missing, an empty OCI layout is written into dir. An
// error is returned if index.json cannot be inspected for a reason other
// than not existing, or if writing the layout fails.
func initLayout(dir string) error {
	indexPath := filepath.Join(dir, "index.json")

	_, err := os.Stat(indexPath)
	if err == nil {
		// A layout index is already present, nothing to do.
		return nil
	}
	if !os.IsNotExist(err) {
		// Do not claim success when the state of the layout is unknown.
		return fmt.Errorf("unable to stat %s: %w", indexPath, err)
	}

	sylog.Debugf("Creating cache OCI layout: %s", dir)
	_, err = layout.Write(dir, empty.Index)
	return err
}

// stringInSlice reports whether a is equal to at least one element of list.
// It returns false for a nil or empty list.
func stringInSlice(a string, list []string) bool {
	for i := range list {
		if list[i] == a {
			return true
		}
	}
	return false
}