File: fs_memoized.go

package info (click to toggle)
golang-github-cue-lang-cue 0.12.0.-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 19,072 kB
  • sloc: sh: 57; makefile: 17
file content (171 lines) | stat: -rw-r--r-- 4,686 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package cache

import (
	"context"
	"os"
	"sync"
	"time"

	"cuelang.org/go/internal/golangorgx/gopls/file"
	"cuelang.org/go/internal/golangorgx/gopls/protocol"
	"cuelang.org/go/internal/golangorgx/tools/event"
	"cuelang.org/go/internal/golangorgx/tools/event/tag"
	"cuelang.org/go/internal/golangorgx/tools/robustio"
)

// A memoizedFS is a file source that memoizes reads, to reduce IO.
//
// Results are cached per file identity (robustio.FileID) rather than per URI,
// so different URIs that refer to the same file share one cache entry.
type memoizedFS struct {
	// mu guards filesByID.
	mu sync.Mutex

	// filesByID maps existing file inodes to the result of a read.
	// (The read may have failed, e.g. due to EACCES or a delete between stat+read.)
	// Each slice is a non-empty list of aliases: different URIs.
	// Element 0 is the handle produced by the read itself; ReadFile compares
	// its modTime against the current mtime to decide whether the entry is
	// still valid, and appends copies of it (with a different uri) as aliases.
	filesByID map[robustio.FileID][]*diskFile
}

// newMemoizedFS returns a memoizedFS whose cache is empty but initialized,
// so that it can be written to immediately.
func newMemoizedFS() *memoizedFS {
	byID := make(map[robustio.FileID][]*diskFile)
	return &memoizedFS{filesByID: byID}
}

// A diskFile is a file in the filesystem, or a failure to read one.
// It implements the file.Source interface.
type diskFile struct {
	// uri is the URI this handle was requested under.
	uri     protocol.DocumentURI
	// modTime is the modification time observed by the stat that preceded the
	// read. It is left as the zero time on handles created for a failed stat.
	modTime time.Time
	// content holds the bytes read from disk; it is nil if the read failed.
	content []byte
	// hash is file.HashOf(content) for handles produced by readFile.
	hash    file.Hash
	// err is the stat or read error, if any; it is returned by Content.
	err     error
}

// URI returns the URI under which this handle was requested.
func (h *diskFile) URI() protocol.DocumentURI {
	return h.uri
}

// Identity returns the file.Identity for this handle, built from its URI and
// the hash of its content.
func (h *diskFile) Identity() file.Identity {
	var id file.Identity
	id.URI = h.uri
	id.Hash = h.hash
	return id
}

// SameContentsOnDisk always reports true for a diskFile.
func (h *diskFile) SameContentsOnDisk() bool {
	return true
}

// Version always reports 0 for a diskFile.
func (h *diskFile) Version() int32 {
	return 0
}

// Content returns the bytes stored in the handle together with the error
// recorded when the handle was created, if any.
func (h *diskFile) Content() ([]byte, error) {
	return h.content, h.err
}

// ReadFile returns a handle for the file at uri, consulting the cache first.
//
// The file is stat'ed on every call. If the stat fails, a diskFile carrying
// that error is returned (with a nil error result) and the cache is left
// untouched. If the cache holds an entry for the same file ID whose
// modification time matches, the cached handle for uri is returned; when uri
// has not been seen for that file before, an alias sharing the cached content
// is created and remembered. Otherwise the file is (re-)read and the cache
// entry is replaced, or dropped if the mtime is too recent to be trusted.
//
// The error result is non-nil only when readFile itself fails (e.g. the
// context was cancelled). A failed read is not an error here: it is reported
// by the returned handle's Content method.
func (fs *memoizedFS) ReadFile(ctx context.Context, uri protocol.DocumentURI) (file.Handle, error) {
	id, mtime, statErr := robustio.GetFileID(uri.Path())
	if statErr != nil {
		// The file could not be identified (e.g. it does not exist). Surface
		// the failure through the handle instead of the error result.
		return &diskFile{uri: uri, err: statErr}, nil
	}

	// Staleness is detected by comparing modification times. That heuristic
	// is imperfect because some systems have low resolution mtimes (as much
	// as 1s on WSL or s390x builders), so a handle is only cached when its
	// mtime is old enough that a subsequent write is not expected to produce
	// the same mtime.
	//
	// The coarsest precision seen in practice is 1s, so an mtime less than 2s
	// old is treated as unreliable. This is captured before anything else is
	// done.
	mtimeUnreliable := time.Since(mtime) < 2*time.Second

	// cachedHandle returns the memoized handle for uri if the cache holds an
	// up-to-date entry for this file ID, or nil on a miss.
	cachedHandle := func() *diskFile {
		fs.mu.Lock()
		defer fs.mu.Unlock()

		aliases, found := fs.filesByID[id]
		if !found || !aliases[0].modTime.Equal(mtime) {
			return nil // unknown file, or the file has changed
		}
		for _, alias := range aliases {
			if alias.uri == uri {
				return alias
			}
		}
		// The file is known, but not under this exact URI. Record an alias
		// that shares the content of the existing handle.
		alias := *aliases[0]
		alias.uri = uri
		fs.filesByID[id] = append(aliases, &alias)
		return &alias
	}
	if cached := cachedHandle(); cached != nil {
		return cached, nil
	}

	// Cache miss: read (or re-read) the file without holding the lock.
	fresh, err := readFile(ctx, uri, mtime) // ~25us
	if err != nil {
		return nil, err // e.g. cancelled (not: read failed)
	}

	fs.mu.Lock()
	defer fs.mu.Unlock()
	if mtimeUnreliable {
		delete(fs.filesByID, id)
	} else {
		fs.filesByID[id] = []*diskFile{fresh}
	}
	return fresh, nil
}

// fileStats summarizes the cache contents. It is intended for debugging only.
//
// files is the number of cache entries (one per file ID), largest is the
// length in bytes of the biggest content among the first handle of each
// entry, and errs is the number of entries whose first handle carries an
// error.
func (fs *memoizedFS) fileStats() (files, largest, errs int) {
	fs.mu.Lock()
	defer fs.mu.Unlock()

	for _, aliases := range fs.filesByID {
		// Every alias shares the same content and error, so inspecting the
		// first one is enough.
		first := aliases[0]
		if size := len(first.content); size > largest {
			largest = size
		}
		if first.err != nil {
			errs++
		}
	}
	return len(fs.filesByID), largest, errs
}

// ioLimit limits the number of parallel file reads per process.
//
// It is used as a counting semaphore: readFile sends a value on the channel
// before reading and receives one when it returns, so at most 128 (the
// channel's capacity) reads can be in progress at once.
var ioLimit = make(chan struct{}, 128)

// readFile reads the file at uri from disk and wraps the outcome in a
// diskFile stamped with the given mtime.
//
// The number of concurrent reads is bounded by ioLimit. The error result is
// non-nil only if ctx is done before a slot becomes available; a failure of
// the read itself is stored in the returned diskFile, whose content is then
// nil.
func readFile(ctx context.Context, uri protocol.DocumentURI, mtime time.Time) (*diskFile, error) {
	// Wait for a read slot, giving up if the context is done first.
	select {
	case <-ctx.Done():
		return nil, ctx.Err()
	case ioLimit <- struct{}{}:
	}
	defer func() { <-ioLimit }()

	path := uri.Path()

	// The derived context is not needed: nothing below accepts one.
	_, done := event.Start(ctx, "cache.readFile", tag.File.Of(path))
	defer done()

	// A race may cause this to read a file with a different file ID, or whose
	// mtime differs from the given mtime. In those cases the client is
	// expected to notify of a subsequent file change, so the file content
	// should be eventually consistent.
	data, readErr := os.ReadFile(path) // ~20us
	if readErr != nil {
		data = nil // just in case
	}

	fh := &diskFile{
		uri:     uri,
		modTime: mtime,
		content: data,
		hash:    file.HashOf(data),
		err:     readErr,
	}
	return fh, nil
}