File: cwalk.go

package info (click to toggle)
golang-github-iafan-cwalk 0.0~git20191125.dd7f505-3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 88 kB
  • sloc: makefile: 4
file content (256 lines) | stat: -rw-r--r-- 6,003 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
package cwalk

import (
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"runtime"
	"strings"
	"sync"
)

// Package-level tunables and sentinel errors.
var (
	// NumWorkers is the number of worker goroutines started by each
	// Walker.Walk call. It defaults to runtime.GOMAXPROCS(0).
	NumWorkers = runtime.GOMAXPROCS(0)

	// BufferSize is the capacity of the job and error channels created
	// by each Walker.Walk call. It is initialized from NumWorkers once,
	// at package initialization; later changes to NumWorkers do not
	// affect it.
	BufferSize = NumWorkers

	// ErrNotDir is returned by Walker.Walk when the path it is asked
	// to walk does not point to a directory.
	ErrNotDir = errors.New("Not a directory")
)

// WalkerError describes a single failure reported by a worker routine:
// the underlying error and the path (relative to the walk root) it was
// reported for.
type WalkerError struct {
	error error
	path  string
}

// WalkerErrorList stores the list of errors reported from all worker
// routines during one walk.
type WalkerErrorList struct {
	ErrorList []WalkerError
}

// Error implements the error interface for WalkerError by returning the
// message of the wrapped error. A WalkerError that wraps no error (such
// as the zero value) yields an empty string instead of panicking.
func (we WalkerError) Error() string {
	if we.error == nil {
		return ""
	}
	return we.error.Error()
}

// Unwrap returns the wrapped error, which lets errors.Is and errors.As
// inspect the cause of an individual WalkerError.
func (we WalkerError) Unwrap() error {
	return we.error
}

// Error implements the error interface for WalkerErrorList. The messages
// of all collected errors are joined with newlines, in the order in which
// they were collected; an empty list yields an empty string.
func (wel WalkerErrorList) Error() string {
	out := make([]string, len(wel.ErrorList))
	for i, err := range wel.ErrorList {
		out[i] = err.Error()
	}
	// Joining an empty slice returns "", so no special case is needed.
	return strings.Join(out, "\n")
}

// Walker holds the state of a single concurrent directory walk.
// The package-level Walk and WalkWithSymlinks helpers construct a fresh
// Walker for every invocation.
type Walker struct {
	// Walk configuration.
	root           string            // base directory that all job paths are relative to
	followSymlinks bool              // when set, lstat resolves symlinks before reporting
	walkFunc       filepath.WalkFunc // user callback invoked for every visited path

	// Job queue: directories waiting to be processed, plus the counter
	// of directories that were queued but are not finished yet.
	jobs chan string
	wg   sync.WaitGroup

	// Error reporting: workers send to errors, collectErrors appends
	// what it receives to errorList, and ewg signals that the collector
	// has finished.
	errors    chan WalkerError
	ewg       sync.WaitGroup
	errorList WalkerErrorList
}

// the readDirNames function below was taken from the original
// implementation (see https://golang.org/src/path/filepath/path.go)
// but has sorting removed (sorting doesn't make sense
// in concurrent execution, anyway)

// readDirNames opens the directory dirname and returns the names of all
// of its entries in the order the operating system reports them (no
// sorting is applied). If the directory cannot be opened or read, a nil
// slice and the error are returned.
func readDirNames(dirname string) ([]string, error) {
	dir, openErr := os.Open(dirname)
	if openErr != nil {
		return nil, openErr
	}
	// The handle is only needed for the listing; release it on every path.
	defer dir.Close()

	entries, readErr := dir.Readdirnames(-1)
	if readErr != nil {
		return nil, readErr
	}
	return entries, nil
}

// lstat returns file information for relpath, which is interpreted
// relative to Walker.root. It behaves like os.Lstat unless followSymlinks
// is set and the entry is a symbolic link: in that case the link is
// resolved with filepath.EvalSymlinks and the information of the resolved
// path is returned instead. On any failure the returned info is nil.
func (w *Walker) lstat(relpath string) (info os.FileInfo, err error) {
	full := filepath.Join(w.root, relpath)
	if info, err = os.Lstat(full); err != nil {
		return nil, err
	}

	// Nothing more to do unless we were asked to follow a symlink.
	isLink := info.Mode()&os.ModeSymlink != 0
	if !w.followSymlinks || !isLink {
		return info, nil
	}

	resolved, err := filepath.EvalSymlinks(full)
	if err != nil {
		return nil, err
	}
	if info, err = os.Lstat(resolved); err != nil {
		return nil, err
	}
	return info, nil
}

// collectErrors drains the errors channel, appending every received
// WalkerError to errorList. It returns once the channel has been closed
// and emptied, and marks the error wait group as done on the way out.
func (w *Walker) collectErrors() {
	defer w.ewg.Done()
	for {
		walkErr, open := <-w.errors
		if !open {
			return
		}
		w.errorList.ErrorList = append(w.errorList.ErrorList, walkErr)
	}
}

// processPath processes one directory (relpath, relative to Walker.root):
// it calls the walk function for every entry and queues each
// subdirectory for further processing. It always marks one job as done
// on the job wait group, so every call must be preceded by w.wg.Add(1).
//
// The returned error is non-nil only when the directory itself cannot
// be listed. Errors concerning individual entries are sent to the errors
// channel and do not stop the processing of the remaining entries.
//
// filepath.SkipDir returned by the walk function follows the
// filepath.Walk convention: for a directory entry only that directory is
// skipped; for any other entry the rest of the containing directory is
// skipped.
func (w *Walker) processPath(relpath string) error {
	defer w.wg.Done()

	path := filepath.Join(w.root, relpath)
	names, err := readDirNames(path)
	if err != nil {
		return err
	}

	for _, name := range names {
		subpath := filepath.Join(relpath, name)
		info, err := w.lstat(subpath)

		// The walk function decides what to do with a failed lstat:
		// its return value replaces the original error.
		err = w.walkFunc(subpath, info, err)

		if err == filepath.SkipDir {
			if info != nil && info.IsDir() {
				// Skip just this subdirectory; its siblings must
				// still be visited.
				continue
			}
			// Returned for a non-directory: skip the remaining
			// entries of the containing directory.
			return nil
		}

		if err != nil {
			w.errors <- WalkerError{
				error: err,
				path:  subpath,
			}
			continue
		}

		// The walk function accepted the entry although no file
		// information could be obtained for it.
		if info == nil {
			w.errors <- WalkerError{
				error: fmt.Errorf("Broken symlink: %s", subpath),
				path:  subpath,
			}
			continue
		}

		if info.IsDir() {
			w.addJob(subpath)
		}
	}
	return nil
}

// addJob registers one more pending job on the wait group and tries to
// hand path over to the workers through the jobs channel. The send never
// blocks: if the channel cannot accept the job right now, the directory
// is processed synchronously in the calling goroutine, and a failure to
// process it is reported on the errors channel.
func (w *Walker) addJob(path string) {
	w.wg.Add(1)

	select {
	case w.jobs <- path:
		// queued; a worker will pick it up
		return
	default:
		// buffer overflow, fall through to synchronous processing
	}

	if err := w.processPath(path); err != nil {
		w.errors <- WalkerError{error: err, path: path}
	}
}

// worker receives directories from the jobs channel and processes them
// one by one, reporting any processing failure on the errors channel.
// It returns when the jobs channel has been closed and drained.
func (w *Walker) worker() {
	for {
		job, more := <-w.jobs
		if !more {
			return
		}
		if err := w.processPath(job); err != nil {
			w.errors <- WalkerError{error: err, path: job}
		}
	}
}

// Walk recursively descends into subdirectories, calling walkFn for each
// file or directory in the tree, including the starting directory.
// relpath is interpreted relative to Walker.root, and so are the paths
// passed to walkFn. walkFn is called from several goroutines at once, so
// it must be safe for concurrent use.
//
// The starting path is checked first, before any goroutine is started:
//   - if walkFn returns filepath.SkipDir for it, Walk returns nil;
//   - any other error returned by walkFn for it is returned as is;
//   - if no file information is available for it, a "Broken symlink"
//     error is returned;
//   - if it is not a directory, ErrNotDir is returned.
//
// Otherwise the tree is walked by NumWorkers worker goroutines. Errors
// met along the way do not stop the walk; they are collected and
// returned together as a WalkerErrorList. Walk returns nil when no error
// was collected.
func (w *Walker) Walk(relpath string, walkFn filepath.WalkFunc) error {
	w.walkFunc = walkFn

	// Validate the starting path before starting any goroutine, so that
	// the early returns below cannot leave an error collector behind
	// that waits forever on a channel nobody closes.
	info, err := w.lstat(relpath)
	err = w.walkFunc(relpath, info, err)
	if err == filepath.SkipDir {
		return nil
	}
	if err != nil {
		return err
	}

	if info == nil {
		return fmt.Errorf("Broken symlink: %s", relpath)
	}

	if !info.IsDir() {
		return ErrNotDir
	}

	w.errors = make(chan WalkerError, BufferSize)
	w.jobs = make(chan string, BufferSize)
	// Start from an empty list so that errors collected by an earlier
	// Walk call on the same Walker are not reported again.
	w.errorList = WalkerErrorList{}

	w.ewg.Add(1) // a separate error waitgroup so we wait until all errors are reported before exiting
	go w.collectErrors()

	// spawn workers
	for n := 0; n < NumWorkers; n++ {
		go w.worker()
	}
	w.addJob(relpath) // add this path as a first job
	w.wg.Wait()       // wait till all paths are processed
	close(w.jobs)     // signal workers to exit
	close(w.errors)   // signal the error collector to exit
	w.ewg.Wait()      // wait for all errors to be collected

	if len(w.errorList.ErrorList) > 0 {
		return w.errorList
	}
	return nil
}

// Walk walks the file tree rooted at root using a freshly constructed
// Walker, in the manner of filepath.Walk but concurrently. Symbolic
// links are not followed. The paths passed to walkFn are relative to
// root; the root itself is reported with an empty path.
func Walk(root string, walkFn filepath.WalkFunc) error {
	walker := &Walker{root: root}
	return walker.Walk("", walkFn)
}

// WalkWithSymlinks walks the file tree rooted at root using a freshly
// constructed Walker, in the manner of filepath.Walk but concurrently
// and with symbolic links resolved, so that symlinked directories are
// descended into. The paths passed to walkFn are relative to root; the
// root itself is reported with an empty path.
func WalkWithSymlinks(root string, walkFn filepath.WalkFunc) error {
	var walker Walker
	walker.root = root
	walker.followSymlinks = true
	return walker.Walk("", walkFn)
}