File: traverse.tcl

package info (click to toggle)
tcllib 2.0%2Bdfsg-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 83,560 kB
  • sloc: tcl: 306,798; ansic: 14,272; sh: 3,035; xml: 1,766; yacc: 1,157; pascal: 881; makefile: 124; perl: 84; f90: 84; python: 33; ruby: 13; php: 11
file content (393 lines) | stat: -rw-r--r-- 11,324 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
# traverse.tcl --
#
#	Directory traversal.
#
# Copyright (c) 2006-2015 by Andreas Kupries <andreas_kupries@users.sourceforge.net>
#
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.

package require Tcl 8.5 9

# OO core
if {[package vsatisfies [package present Tcl] 8.5 9]} {
    # Use new Tcl 8.5a6+ features to specify the allowed packages.
    # We can use anything above 1.3. This means v2 as well.
    package require snit 1.3-
} else {
    # For Tcl 8.{3,4} only snit1 of a suitable patchlevel is possible.
    package require snit 1.3
}
package require control  ; # Helpers for control structures
package require fileutil ; # -> fullnormalize

snit::type ::fileutil::traverse {

    # Incremental directory traversal.

    # API
    # create  %AUTO% basedirectory options... -> object
    # next    filevar                         -> boolean
    # foreach filevar script
    # files                                   -> list (path ...)

    # Options
    # -prefilter command-prefix
    # -filter    command-prefix
    # -errorcmd  command-prefix

    # Use cases
    #
    # (a) Basic incremental
    # - Create and configure a traversal object.
    # - Execute 'next' to retrieve one path at a time,
    #   until the command returns False, signaling that
    #   the iterator has exhausted the supply of paths.
    #   (The path is stored in the named variable).
    #
    # The execution of 'next' can be done in a loop, or via event
    # processing.

    # (b) Basic loop
    # - Create and configure a traversal object.
    # - Run a script for each path, using 'foreach'.
    #   This is a convenient standard wrapper around 'next'.
    #
    # The loop properly handles all possible Tcl result codes.

    # (c) Non-incremental, non-looping.
    # - Create and configure a traversal object.
    # - Retrieve a list of all paths via 'files'.

    # The -prefilter callback is executed for directories. Its result
    # determines if the traverser recurses into the directory or not.
    # The default is to always recurse into all directories. The call-
    # back is invoked with a single argument, the path of the
    # directory.
    #
    # The -filter callback is executed for all paths. Its result
    # determines if the current path is a valid result, and returned
    # by 'next'. The default is to accept all paths as valid. The
    # callback is invoked with a single argument, the path to check.

    # The -errorcmd callback is executed for all paths the traverser
    # has trouble with. Like being unable to cd into them, get their
    # status, etc. The default is to ignore any such problems. The
    # callback is invoked with a two arguments, the path for which the
    # error occured, and the error message. Errors thrown by the
    # filter callbacks are handled through this callback too. Errors
    # thrown by the error callback itself are not caught and ignored,
    # but allowed to pass to the caller, usually of 'next'.

    # Note: Low-level functionality, version and platform dependent is
    # implemented in procedures, and conditioally defined for optimal
    # use of features, etc. ...

    # Note: Traversal is done in depth-first pre-order.

    # Note: The options are handled only during
    # construction. Afterward they are read-only and attempts to
    # modify them will cause the system to throw errors.

    # ### ### ### ######### ######### #########
    ## Implementation

    option -filter    -default {} -readonly 1
    option -prefilter -default {} -readonly 1
    option -errorcmd  -default {} -readonly 1

    constructor {basedir args} {
	set _base $basedir
	$self configurelist $args
	return
    }

    method files {} {
	set files {}
	$self foreach f {lappend files $f}
	return $files
    }

    method foreach {fvar body} {
	upvar 1 $fvar currentfile

	# (Re-)initialize the traversal state on every call.
	$self Init

	while {[$self next currentfile]} {
	    set code [catch {uplevel 1 $body} result]

	    # decide what to do upon the return code:
	    #
	    #               0 - the body executed successfully
	    #               1 - the body raised an error
	    #               2 - the body invoked [return]
	    #               3 - the body invoked [break]
	    #               4 - the body invoked [continue]
	    # everything else - return and pass on the results
	    #
	    switch -exact -- $code {
		0 {}
		1 {
		    return -errorinfo [::control::ErrorInfoAsCaller uplevel foreach]  \
			    -errorcode $::errorCode -code error $result
		}
		3 {
		    # FRINK: nocheck
		    return
		}
		4 {}
		default {
		    return -code $code $result
		}
	    }
	}
	return
    }

    method next {fvar} {
	upvar 1 $fvar currentfile

	# Initialize on first call.
	if {!$_init} {
	    $self Init
	}

	# We (still) have valid paths in the result stack, return the
	# next one.

	if {[llength $_results]} {
	    set top      [lindex   $_results end]
	    set _results [lreplace $_results end end]
	    set currentfile $top
	    return 1
	}

	# Take the next directory waiting in the processing stack and
	# fill the result stack with all valid files and sub-
	# directories contained in it. Extend the processing queue
	# with all sub-directories not yet seen already (!circular
	# symlinks) and accepted by the prefilter. We stop iterating
	# when we either have no directories to process anymore, or
	# the result stack contains at least one path we can return.

	while {[llength $_pending]} {
	    set top      [lindex   $_pending end]
	    set _pending [lreplace $_pending end end]

	    # Directory accessible? Skip if not.
	    if {![ACCESS $top]} {
		Error $top "Inacessible directory"
		continue
	    }

	    # Expand the result stack with all files in the directory,
	    # modulo filtering.

	    foreach f [GLOBF $top] {
		if {![Valid $f]} continue
		lappend _results $f
	    }

	    # Expand the result stack with all sub-directories in the
	    # directory, modulo filtering. Further expand the
	    # processing stack with the same directories, if not seen
	    # yet and modulo pre-filtering.

	    foreach f [GLOBD $top] {
		if {
		    [string equal [file tail $f]  "."] ||
		    [string equal [file tail $f] ".."]
		} continue

		if {[Valid $f]} {
		    lappend _results $f
		}

		Enter $top $f
		if {[Cycle $f]} continue

		if {[Recurse $f]} {
		    lappend _pending $f
		}
	    }

	    # Stop expanding if we have paths to return.

	    if {[llength $_results]} {
		set top      [lindex   $_results end]
		set _results [lreplace $_results end end]
		set currentfile $top
		return 1
	    }
	}

	# Allow re-initialization with next call.

	set _init 0
	return 0
    }

    # ### ### ### ######### ######### #########
    ## Traversal state

    # * Initialization flag. Checked in 'next', reset by next when no
    #   more files are available. Set in 'Init'.
    # * Base directory (or file) to start the traversal from.
    # * Stack of prefiltered unknown directories waiting for
    #   processing, i.e. expansion (TOP at end).
    # * Stack of valid paths waiting to be returned as results.
    # * Set of directories already visited (normalized paths), for
    #   detection of circular symbolic links.

    variable _init         0  ; # Initialization flag.
    variable _base         {} ; # Base directory.
    variable _pending      {} ; # Processing stack.
    variable _results      {} ; # Result stack.

    # sym link handling (to break cycles, while allowing the following of non-cycle links).
    # Notes
    # - path parent   tracking is lexical.
    # - path identity tracking is based on the normalized path, i.e. the path with all
    #   symlinks resolved.
    # Maps
    # - path -> parent     (easier to follow the list than doing dirname's)
    # - path -> normalized (cache to avoid redundant calls of fullnormalize)
    # cycle <=> A parent's normalized form (NF) is identical to the current path's NF

    variable _parent -array {}
    variable _norm   -array {}

    # ### ### ### ######### ######### #########
    ## Internal helpers.

    proc Enter {parent path} {
	#puts ___E|$path
	upvar 1 _parent _parent _norm _norm
	set _parent($path) $parent
	set _norm($path)   [fileutil::fullnormalize $path]
    }

    proc Cycle {path} {
	upvar 1 _parent _parent _norm _norm
	set nform $_norm($path)
	set paren $_parent($path)
	while {$paren ne {}} {
	    if {$_norm($paren) eq $nform} { return yes }
	    set paren $_parent($paren)
	}
	return no
    }

    method Init {} {
	array unset _parent *
	array unset _norm   *

	# Path ok as result?
	if {[Valid $_base]} {
	    lappend _results $_base
	}

	# Expansion allowed by prefilter?
	if {[file isdirectory $_base] && [Recurse $_base]} {
	    Enter {} $_base
	    lappend _pending $_base
	}

	# System is set up now.
	set _init 1
	return
    }

    proc Valid {path} {
	#puts ___V|$path
	upvar 1 options options
	if {![llength $options(-filter)]} {return 1}
	set path [file normalize $path]
	set code [catch {uplevel \#0 [linsert $options(-filter) end $path]} valid]
	if {!$code} {return $valid}
	Error $path $valid
	return 0
    }

    proc Recurse {path} {
	#puts ___X|$path
	upvar 1 options options _norm _norm
	if {![llength $options(-prefilter)]} {return 1}
	set path [file normalize $path]
	set code [catch {uplevel \#0 [linsert $options(-prefilter) end $path]} valid]
	if {!$code} {return $valid}
	Error $path $valid
	return 0
    }

    proc Error {path msg} {
	upvar 1 options options
	if {![llength $options(-errorcmd)]} return
	set path [file normalize $path]
	uplevel \#0 [linsert $options(-errorcmd) end $path $msg]
	return
    }

    ##
    # ### ### ### ######### ######### #########
}

# ### ### ### ######### ######### #########
##

# Tcl 8.5+.
# We have to check readability of "current" on our own, glob
# changed to error out instead of returning nothing.

proc ::fileutil::traverse::ACCESS {args} {return 1}

proc ::fileutil::traverse::GLOBF {current} {
    if {![file readable $current] ||
	[BadLink $current]} {
	return {}
    }

    set res [lsort -unique [concat \
				[glob -nocomplain -directory $current -types f          -- *] \
				[glob -nocomplain -directory $current -types {hidden f} -- *]]]

    # Look for broken links (They are reported as neither file nor directory).
    foreach l [lsort -unique [concat \
				  [glob -nocomplain -directory $current -types l          -- *] \
				  [glob -nocomplain -directory $current -types {hidden l} -- *]]] {
	if {[file isfile      $l]} continue
	if {[file isdirectory $l]} continue
	lappend res $l
    }
    return [lsort -unique $res]
}

proc ::fileutil::traverse::GLOBD {current} {
    if {![file readable $current] ||
	[BadLink $current]} {
	return {}
    }

    lsort -unique [concat \
		       [glob -nocomplain -directory $current -types d          -- *] \
		       [glob -nocomplain -directory $current -types {hidden d} -- *]]
}

proc ::fileutil::traverse::BadLink {current} {
    if {[file type $current] ne "link"} { return no }

    set dst [file join [file dirname $current] [file readlink $current]]

    if {![file exists   $dst] ||
	![file readable $dst]} {
	return yes
    }

    return no
}

# ### ### ### ######### ######### #########
## Ready

package provide fileutil::traverse 0.7