File: match.go

package info (click to toggle)
golang-github-bmatcuk-doublestar 4.6.1-1
  • links: PTS, VCS
  • area: main
  • in suites: experimental, forky, sid, trixie
  • size: 212 kB
  • sloc: makefile: 6
file content (381 lines) | stat: -rw-r--r-- 11,558 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
package doublestar

import (
	"path/filepath"
	"unicode/utf8"
)

// Match reports whether name matches the shell pattern.
// The pattern syntax is:
//
//  pattern:
//    { term }
//  term:
//    '*'         matches any sequence of non-path-separators
//    '/**/'      matches zero or more directories
//    '?'         matches any single non-path-separator character
//    '[' [ '^' '!' ] { character-range } ']'
//                character class (must be non-empty)
//                starting with `^` or `!` negates the class
//    '{' { term } [ ',' { term } ... ] '}'
//                alternatives
//    c           matches character c (c != '*', '?', '\\', '[')
//    '\\' c      matches character c
//
//  character-range:
//    c           matches character c (c != '\\', '-', ']')
//    '\\' c      matches character c
//    lo '-' hi   matches character c for lo <= c <= hi
//
// Match returns true if `name` matches the file name `pattern`. `name` and
// `pattern` are split on forward slash (`/`) characters and may be relative or
// absolute.
//
// Match requires pattern to match all of name, not just a substring.
// The only possible returned error is ErrBadPattern, when pattern
// is malformed.
//
// A doublestar (`**`) should appear surrounded by path separators such as
// `/**/`.  A mid-pattern doublestar (`**`) behaves like bash's globstar
// option: a pattern such as `path/to/**.txt` would return the same results as
// `path/to/*.txt`. The pattern you're looking for is `path/to/**/*.txt`.
//
// Note: this is meant as a drop-in replacement for path.Match() which
// always uses '/' as the path separator. If you want to support systems
// which use a different path separator (such as Windows), what you want
// is PathMatch(). Alternatively, you can run filepath.ToSlash() on both
// pattern and name and then use this function.
//
// Note: users should _not_ count on the returned error,
// doublestar.ErrBadPattern, being equal to path.ErrBadPattern.
//
func Match(pattern, name string) (bool, error) {
	return matchWithSeparator(pattern, name, '/', true)
}

// PathMatch returns true if `name` matches the file name `pattern`. The
// difference between Match and PathMatch is that PathMatch will automatically
// use your system's path separator to split `name` and `pattern`. On systems
// where the path separator is `'\'`, escaping will be disabled.
//
// Note: this is meant as a drop-in replacement for filepath.Match(). It
// assumes that both `pattern` and `name` are using the system's path
// separator. If you can't be sure of that, use filepath.ToSlash() on both
// `pattern` and `name`, and then use the Match() function instead.
//
func PathMatch(pattern, name string) (bool, error) {
	return matchWithSeparator(pattern, name, filepath.Separator, true)
}

func matchWithSeparator(pattern, name string, separator rune, validate bool) (matched bool, err error) {
	return doMatchWithSeparator(pattern, name, separator, validate, -1, -1, -1, -1, 0, 0)
}

func doMatchWithSeparator(pattern, name string, separator rune, validate bool, doublestarPatternBacktrack, doublestarNameBacktrack, starPatternBacktrack, starNameBacktrack, patIdx, nameIdx int) (matched bool, err error) {
	patLen := len(pattern)
	nameLen := len(name)
	startOfSegment := true
MATCH:
	for nameIdx < nameLen {
		if patIdx < patLen {
			switch pattern[patIdx] {
			case '*':
				if patIdx++; patIdx < patLen && pattern[patIdx] == '*' {
					// doublestar - must begin with a path separator, otherwise we'll
					// treat it like a single star like bash
					patIdx++
					if startOfSegment {
						if patIdx >= patLen {
							// pattern ends in `/**`: return true
							return true, nil
						}

						// doublestar must also end with a path separator, otherwise we're
						// just going to treat the doublestar as a single star like bash
						patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:])
						if patRune == separator {
							patIdx += patRuneLen

							doublestarPatternBacktrack = patIdx
							doublestarNameBacktrack = nameIdx
							starPatternBacktrack = -1
							starNameBacktrack = -1
							continue
						}
					}
				}
				startOfSegment = false

				starPatternBacktrack = patIdx
				starNameBacktrack = nameIdx
				continue

			case '?':
				startOfSegment = false
				nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])
				if nameRune == separator {
					// `?` cannot match the separator
					break
				}

				patIdx++
				nameIdx += nameRuneLen
				continue

			case '[':
				startOfSegment = false
				if patIdx++; patIdx >= patLen {
					// class didn't end
					return false, ErrBadPattern
				}
				nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])

				matched := false
				negate := pattern[patIdx] == '!' || pattern[patIdx] == '^'
				if negate {
					patIdx++
				}

				if patIdx >= patLen || pattern[patIdx] == ']' {
					// class didn't end or empty character class
					return false, ErrBadPattern
				}

				last := utf8.MaxRune
				for patIdx < patLen && pattern[patIdx] != ']' {
					patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:])
					patIdx += patRuneLen

					// match a range
					if last < utf8.MaxRune && patRune == '-' && patIdx < patLen && pattern[patIdx] != ']' {
						if pattern[patIdx] == '\\' {
							// next character is escaped
							patIdx++
						}
						patRune, patRuneLen = utf8.DecodeRuneInString(pattern[patIdx:])
						patIdx += patRuneLen

						if last <= nameRune && nameRune <= patRune {
							matched = true
							break
						}

						// didn't match range - reset `last`
						last = utf8.MaxRune
						continue
					}

					// not a range - check if the next rune is escaped
					if patRune == '\\' {
						patRune, patRuneLen = utf8.DecodeRuneInString(pattern[patIdx:])
						patIdx += patRuneLen
					}

					// check if the rune matches
					if patRune == nameRune {
						matched = true
						break
					}

					// no matches yet
					last = patRune
				}

				if matched == negate {
					// failed to match - if we reached the end of the pattern, that means
					// we never found a closing `]`
					if patIdx >= patLen {
						return false, ErrBadPattern
					}
					break
				}

				closingIdx := indexUnescapedByte(pattern[patIdx:], ']', true)
				if closingIdx == -1 {
					// no closing `]`
					return false, ErrBadPattern
				}

				patIdx += closingIdx + 1
				nameIdx += nameRuneLen
				continue

			case '{':
				startOfSegment = false
				beforeIdx := patIdx
				patIdx++
				closingIdx := indexMatchedClosingAlt(pattern[patIdx:], separator != '\\')
				if closingIdx == -1 {
					// no closing `}`
					return false, ErrBadPattern
				}
				closingIdx += patIdx

				for {
					commaIdx := indexNextAlt(pattern[patIdx:closingIdx], separator != '\\')
					if commaIdx == -1 {
						break
					}
					commaIdx += patIdx

					result, err := doMatchWithSeparator(pattern[:beforeIdx]+pattern[patIdx:commaIdx]+pattern[closingIdx+1:], name, separator, validate, doublestarPatternBacktrack, doublestarNameBacktrack, starPatternBacktrack, starNameBacktrack, beforeIdx, nameIdx)
					if result || err != nil {
						return result, err
					}

					patIdx = commaIdx + 1
				}
				return doMatchWithSeparator(pattern[:beforeIdx]+pattern[patIdx:closingIdx]+pattern[closingIdx+1:], name, separator, validate, doublestarPatternBacktrack, doublestarNameBacktrack, starPatternBacktrack, starNameBacktrack, beforeIdx, nameIdx)

			case '\\':
				if separator != '\\' {
					// next rune is "escaped" in the pattern - literal match
					if patIdx++; patIdx >= patLen {
						// pattern ended
						return false, ErrBadPattern
					}
				}
				fallthrough

			default:
				patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:])
				nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])
				if patRune != nameRune {
					if separator != '\\' && patIdx > 0 && pattern[patIdx-1] == '\\' {
						// if this rune was meant to be escaped, we need to move patIdx
						// back to the backslash before backtracking or validating below
						patIdx--
					}
					break
				}

				patIdx += patRuneLen
				nameIdx += nameRuneLen
				startOfSegment = patRune == separator
				continue
			}
		}

		if starPatternBacktrack >= 0 {
			// `*` backtrack, but only if the `name` rune isn't the separator
			nameRune, nameRuneLen := utf8.DecodeRuneInString(name[starNameBacktrack:])
			if nameRune != separator {
				starNameBacktrack += nameRuneLen
				patIdx = starPatternBacktrack
				nameIdx = starNameBacktrack
				startOfSegment = false
				continue
			}
		}

		if doublestarPatternBacktrack >= 0 {
			// `**` backtrack, advance `name` past next separator
			nameIdx = doublestarNameBacktrack
			for nameIdx < nameLen {
				nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])
				nameIdx += nameRuneLen
				if nameRune == separator {
					doublestarNameBacktrack = nameIdx
					patIdx = doublestarPatternBacktrack
					startOfSegment = true
					continue MATCH
				}
			}
		}

		if validate && patIdx < patLen && !doValidatePattern(pattern[patIdx:], separator) {
			return false, ErrBadPattern
		}
		return false, nil
	}

	if nameIdx < nameLen {
		// we reached the end of `pattern` before the end of `name`
		return false, nil
	}

	// we've reached the end of `name`; we've successfully matched if we've also
	// reached the end of `pattern`, or if the rest of `pattern` can match a
	// zero-length string
	return isZeroLengthPattern(pattern[patIdx:], separator)
}

func isZeroLengthPattern(pattern string, separator rune) (ret bool, err error) {
	// `/**`, `**/`, and `/**/` are special cases - a pattern such as `path/to/a/**` or `path/to/a/**/`
	// *should* match `path/to/a` because `a` might be a directory
	if pattern == "" ||
		pattern == "*" ||
		pattern == "**" ||
		pattern == string(separator)+"**" ||
		pattern == "**"+string(separator) ||
		pattern == string(separator)+"**"+string(separator) {
		return true, nil
	}

	if pattern[0] == '{' {
		closingIdx := indexMatchedClosingAlt(pattern[1:], separator != '\\')
		if closingIdx == -1 {
			// no closing '}'
			return false, ErrBadPattern
		}
		closingIdx += 1

		patIdx := 1
		for {
			commaIdx := indexNextAlt(pattern[patIdx:closingIdx], separator != '\\')
			if commaIdx == -1 {
				break
			}
			commaIdx += patIdx

			ret, err = isZeroLengthPattern(pattern[patIdx:commaIdx]+pattern[closingIdx+1:], separator)
			if ret || err != nil {
				return
			}

			patIdx = commaIdx + 1
		}
		return isZeroLengthPattern(pattern[patIdx:closingIdx]+pattern[closingIdx+1:], separator)
	}

	// no luck - validate the rest of the pattern
	if !doValidatePattern(pattern, separator) {
		return false, ErrBadPattern
	}
	return false, nil
}

// Finds the index of the first unescaped byte `c`, or negative 1.
func indexUnescapedByte(s string, c byte, allowEscaping bool) int {
	l := len(s)
	for i := 0; i < l; i++ {
		if allowEscaping && s[i] == '\\' {
			// skip next byte
			i++
		} else if s[i] == c {
			return i
		}
	}
	return -1
}

// Assuming the byte before the beginning of `s` is an opening `{`, this
// function will find the index of the matching `}`. That is, it'll skip over
// any nested `{}` and account for escaping
func indexMatchedClosingAlt(s string, allowEscaping bool) int {
	alts := 1
	l := len(s)
	for i := 0; i < l; i++ {
		if allowEscaping && s[i] == '\\' {
			// skip next byte
			i++
		} else if s[i] == '{' {
			alts++
		} else if s[i] == '}' {
			if alts--; alts == 0 {
				return i
			}
		}
	}
	return -1
}