File: fnmatch.go

package info (click to toggle)
golang-github-danwakefield-fnmatch 0.0~git20160403.cbb64ac-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, experimental, sid
  • size: 80 kB
  • sloc: makefile: 2
file content (219 lines) | stat: -rw-r--r-- 5,775 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
// Provide string-matching based on fnmatch.3
package fnmatch

// There are a few issues that I believe to be bugs, but this implementation is
// based as closely as possible on BSD fnmatch. These bugs are present in the
// source of BSD fnmatch, and so are replicated here. The issues are as follows:
//
// * FNM_PERIOD is no longer observed after the first * in a pattern
//   This only applies to matches done with FNM_PATHNAME as well
// * FNM_PERIOD doesn't apply to ranges. According to the documentation,
//   a period must be matched explicitly, but a range will match it too

import (
	"unicode"
	"unicode/utf8"
)

const (
	FNM_NOESCAPE = (1 << iota)
	FNM_PATHNAME
	FNM_PERIOD

	FNM_LEADING_DIR
	FNM_CASEFOLD

	FNM_IGNORECASE = FNM_CASEFOLD
	FNM_FILE_NAME  = FNM_PATHNAME
)

func unpackRune(str *string) rune {
	rune, size := utf8.DecodeRuneInString(*str)
	*str = (*str)[size:]
	return rune
}

// Matches the pattern against the string, with the given flags,
// and returns true if the match is successful.
// This function should match fnmatch.3 as closely as possible.
func Match(pattern, s string, flags int) bool {
	// The implementation for this function was patterned after the BSD fnmatch.c
	// source found at http://src.gnu-darwin.org/src/contrib/csup/fnmatch.c.html
	noescape := (flags&FNM_NOESCAPE != 0)
	pathname := (flags&FNM_PATHNAME != 0)
	period := (flags&FNM_PERIOD != 0)
	leadingdir := (flags&FNM_LEADING_DIR != 0)
	casefold := (flags&FNM_CASEFOLD != 0)
	// the following is some bookkeeping that the original fnmatch.c implementation did not do
	// We are forced to do this because we're not keeping indexes into C strings but rather
	// processing utf8-encoded strings. Use a custom unpacker to maintain our state for us
	sAtStart := true
	sLastAtStart := true
	sLastSlash := false
	sLastUnpacked := rune(0)
	unpackS := func() rune {
		sLastSlash = (sLastUnpacked == '/')
		sLastUnpacked = unpackRune(&s)
		sLastAtStart = sAtStart
		sAtStart = false
		return sLastUnpacked
	}
	for len(pattern) > 0 {
		c := unpackRune(&pattern)
		switch c {
		case '?':
			if len(s) == 0 {
				return false
			}
			sc := unpackS()
			if pathname && sc == '/' {
				return false
			}
			if period && sc == '.' && (sLastAtStart || (pathname && sLastSlash)) {
				return false
			}
		case '*':
			// collapse multiple *'s
			// don't use unpackRune here, the only char we care to detect is ASCII
			for len(pattern) > 0 && pattern[0] == '*' {
				pattern = pattern[1:]
			}
			if period && s[0] == '.' && (sAtStart || (pathname && sLastUnpacked == '/')) {
				return false
			}
			// optimize for patterns with * at end or before /
			if len(pattern) == 0 {
				if pathname {
					return leadingdir || (strchr(s, '/') == -1)
				} else {
					return true
				}
				return !(pathname && strchr(s, '/') >= 0)
			} else if pathname && pattern[0] == '/' {
				offset := strchr(s, '/')
				if offset == -1 {
					return false
				} else {
					// we already know our pattern and string have a /, skip past it
					s = s[offset:] // use unpackS here to maintain our bookkeeping state
					unpackS()
					pattern = pattern[1:] // we know / is one byte long
					break
				}
			}
			// general case, recurse
			for test := s; len(test) > 0; unpackRune(&test) {
				// I believe the (flags &^ FNM_PERIOD) is a bug when FNM_PATHNAME is specified
				// but this follows exactly from how fnmatch.c implements it
				if Match(pattern, test, (flags &^ FNM_PERIOD)) {
					return true
				} else if pathname && test[0] == '/' {
					break
				}
			}
			return false
		case '[':
			if len(s) == 0 {
				return false
			}
			if pathname && s[0] == '/' {
				return false
			}
			sc := unpackS()
			if !rangematch(&pattern, sc, flags) {
				return false
			}
		case '\\':
			if !noescape {
				if len(pattern) > 0 {
					c = unpackRune(&pattern)
				}
			}
			fallthrough
		default:
			if len(s) == 0 {
				return false
			}
			sc := unpackS()
			switch {
			case sc == c:
			case casefold && unicode.ToLower(sc) == unicode.ToLower(c):
			default:
				return false
			}
		}
	}
	return len(s) == 0 || (leadingdir && s[0] == '/')
}

func rangematch(pattern *string, test rune, flags int) bool {
	if len(*pattern) == 0 {
		return false
	}
	casefold := (flags&FNM_CASEFOLD != 0)
	noescape := (flags&FNM_NOESCAPE != 0)
	if casefold {
		test = unicode.ToLower(test)
	}
	var negate, matched bool
	if (*pattern)[0] == '^' || (*pattern)[0] == '!' {
		negate = true
		(*pattern) = (*pattern)[1:]
	}
	for !matched && len(*pattern) > 1 && (*pattern)[0] != ']' {
		c := unpackRune(pattern)
		if !noescape && c == '\\' {
			if len(*pattern) > 1 {
				c = unpackRune(pattern)
			} else {
				return false
			}
		}
		if casefold {
			c = unicode.ToLower(c)
		}
		if (*pattern)[0] == '-' && len(*pattern) > 1 && (*pattern)[1] != ']' {
			unpackRune(pattern) // skip the -
			c2 := unpackRune(pattern)
			if !noescape && c2 == '\\' {
				if len(*pattern) > 0 {
					c2 = unpackRune(pattern)
				} else {
					return false
				}
			}
			if casefold {
				c2 = unicode.ToLower(c2)
			}
			// this really should be more intelligent, but it looks like
			// fnmatch.c does simple int comparisons, therefore we will as well
			if c <= test && test <= c2 {
				matched = true
			}
		} else if c == test {
			matched = true
		}
	}
	// skip past the rest of the pattern
	ok := false
	for !ok && len(*pattern) > 0 {
		c := unpackRune(pattern)
		if c == '\\' && len(*pattern) > 0 {
			unpackRune(pattern)
		} else if c == ']' {
			ok = true
		}
	}
	return ok && matched != negate
}

// define strchr because strings.Index() seems a bit overkill
// returns the index of c in s, or -1 if there is no match
func strchr(s string, c rune) int {
	for i, sc := range s {
		if sc == c {
			return i
		}
	}
	return -1
}