File: match.go

package info (click to toggle)
golang-github-bmatcuk-doublestar 4.6.1-1
links: PTS, VCS
area: main
in suites: experimental, forky, sid, trixie
size: 212 kB
sloc: makefile: 6
file content (381 lines) | stat: -rw-r--r-- 11,558 bytes
parent folder | download | duplicates (2)
package doublestar

import (
	"path/filepath"
	"unicode/utf8"
)

// Match reports whether name matches the shell pattern.
// The pattern syntax is:
//
//  pattern:
//    { term }
//  term:
//    '*'         matches any sequence of non-path-separators
//    '/**/'      matches zero or more directories
//    '?'         matches any single non-path-separator character
//    '[' [ '^' '!' ] { character-range } ']'
//                character class (must be non-empty)
//                starting with `^` or `!` negates the class
//    '{' { term } [ ',' { term } ... ] '}'
//                alternatives
//    c           matches character c (c != '*', '?', '\\', '[')
//    '\\' c      matches character c
//
//  character-range:
//    c           matches character c (c != '\\', '-', ']')
//    '\\' c      matches character c
//    lo '-' hi   matches character c for lo <= c <= hi
//
// Match returns true if `name` matches the file name `pattern`. `name` and
// `pattern` are split on forward slash (`/`) characters and may be relative or
// absolute.
//
// Match requires pattern to match all of name, not just a substring.
// The only possible returned error is ErrBadPattern, when pattern
// is malformed.
//
// A doublestar (`**`) should appear surrounded by path separators such as
// `/**/`.  A mid-pattern doublestar (`**`) behaves like bash's globstar
// option: a pattern such as `path/to/**.txt` would return the same results as
// `path/to/*.txt`. The pattern you're looking for is `path/to/**/*.txt`.
//
// Note: this is meant as a drop-in replacement for path.Match() which
// always uses '/' as the path separator. If you want to support systems
// which use a different path separator (such as Windows), what you want
// is PathMatch(). Alternatively, you can run filepath.ToSlash() on both
// pattern and name and then use this function.
//
// Note: users should _not_ count on the returned error,
// doublestar.ErrBadPattern, being equal to path.ErrBadPattern.
//
func Match(pattern, name string) (bool, error) {
	return matchWithSeparator(pattern, name, '/', true)
}

// PathMatch returns true if `name` matches the file name `pattern`. The
// difference between Match and PathMatch is that PathMatch will automatically
// use your system's path separator to split `name` and `pattern`. On systems
// where the path separator is `'\'`, escaping will be disabled.
//
// Note: this is meant as a drop-in replacement for filepath.Match(). It
// assumes that both `pattern` and `name` are using the system's path
// separator. If you can't be sure of that, use filepath.ToSlash() on both
// `pattern` and `name`, and then use the Match() function instead.
//
func PathMatch(pattern, name string) (bool, error) {
	return matchWithSeparator(pattern, name, filepath.Separator, true)
}

func matchWithSeparator(pattern, name string, separator rune, validate bool) (matched bool, err error) {
	return doMatchWithSeparator(pattern, name, separator, validate, -1, -1, -1, -1, 0, 0)
}

func doMatchWithSeparator(pattern, name string, separator rune, validate bool, doublestarPatternBacktrack, doublestarNameBacktrack, starPatternBacktrack, starNameBacktrack, patIdx, nameIdx int) (matched bool, err error) {
	patLen := len(pattern)
	nameLen := len(name)
	startOfSegment := true
MATCH:
	for nameIdx < nameLen {
		if patIdx < patLen {
			switch pattern[patIdx] {
			case '*':
				if patIdx++; patIdx < patLen && pattern[patIdx] == '*' {
					// doublestar - must begin with a path separator, otherwise we'll
					// treat it like a single star like bash
					patIdx++
					if startOfSegment {
						if patIdx >= patLen {
							// pattern ends in `/**`: return true
							return true, nil
						}

						// doublestar must also end with a path separator, otherwise we're
						// just going to treat the doublestar as a single star like bash
						patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:])
						if patRune == separator {
							patIdx += patRuneLen

							doublestarPatternBacktrack = patIdx
							doublestarNameBacktrack = nameIdx
							starPatternBacktrack = -1
							starNameBacktrack = -1
							continue
						}
					}
				}
				startOfSegment = false

				starPatternBacktrack = patIdx
				starNameBacktrack = nameIdx
				continue

			case '?':
				startOfSegment = false
				nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])
				if nameRune == separator {
					// `?` cannot match the separator
					break
				}

				patIdx++
				nameIdx += nameRuneLen
				continue

			case '[':
				startOfSegment = false
				if patIdx++; patIdx >= patLen {
					// class didn't end
					return false, ErrBadPattern
				}
				nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])

				matched := false
				negate := pattern[patIdx] == '!' || pattern[patIdx] == '^'
				if negate {
					patIdx++
				}

				if patIdx >= patLen || pattern[patIdx] == ']' {
					// class didn't end or empty character class
					return false, ErrBadPattern
				}

				last := utf8.MaxRune
				for patIdx < patLen && pattern[patIdx] != ']' {
					patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:])
					patIdx += patRuneLen

					// match a range
					if last < utf8.MaxRune && patRune == '-' && patIdx < patLen && pattern[patIdx] != ']' {
						if pattern[patIdx] == '\\' {
							// next character is escaped
							patIdx++
						}
						patRune, patRuneLen = utf8.DecodeRuneInString(pattern[patIdx:])
						patIdx += patRuneLen

						if last <= nameRune && nameRune <= patRune {
							matched = true
							break
						}

						// didn't match range - reset `last`
						last = utf8.MaxRune
						continue
					}

					// not a range - check if the next rune is escaped
					if patRune == '\\' {
						patRune, patRuneLen = utf8.DecodeRuneInString(pattern[patIdx:])
						patIdx += patRuneLen
					}

					// check if the rune matches
					if patRune == nameRune {
						matched = true
						break
					}

					// no matches yet
					last = patRune
				}

				if matched == negate {
					// failed to match - if we reached the end of the pattern, that means
					// we never found a closing `]`
					if patIdx >= patLen {
						return false, ErrBadPattern
					}
					break
				}

				closingIdx := indexUnescapedByte(pattern[patIdx:], ']', true)
				if closingIdx == -1 {
					// no closing `]`
					return false, ErrBadPattern
				}

				patIdx += closingIdx + 1
				nameIdx += nameRuneLen
				continue

			case '{':
				startOfSegment = false
				beforeIdx := patIdx
				patIdx++
				closingIdx := indexMatchedClosingAlt(pattern[patIdx:], separator != '\\')
				if closingIdx == -1 {
					// no closing `}`
					return false, ErrBadPattern
				}
				closingIdx += patIdx

				for {
					commaIdx := indexNextAlt(pattern[patIdx:closingIdx], separator != '\\')
					if commaIdx == -1 {
						break
					}
					commaIdx += patIdx

					result, err := doMatchWithSeparator(pattern[:beforeIdx]+pattern[patIdx:commaIdx]+pattern[closingIdx+1:], name, separator, validate, doublestarPatternBacktrack, doublestarNameBacktrack, starPatternBacktrack, starNameBacktrack, beforeIdx, nameIdx)
					if result || err != nil {
						return result, err
					}

					patIdx = commaIdx + 1
				}
				return doMatchWithSeparator(pattern[:beforeIdx]+pattern[patIdx:closingIdx]+pattern[closingIdx+1:], name, separator, validate, doublestarPatternBacktrack, doublestarNameBacktrack, starPatternBacktrack, starNameBacktrack, beforeIdx, nameIdx)

			case '\\':
				if separator != '\\' {
					// next rune is "escaped" in the pattern - literal match
					if patIdx++; patIdx >= patLen {
						// pattern ended
						return false, ErrBadPattern
					}
				}
				fallthrough

			default:
				patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:])
				nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])
				if patRune != nameRune {
					if separator != '\\' && patIdx > 0 && pattern[patIdx-1] == '\\' {
						// if this rune was meant to be escaped, we need to move patIdx
						// back to the backslash before backtracking or validating below
						patIdx--
					}
					break
				}

				patIdx += patRuneLen
				nameIdx += nameRuneLen
				startOfSegment = patRune == separator
				continue
			}
		}

		if starPatternBacktrack >= 0 {
			// `*` backtrack, but only if the `name` rune isn't the separator
			nameRune, nameRuneLen := utf8.DecodeRuneInString(name[starNameBacktrack:])
			if nameRune != separator {
				starNameBacktrack += nameRuneLen
				patIdx = starPatternBacktrack
				nameIdx = starNameBacktrack
				startOfSegment = false
				continue
			}
		}

		if doublestarPatternBacktrack >= 0 {
			// `**` backtrack, advance `name` past next separator
			nameIdx = doublestarNameBacktrack
			for nameIdx < nameLen {
				nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])
				nameIdx += nameRuneLen
				if nameRune == separator {
					doublestarNameBacktrack = nameIdx
					patIdx = doublestarPatternBacktrack
					startOfSegment = true
					continue MATCH
				}
			}
		}

		if validate && patIdx < patLen && !doValidatePattern(pattern[patIdx:], separator) {
			return false, ErrBadPattern
		}
		return false, nil
	}

	if nameIdx < nameLen {
		// we reached the end of `pattern` before the end of `name`
		return false, nil
	}

	// we've reached the end of `name`; we've successfully matched if we've also
	// reached the end of `pattern`, or if the rest of `pattern` can match a
	// zero-length string
	return isZeroLengthPattern(pattern[patIdx:], separator)
}

func isZeroLengthPattern(pattern string, separator rune) (ret bool, err error) {
	// `/**`, `**/`, and `/**/` are special cases - a pattern such as `path/to/a/**` or `path/to/a/**/`
	// *should* match `path/to/a` because `a` might be a directory
	if pattern == "" ||
		pattern == "*" ||
		pattern == "**" ||
		pattern == string(separator)+"**" ||
		pattern == "**"+string(separator) ||
		pattern == string(separator)+"**"+string(separator) {
		return true, nil
	}

	if pattern[0] == '{' {
		closingIdx := indexMatchedClosingAlt(pattern[1:], separator != '\\')
		if closingIdx == -1 {
			// no closing '}'
			return false, ErrBadPattern
		}
		closingIdx += 1

		patIdx := 1
		for {
			commaIdx := indexNextAlt(pattern[patIdx:closingIdx], separator != '\\')
			if commaIdx == -1 {
				break
			}
			commaIdx += patIdx

			ret, err = isZeroLengthPattern(pattern[patIdx:commaIdx]+pattern[closingIdx+1:], separator)
			if ret || err != nil {
				return
			}

			patIdx = commaIdx + 1
		}
		return isZeroLengthPattern(pattern[patIdx:closingIdx]+pattern[closingIdx+1:], separator)
	}

	// no luck - validate the rest of the pattern
	if !doValidatePattern(pattern, separator) {
		return false, ErrBadPattern
	}
	return false, nil
}

// Finds the index of the first unescaped byte `c`, or negative 1.
func indexUnescapedByte(s string, c byte, allowEscaping bool) int {
	l := len(s)
	for i := 0; i < l; i++ {
		if allowEscaping && s[i] == '\\' {
			// skip next byte
			i++
		} else if s[i] == c {
			return i
		}
	}
	return -1
}

// Assuming the byte before the beginning of `s` is an opening `{`, this
// function will find the index of the matching `}`. That is, it'll skip over
// any nested `{}` and account for escaping
func indexMatchedClosingAlt(s string, allowEscaping bool) int {
	alts := 1
	l := len(s)
	for i := 0; i < l; i++ {
		if allowEscaping && s[i] == '\\' {
			// skip next byte
			i++
		} else if s[i] == '{' {
			alts++
		} else if s[i] == '}' {
			if alts--; alts == 0 {
				return i
			}
		}
	}
	return -1
}