1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
|
package revision
import (
"bufio"
"io"
"unicode"
)
// runeCategoryValidator is a predicate over a single rune: it reports
// whether that rune belongs to the category the validator stands for.
type runeCategoryValidator func(rune) bool
// tokenizeExpression collects ch plus every following rune accepted by
// check into a single string and returns it tagged with tokenType.
//
// Reading stops at the first of:
//   - end of input (io.EOF);
//   - a NUL rune, which is consumed and not made part of the token;
//   - a rune rejected by check, which is pushed back onto r so the next
//     read sees it again.
//
// Any read error other than io.EOF, and any failure to push a rune back,
// is returned as (tokenError, "", err).
func tokenizeExpression(ch rune, tokenType token, check runeCategoryValidator, r *bufio.Reader) (token, string, error) {
	data := []rune{ch}

	for {
		c, _, err := r.ReadRune()
		// The error has to be inspected before the rune: ReadRune hands
		// back rune 0 together with every error, so testing the rune first
		// would silently swallow real read failures.
		if err == io.EOF {
			break
		}
		if err != nil {
			return tokenError, "", err
		}
		if c == zeroRune {
			break
		}
		if !check(c) {
			if err := r.UnreadRune(); err != nil {
				return tokenError, "", err
			}
			break
		}
		data = append(data, c)
	}

	return tokenType, string(data), nil
}
// maxRevisionLength caps how many bytes of input are parsed as a single
// revision. Git itself imposes no limit of its own and instead relies on
// the operating system to enforce one through ARG_MAX.
const maxRevisionLength = 128 << 10 // 128 KiB
// zeroRune is the NUL rune (U+0000). The scanner treats reading it as the
// end of the input.
var zeroRune rune
// scanner is a lexical scanner: it turns the runes of its input into
// tokens.
type scanner struct {
	// r is the buffered reader the runes are pulled from.
	r *bufio.Reader
}
// newScanner builds a scanner over r. Reads are buffered, and no more than
// maxRevisionLength bytes of r are ever handed to the scanner.
func newScanner(r io.Reader) *scanner {
	limited := io.LimitReader(r, maxRevisionLength)

	s := new(scanner)
	s.r = bufio.NewReader(limited)
	return s
}
// scan reads the next token from the scanner's reader and returns it
// together with its literal text.
//
//   - At end of input, or on a NUL rune, it returns eof with an empty
//     literal.
//   - A read failure other than io.EOF is returned as tokenError with the
//     error.
//   - Punctuation runes, whitespace and control characters each form a
//     one-rune token.
//   - A run of letters becomes a word and a run of numbers becomes a number.
//   - Any other rune is returned as tokenError with a nil error.
func (s *scanner) scan() (token, string, error) {
	first, _, err := s.r.ReadRune()
	if err != nil && err != io.EOF {
		return tokenError, "", err
	}

	// ReadRune yields rune 0 at end of input, so this also covers io.EOF.
	if first == zeroRune {
		return eof, "", nil
	}

	if tok, ok := singleRuneToken(first); ok {
		return tok, string(first), nil
	}

	// Order matters: whitespace such as '\t' is also a control character
	// and must be classified as space.
	switch {
	case unicode.IsSpace(first):
		return space, string(first), nil
	case unicode.IsControl(first):
		return control, string(first), nil
	case unicode.IsLetter(first):
		return tokenizeExpression(first, word, unicode.IsLetter, s.r)
	case unicode.IsNumber(first):
		return tokenizeExpression(first, number, unicode.IsNumber, s.r)
	default:
		return tokenError, string(first), nil
	}
}

// singleRuneToken maps a rune that is a token on its own to that token.
// The boolean result is false when ch is not such a rune.
func singleRuneToken(ch rune) (token, bool) {
	switch ch {
	case ':':
		return colon, true
	case '~':
		return tilde, true
	case '^':
		return caret, true
	case '.':
		return dot, true
	case '/':
		return slash, true
	case '{':
		return obrace, true
	case '}':
		return cbrace, true
	case '-':
		return minus, true
	case '@':
		return at, true
	case '\\':
		return aslash, true
	case '?':
		return qmark, true
	case '*':
		return asterisk, true
	case '[':
		return obracket, true
	case '!':
		return emark, true
	}
	return tokenError, false
}
|