1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206
|
package argv
import (
"unicode"
)
// Scanner is a cmdline string scanner.
//
// It split cmdline string to tokens: space, string, pipe, reverse quote string.
type Scanner struct {
text []rune
rpos int
}
// NewScanner create a scanner and init it's internal states.
func NewScanner(text string) *Scanner {
return &Scanner{
text: []rune(text),
}
}
const _RuneEOF = 0
func (s *Scanner) nextRune() rune {
if s.rpos >= len(s.text) {
return _RuneEOF
}
r := s.text[s.rpos]
s.rpos++
return r
}
func (s *Scanner) unreadRune(r rune) {
if r != _RuneEOF {
s.rpos--
}
}
func (s *Scanner) isEscapeChars(r rune) (rune, bool) {
switch r {
case 'a':
return '\a', true
case 'b':
return '\b', true
case 'f':
return '\f', true
case 'n':
return '\n', true
case 'r':
return '\r', true
case 't':
return '\t', true
case 'v':
return '\v', true
case '\\':
return '\\', true
case '$':
return '$', true
}
return r, false
}
// TokenType is the type of tokens recognized by the scanner.
type TokenType uint32
// Token is generated by the scanner with a type and value.
type Token struct {
Type TokenType
Value []rune
}
const (
// TokString for string
TokString TokenType = iota + 1
TokStringSingleQuote
TokStringDoubleQuote
// TokPipe is the '|' character
TokPipe
// TokBackQuote is reverse quoted string
TokBackQuote
// TokSpace represent space character sequence
TokSpace
// TokEOF means the input end.
TokEOF
)
// Next return next token, if it reach the end, TOK_EOF will be returned.
//
// Error is returned for invalid syntax such as unpaired quotes.
func (s *Scanner) Next() (Token, error) {
const (
Initial = iota + 1
Space
BackQuote
String
StringSingleQuote
StringDoubleQuote
)
var (
tok Token
state uint8 = Initial
)
for {
r := s.nextRune()
switch state {
case Initial:
switch {
case r == _RuneEOF:
tok.Type = TokEOF
return tok, nil
case r == '|':
tok.Type = TokPipe
return tok, nil
case r == '`':
state = BackQuote
case unicode.IsSpace(r):
state = Space
s.unreadRune(r)
case r == '\'':
state = StringSingleQuote
case r == '"':
state = StringDoubleQuote
default:
state = String
s.unreadRune(r)
}
case Space:
if r == _RuneEOF || !unicode.IsSpace(r) {
s.unreadRune(r)
tok.Type = TokSpace
return tok, nil
}
case BackQuote:
switch r {
case _RuneEOF:
return tok, ErrInvalidSyntax
case '`':
tok.Type = TokBackQuote
return tok, nil
default:
tok.Value = append(tok.Value, r)
}
case String:
switch {
case r == _RuneEOF, r == '|', r == '`', r == '\'', r == '"', unicode.IsSpace(r):
tok.Type = TokString
s.unreadRune(r)
return tok, nil
case r == '\\':
nr := s.nextRune()
if nr == _RuneEOF {
return tok, ErrInvalidSyntax
}
tok.Value = append(tok.Value, nr)
default:
tok.Value = append(tok.Value, r)
}
case StringSingleQuote, StringDoubleQuote:
switch r {
case _RuneEOF:
return tok, ErrInvalidSyntax
case '\'', '"':
if singleQuote := state == StringSingleQuote; singleQuote == (r == '\'') {
if singleQuote {
tok.Type = TokStringSingleQuote
} else {
tok.Type = TokStringDoubleQuote
}
return tok, nil
} else {
tok.Value = append(tok.Value, r)
}
case '\\':
nr := s.nextRune()
if escape, ok := s.isEscapeChars(nr); ok {
tok.Value = append(tok.Value, escape)
} else {
tok.Value = append(tok.Value, r)
s.unreadRune(nr)
}
default:
tok.Value = append(tok.Value, r)
}
}
}
}
// Scan is a utility function help split input text as tokens.
func Scan(text string) ([]Token, error) {
s := NewScanner(text)
var tokens []Token
for {
tok, err := s.Next()
if err != nil {
return nil, err
}
tokens = append(tokens, tok)
if tok.Type == TokEOF {
break
}
}
return tokens, nil
}
|