/*
Package shlex implements a simple lexer which splits input into tokens using
shell-style rules for quoting.

The basic use case uses the default ASCII lexer to split a string into sub-strings:

	shlex.Split("one \"two three\" four") -> []string{"one", "two three", "four"}

To process tokens one at a time:

	l := NewLexer(input)
	for word := l.Next(); word.Value != ""; word = l.Next() {
		if word.Err != nil {
			// handle error
			break
		}
		// process word.Value
	}
*/
package shlex
import (
"fmt"
"github.com/kovidgoyal/kitty/tools/utils"
"strings"
"unicode/utf8"
)
// Word is a single token produced by the Lexer.
type Word struct {
	Value   string // The word is empty if EOF is reached
	Pos     int    // The position in the input string of the word or the trailer
	Err     error  // Indicates an error (unterminated string or trailing unescaped backslash)
	Trailer string // Extra trailing data such as an unterminated string or an unescaped backslash. Present only if Err != nil
}
// lexer_state identifies which state the Lexer's state machine is in.
type lexer_state int

// Lexer state machine states
const (
	lex_normal             lexer_state = iota // between words, skipping whitespace
	word                                      // inside an unquoted word
	string_without_escapes                    // inside a single-quoted string: backslash is literal
	string_with_escapes                       // inside a double-quoted string: backslash escapes the next rune
)
// Lexer turns an input stream into a sequence of tokens. Whitespace is skipped.
type Lexer struct {
	state                       lexer_state     // current state machine state
	src                         string          // the input being tokenized
	src_sz, src_pos, word_start int             // input length in bytes, current byte offset, byte offset where the current word started
	buf                         strings.Builder // accumulates the characters of the word being built
}
// NewLexer creates a new lexer from an input string.
func NewLexer(x string) *Lexer {
	ans := Lexer{src: x}
	ans.src_sz = len(x)
	return &ans
}
// start_word resets the accumulation buffer and records the starting
// position of a new word. The position is src_pos-1 because the byte
// that triggered the word has already been consumed by the caller.
func (self *Lexer) start_word() {
	self.buf.Reset()
	self.word_start = self.src_pos - 1
}
// get_word packages the accumulated buffer and its recorded start
// position into a Word value.
func (self *Lexer) get_word() Word {
	ans := Word{Pos: self.word_start}
	ans.Value = self.buf.String()
	return ans
}
// write_ch appends a single byte to the word being accumulated.
func (self *Lexer) write_ch(ch byte) {
	self.buf.WriteByte(ch)
}
// write_escaped_ch consumes the rune following an escape character and
// appends it to the current word. It returns false only when the input
// is exhausted, i.e. the backslash was the last byte of the input.
// A byte that is not valid UTF-8 is consumed but silently dropped.
func (self *Lexer) write_escaped_ch() bool {
	ch, count := utf8.DecodeRuneInString(self.src[self.src_pos:])
	if count > 0 {
		self.src_pos += count
		// A decode failure reports RuneError with count == 1, whereas a
		// genuine U+FFFD present in the input decodes to RuneError with
		// count == 3. Checking count distinguishes the two so that a
		// literal escaped U+FFFD is preserved instead of being dropped.
		if ch != utf8.RuneError || count > 1 {
			self.buf.WriteRune(ch)
		}
		return true
	}
	return false
}
// Next returns the next word. At EOF Word.Value will be ""
func (self *Lexer) Next() (ans Word) {
	const string_with_escapes_delim = '"'
	const string_without_escapes_delim = '\''
	const escape_char = '\\'
	// Drive the state machine one byte at a time. Multi-byte UTF-8
	// sequences pass through unchanged in the default cases; only escaped
	// runes are explicitly decoded (by write_escaped_ch).
	for self.src_pos < self.src_sz {
		ch := self.src[self.src_pos]
		self.src_pos++
		switch self.state {
		case lex_normal: // between words: skip whitespace, look for a word start
			switch ch {
			case ' ', '\n', '\r', '\t':
			case string_with_escapes_delim:
				self.state = string_with_escapes
				self.start_word()
			case string_without_escapes_delim:
				self.state = string_without_escapes
				self.start_word()
			case escape_char:
				self.start_word()
				if !self.write_escaped_ch() {
					// Backslash was the very last byte of the input.
					ans.Trailer = "\\"
					ans.Err = fmt.Errorf("Extra backslash at end of input")
					ans.Pos = self.word_start
					return
				}
				self.state = word
			default:
				self.state = word
				self.start_word()
				self.write_ch(ch)
			}
		case word: // inside an unquoted word
			switch ch {
			case ' ', '\n', '\r', '\t':
				self.state = lex_normal
				// Note: a word made solely of empty quotes ("" or '') leaves
				// the buffer empty and is therefore not reported as a word.
				if self.buf.Len() > 0 {
					return self.get_word()
				}
			case string_with_escapes_delim:
				self.state = string_with_escapes
			case string_without_escapes_delim:
				self.state = string_without_escapes
			case escape_char:
				if !self.write_escaped_ch() {
					// Trailing unescaped backslash: report the partial word.
					ans.Pos = self.word_start
					ans.Trailer = self.buf.String() + "\\"
					ans.Err = fmt.Errorf("Extra backslash at end of input")
					return
				}
			default:
				self.write_ch(ch)
			}
		case string_without_escapes: // inside '...': everything is literal until the closing quote
			switch ch {
			case string_without_escapes_delim:
				self.state = word
			default:
				self.write_ch(ch)
			}
		case string_with_escapes: // inside "...": backslash escapes the next rune
			switch ch {
			case string_with_escapes_delim:
				self.state = word
			case escape_char:
				self.write_escaped_ch()
			default:
				self.write_ch(ch)
			}
		}
	}
	// End of input: flush an in-progress word, or report an unterminated string.
	switch self.state {
	case word:
		self.state = lex_normal
		if self.buf.Len() > 0 {
			return self.get_word()
		}
	case string_with_escapes, string_without_escapes:
		self.state = lex_normal
		ans.Trailer = self.buf.String()
		ans.Pos = self.word_start
		ans.Err = fmt.Errorf("Unterminated string at end of input")
		return
	case lex_normal:
	}
	return
}
// Split partitions a string into a slice of strings.
func Split(s string) (ans []string, err error) {
	lexer := NewLexer(s)
	for {
		w := lexer.Next()
		if w.Err != nil {
			return ans, w.Err
		}
		if w.Value == "" {
			// EOF reached with no error.
			return
		}
		ans = append(ans, w.Value)
	}
}
// Quote returns s wrapped in single quotes if it contains any character
// outside the shell-safe set; otherwise s is returned unchanged. Embedded
// single quotes are emitted using the '"'"' idiom.
func Quote(s string) string {
	if s == "" {
		return s
	}
	needs_quoting := utils.MustCompile(`[^\w@%+=:,./-]`).MatchString(s)
	if !needs_quoting {
		return s
	}
	return "'" + strings.ReplaceAll(s, "'", "'\"'\"'") + "'"
}
// SplitForCompletion partitions a string into a slice of strings. It differs from Split in being
// more relaxed about errors and also adding an empty string at the end if s ends with a Space.
// position_of_last_arg is the byte offset in s of the last returned argument.
func SplitForCompletion(s string) (argv []string, position_of_last_arg int) {
	t := NewLexer(s)
	argv = make([]string, 0, len(s)/4)
	for {
		word := t.Next()
		if word.Value == "" {
			// Either clean EOF or an error token. Errors carry the problem
			// text in word.Trailer (unterminated string / trailing backslash),
			// which is kept as the final argument instead of being reported.
			if word.Trailer == "" {
				trimmed := strings.TrimRight(s, " ")
				if len(trimmed) < len(s) { // trailing spaces
					pos := position_of_last_arg
					if len(argv) > 0 {
						pos += len(argv[len(argv)-1])
					}
					if pos < len(s) { // trailing whitespace
						// Cursor sits past the last word: add an empty final
						// argument for the completer to fill in.
						// NOTE(review): pos is derived from the unquoted length
						// of the last argument, so for quoted or escaped words
						// this offset looks approximate — verify against callers.
						argv = append(argv, "")
						position_of_last_arg += len(s) - pos + 1
					}
				}
			} else {
				argv = append(argv, word.Trailer)
				position_of_last_arg = word.Pos
			}
			break
		}
		position_of_last_arg = word.Pos
		argv = append(argv, word.Value)
	}
	return
}