File: shlex.go

package info: kitty 0.42.1-1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 28,564 kB
  • sloc: ansic: 82,787; python: 55,191; objc: 5,122; sh: 1,295; xml: 364; makefile: 143; javascript: 78
file content (226 lines) | stat: -rw-r--r-- 5,443 bytes
/*
Package shlex implements a simple lexer which splits input in to tokens using
shell-style rules for quoting.

The basic use case uses the default ASCII lexer to split a string into sub-strings:

	shlex.Split("one \"two three\" four") -> []string{"one", "two three", "four"}

To process a string as a stream of words:

	l := NewLexer(input)
	for {
		word := l.Next()
		if word.Err != nil {
			// handle the error
		}
		if word.Value == "" {
			break // end of input
		}
		// process word.Value
	}
*/
package shlex

import (
	"fmt"
	"github.com/kovidgoyal/kitty/tools/utils"
	"strings"
	"unicode/utf8"
)

// Word is a single token produced by the Lexer. A Word with an empty
// Value and a nil Err marks the end of the input.
type Word struct {
	Value   string // The word is empty if EOF is reached
	Pos     int    // The position in the input string of the word or the trailer
	Err     error  // Indicates an error (unterminated string or trailing unescaped backslash)
	Trailer string // Extra trailing data such as an unterminated string or an unescaped backslash. Present only if Err != nil
}

// lexer_state identifies which part of the input grammar the Lexer is
// currently inside.
type lexer_state int

// Lexer state machine states
const (
	lex_normal lexer_state = iota // between words, skipping whitespace
	word                          // inside an unquoted word
	string_without_escapes        // inside a '...' string: every byte is literal
	string_with_escapes           // inside a "..." string: backslash escapes the next rune
)

// Lexer turns an input stream into a sequence of tokens. Whitespace is skipped.
type Lexer struct {
	// state is the current state machine state; src is the input string.
	// src_sz is len(src), src_pos the current byte offset, and word_start
	// the byte offset at which the word being accumulated began.
	state                       lexer_state
	src                         string
	src_sz, src_pos, word_start int
	// buf accumulates the decoded bytes of the current word.
	buf                         strings.Builder
}

// NewLexer creates a new Lexer that will tokenize the given input string.
func NewLexer(x string) *Lexer {
	ans := &Lexer{src: x}
	ans.src_sz = len(ans.src)
	return ans
}

// start_word clears the word buffer and records where the new word began.
// The position is src_pos-1 because the caller has already consumed the
// word's first byte.
func (self *Lexer) start_word() {
	self.word_start = self.src_pos - 1
	self.buf.Reset()
}

// get_word packages the accumulated buffer contents into a Word carrying
// the recorded start position.
func (self *Lexer) get_word() Word {
	ans := Word{Value: self.buf.String(), Pos: self.word_start}
	return ans
}

// write_ch appends a single raw byte to the current word buffer.
func (self *Lexer) write_ch(ch byte) {
	_ = self.buf.WriteByte(ch) // strings.Builder writes never fail
}

// write_escaped_ch consumes the rune following an escape character and
// appends it to the word buffer. It reports false when the input is
// already exhausted (i.e. a dangling trailing backslash). Bytes that do
// not form valid UTF-8 are consumed but not written to the buffer.
func (self *Lexer) write_escaped_ch() bool {
	r, sz := utf8.DecodeRuneInString(self.src[self.src_pos:])
	if sz == 0 {
		return false
	}
	self.src_pos += sz
	if r != utf8.RuneError {
		self.buf.WriteRune(r)
	}
	return true
}

// Next returns the next word. At EOF Word.Value will be ""
//
// On a malformed tail (unterminated quoted string or a trailing unescaped
// backslash) the returned Word has Err set, Trailer holding the partial
// text collected so far, and Pos the byte offset where that text began.
func (self *Lexer) Next() (ans Word) {
	const string_with_escapes_delim = '"'
	const string_without_escapes_delim = '\''
	const escape_char = '\\'
	// Scan byte-by-byte; multi-byte UTF-8 runes are decoded only after an
	// escape character, otherwise their bytes are copied through unchanged.
	for self.src_pos < self.src_sz {
		ch := self.src[self.src_pos]
		self.src_pos++
		switch self.state {
		case lex_normal:
			switch ch {
			case ' ', '\n', '\r', '\t':
				// Skip whitespace between words.
			case string_with_escapes_delim:
				self.state = string_with_escapes
				self.start_word()
			case string_without_escapes_delim:
				self.state = string_without_escapes
				self.start_word()
			case escape_char:
				self.start_word()
				if !self.write_escaped_ch() {
					// The backslash was the last byte of the input.
					ans.Trailer = "\\"
					ans.Err = fmt.Errorf("Extra backslash at end of input")
					ans.Pos = self.word_start
					return
				}
				self.state = word
			default:
				self.state = word
				self.start_word()
				self.write_ch(ch)
			}
		case word:
			switch ch {
			case ' ', '\n', '\r', '\t':
				self.state = lex_normal
				// Only non-empty words are emitted: Value == "" is reserved
				// for EOF, so a word built solely from an empty quoted
				// string ('' or "") is silently dropped here.
				if self.buf.Len() > 0 {
					return self.get_word()
				}
			case string_with_escapes_delim:
				self.state = string_with_escapes
			case string_without_escapes_delim:
				self.state = string_without_escapes
			case escape_char:
				if !self.write_escaped_ch() {
					// Dangling backslash: report the partial word as Trailer.
					ans.Pos = self.word_start
					ans.Trailer = self.buf.String() + "\\"
					ans.Err = fmt.Errorf("Extra backslash at end of input")
					return
				}
			default:
				self.write_ch(ch)
			}
		case string_without_escapes:
			// Inside '...': every byte is literal until the closing quote.
			switch ch {
			case string_without_escapes_delim:
				self.state = word
			default:
				self.write_ch(ch)
			}
		case string_with_escapes:
			// Inside "...": a backslash escapes the next rune. A dangling
			// backslash at EOF is ignored here; the loop then exits and the
			// unterminated-string error below is reported instead.
			switch ch {
			case string_with_escapes_delim:
				self.state = word
			case escape_char:
				self.write_escaped_ch()
			default:
				self.write_ch(ch)
			}
		}
	}
	// Input exhausted: flush whatever state we were left in.
	switch self.state {
	case word:
		self.state = lex_normal
		if self.buf.Len() > 0 {
			return self.get_word()
		}
	case string_with_escapes, string_without_escapes:
		// EOF inside a quoted string: report the partial text as Trailer.
		self.state = lex_normal
		ans.Trailer = self.buf.String()
		ans.Pos = self.word_start
		ans.Err = fmt.Errorf("Unterminated string at end of input")
		return
	case lex_normal:

	}
	return
}

// Split partitions a string into a slice of strings using shell-style
// quoting rules. On a lexing error it returns the words collected so far
// together with the error.
func Split(s string) (ans []string, err error) {
	lexer := NewLexer(s)
	for {
		w := lexer.Next()
		if w.Err != nil {
			err = w.Err
			return
		}
		if w.Value == "" { // end of input
			return
		}
		ans = append(ans, w.Value)
	}
}

// Quote returns s in a form safe to embed in a POSIX-shell command line.
// Strings containing only word characters and @%+=:,./- are returned
// unchanged; anything else is wrapped in single quotes, with embedded
// single quotes rendered as '"'"'.
//
// NOTE(review): an empty string is returned as-is, which a shell would
// drop entirely (Python's shlex.quote returns '' for this case) — confirm
// callers expect this.
func Quote(s string) string {
	if s == "" {
		return s
	}
	// utils.MustCompile is presumably cached by the utils package — the
	// pattern matches any byte outside the shell-safe set.
	unsafe_chars := utils.MustCompile(`[^\w@%+=:,./-]`)
	if !unsafe_chars.MatchString(s) {
		return s
	}
	escaped := strings.ReplaceAll(s, "'", "'\"'\"'")
	return "'" + escaped + "'"
}

// SplitForCompletion partitions a string into a slice of strings. It differs from Split in being
// more relaxed about errors and also adding an empty string at the end if s ends with a Space.
//
// position_of_last_arg is the byte offset in s at which the final element
// of argv starts.
func SplitForCompletion(s string) (argv []string, position_of_last_arg int) {
	t := NewLexer(s)
	argv = make([]string, 0, len(s)/4) // rough pre-size guess for the word count
	for {
		word := t.Next()
		if word.Value == "" {
			// Either EOF or an error; on error word.Trailer carries the
			// partial (unterminated or backslash-ended) text.
			if word.Trailer == "" {
				trimmed := strings.TrimRight(s, " ")
				if len(trimmed) < len(s) { // trailing spaces
					pos := position_of_last_arg
					if len(argv) > 0 {
						pos += len(argv[len(argv)-1])
					}
					if pos < len(s) { // trailing whitespace
						// Cursor sits past the last word: add an empty final
						// argument for the completer to fill in.
						argv = append(argv, "")
						// NOTE(review): this offset arithmetic assumes the
						// last word's source length equals len(argv[last]);
						// it looks off by one for multi-byte last words
						// (e.g. "ab " yields 2, not 3) and for quoted or
						// escaped words — verify against callers.
						position_of_last_arg += len(s) - pos + 1
					}
				}
			} else {
				// Tolerate lexing errors: treat the partial trailer as the
				// final (in-progress) argument.
				argv = append(argv, word.Trailer)
				position_of_last_arg = word.Pos
			}
			break
		}
		position_of_last_arg = word.Pos
		argv = append(argv, word.Value)
	}
	return
}