File: token_shell.tcl

package info (click to toggle)
tcllib 1.20%2Bdfsg-1
  • links: PTS
  • area: main
  • in suites: bullseye
  • size: 68,064 kB
  • sloc: tcl: 216,842; ansic: 14,250; sh: 2,846; xml: 1,766; yacc: 1,145; pascal: 881; makefile: 107; perl: 84; f90: 84; python: 33; ruby: 13; php: 11
file content (172 lines) | stat: -rw-r--r-- 4,960 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# # ## ### ##### ######## ############# #####################
## Copyright (c) 2013 Andreas Kupries, BSD licensed

# # ## ### ##### ######## ############# #####################
## Requisites

package require Tcl 8.5
package require string::token

# # ## ### ##### ######## ############# #####################
## API setup

namespace eval ::string::token {
    # Note: string::token claims the "text" and "file" commands.
    namespace export shell
    namespace ensemble create
}

proc ::string::token::shell {args} {
    # result = list (word)

    set partial 0
    set indices 0
    while {[llength $args]} {
	switch -glob -- [set o [lindex $args 0]] {
	    -partial { set partial 1 }
	    -indices { set indices 1 }
	    -- {
		set args [lrange $args 1 end]
		break
	    }
	    -* {
		# Unknown option.
		return -code error \
		    -errorcode {STRING TOKEN SHELL BAD OPTION} \
		    "Bad option $o, expected one of -indices, or -partial"
	    }
	    * {
		# Non-option, stop option processing.
		break
	    }
	}
	set args [lrange $args 1 end]
    }
    if {[llength $args] != 1} {
	return -code error \
	    -errorcode {STRING TOKEN WRONG ARGS} \
	    "wrong \# args: should be \"[lindex [info level 0] 0] ?-indices? ?-partial? ?--? text\""
    } else {
	set text [lindex $args 0]
    }

    set space    \\s
    set     lexer {}
    lappend lexer ${space}+                                  WSPACE
    lappend lexer {'[^']*'}                                  S:QUOTED
    lappend lexer "\"(\[^\"\]|(\\\\\")|(\\\\\\\\))*\""       D:QUOTED
    lappend lexer "((\[^ $space'\"\])|(\\\\\")|(\\\\\\\\))+" PLAIN

    if {$partial} {
	lappend lexer {'[^']*$}                             S:QUOTED:PART
	lappend lexer "\"(\[^\"\]|(\\\\\")|(\\\\\\\\))*$"   D:QUOTED:PART
    }

    lappend lexer {.*}                                       ERROR

    set dequote [list \\" \" \\\\ \\ ] ; #"

    set result {}

    # Parsing of a shell line is a simple grammar, RE-equivalent
    # actually, thus tractable with a plain finite state machine.
    #
    # States:
    # - WS-WORD : Expected whitespace or word.
    # - WS      : Expected whitespace
    # - WORD    : Expected word.

    # We may have leading whitespace.
    set state WS-WORD
    foreach token [text $lexer $text] {
	lassign $token type start end

	#puts "[format %7s $state] + ($token) = <<[string range $text $start $end]>>"

	set changed 0
	switch -glob -- ${type}/$state {
	    ERROR/* {
		return -code error \
		    -errorcode {STRING TOKEN SHELL BAD SYNTAX CHAR} \
		    "Unexpected character '[string index $text $start]' at offset $start"
	    }
	    WSPACE/WORD {
		# Impossible
		return -code error \
		    -errorcode {STRING TOKEN SHELL BAD SYNTAX WHITESPACE} \
		    "Expected start of word, got whitespace at offset $start."
	    }
	    PLAIN/WS -
	    *:QUOTED*/WS {
		return -code error \
		    -errorcode {STRING TOKEN SHELL BAD SYNTAX WORD} \
		    "Expected whitespace, got start of word at offset $start"
	    }
            WSPACE/WS* {
		# Ignore leading, inter-word, and trailing whitespace
		# Must be followed by a word
		set state WORD
	    }
	    S:QUOTED/*WORD {
		# Quoted word, single, extract it, ignore delimiters.
		# Must be followed by whitespace.
		incr start
		incr end -1
		lappend result [string range $text $start $end]
		set state WS
		set changed 1
	    }
	    S:QUOTED:PART/*WORD {
		# Quoted partial word (at end), single, extract it, ignore delimiter at start, none at end.
		# Must be followed by nothing.
		incr start
		lappend result [string range $text $start $end]
		set state WS
		set changed 1
	    }
	    D:QUOTED/*WORD {
		# Quoted word, double, extract it, ignore delimiters.
		# Have to check for and reduce escaped double quotes and backslashes.
		# Must be followed by whitespace.
		incr start
		incr end -1
		lappend result [string map $dequote [string range $text $start $end]]
		set state WS
		set changed 1
	    }
	    D:QUOTED:PART/*WORD {
		# Quoted word, double, extract it, ignore delimiter at start, none at end.
		# Have to check for and reduce escaped double quotes and backslashes.
		# Must be followed by nothing.
		incr start
		lappend result [string map $dequote [string range $text $start $end]]
		set state WS
		set changed 1
	    }
	    PLAIN/*WORD {
		# Unquoted word. extract.
		# Have to check for and reduce escaped double quotes and backslashes.
		# Must be followed by whitespace.
		lappend result [string map $dequote [string range $text $start $end]]
		set state WS
		set changed 1
	    }
	    * {
		return -code error \
		    -errorcode {STRING TOKEN SHELL INTERNAL} \
		    "Illegal token/state combination $type/$state"
	    }
        }
	if {$indices && $changed} {
	    set last [lindex $result end]
	    set result [lreplace $result end end [list {*}$token $last]]
	}
    }
    return $result
}

# # ## ### ##### ######## ############# #####################
## Ready

package provide string::token::shell 1.2
return