1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454
|
#n
#! /bin/sed -nf
####### DESCRIPTION ############################################################
#
# config.sed
# version 1.0
#
# This script processes a configuration file.
#
# The input to the script is a configuration file. Each line of the
# configuration file should be one of the following:
#
# 1) a blank line (with or without whitespace);
# 2) a comment;
# 3) an assignment via equals ('=');
# 4) an assignment via here document ('<<' or '<<-').
#
# The syntax of a comment is:
#
# [<ws>] '#' [<text>]
#
# The syntax of an assignment via equals is:
#
# [<ws>] <name> [<ws>] '=' [<ws>] [<value>] [<comment>]
#
# The name must begin with a letter or underscore and can contain any
# combination of digits, letters, dashes, periods, slashes, colons, and
# underscores.
#
# If the value is not present, the entire line will be treated as a comment. If
# the value is present, it must be either unquoted, single-quoted, or
# double-quoted. The value may begin on a subsequent line if each newline after
# the operator (and before the value) is immediately preceded by a backslash,
# and the backslash is the first non-whitespace character after the operator (or
# the first non-whitespace character of the line, in the case of intermediate
# lines). The value may continue over multiple lines if each newline within the
# value is immediately preceded by a backslash. Such backslash-newline pairs
# will be removed and will not be included as part of the value. A newline
# cannot be part of a value assigned via equals.
#
# An unquoted value extends from the first non-whitespace character to the last
# non-whitespace character (or to the last non-whitespace character before a
# comment, if a comment is present). A pound sign may be included as part of the
# value if escaped by a backslash; the backslash will be removed and will not be
# included as part of the value. Each single quote and double quote (other than
# as the first character) will be treated as if escaped by a backslash (that is,
# no split tokenization will be performed). Each dollar sign, grave accent, and
# backslash (other than a backslash used to escape another character or a
# newline) will be treated as if escaped by a backslash (that is, no parameter
# substitution or command substitution will be performed); if such a backslash
# is present, it will be removed and will not be included as part of the value.
# Otherwise, each backslash (other than a backslash used to escape a newline)
# will be treated literally.
#
# A single-quoted value extends from the first character after the opening quote
# to the last character before the closing quote. A single quote cannot be
# included as part of the value. Each backslash (other than a backslash used to
# escape a newline) will be treated literally.
#
# A double-quoted value extends from the first character after the opening quote
# to the last character before the closing quote. A double quote may be included
# as part of the value if escaped by a backslash; the backslash will be removed
# and will not be included as part of the value. Each dollar sign, grave accent,
# and backslash (other than a backslash used to escape another character or a
# newline) will be treated as if escaped by a backslash (that is, no parameter
# substitution or command substitution will be performed); if such a backslash
# is present, it will be removed and will not be included as part of the value.
# Otherwise, each backslash (other than a backslash used to escape a newline)
# will be treated literally.
#
# The syntax of an assignment via here document is:
#
# [<ws>] <name> [<ws>] { '<<' | '<<-' } [<ws>] [<word> [<ws>]
# [<value-line>...]
# <word>]
#
# The name must adhere to the same syntax as specified for an assignment by
# equals.
#
# If the first occurrence of the word is not present, the entire line will be
# treated as a comment and processing will continue with the next line,
# interpreting it outside the context of a here document. If the first
# occurrence of the word is present, it extends from the first non-whitespace
# character after the operator to the last non-whitespace character, and will be
# treated literally (that is, no parameter substitution or command substitution
# will be performed, either in the word or in any of the value lines).
#
# If the operator is '<<' (meaning the here document is unindented), the value
# will be the concatenation of value lines, read literally, until the second
# occurrence of the word is encountered on a line by itself. The value will
# include each newline that separates a pair of value lines, but will not
# include the newline that separates the last value line from the second
# occurrence of the word.
#
# If the operator is '<<-' (meaning the here document is indented), the result
# will be the same as if the operator was '<<' except that leading tabs will be
# stripped from each line (including the second occurrence of the word).
#
# Since the here document is read literally, comments that appear in the here
# document will be included as part of the value. Also, note that this is not a
# true here document, since it is stored by sed and not in a temporary file.
#
# Because each value is stored by sed, the maximum length of a value is
# approximately eight kilobytes.
#
# The output of the script is a series of lines, one line per assignment in the
# configuration file. The syntax of each output line is:
#
# <name> '=' <encoded-value>
#
# Note that each name is guaranteed not to contain an equals sign, and each
# output line will contain no whitespace, unless whitespace is part of the
# value. Thus, the name of the assignment is the first character of the line to
# the last character before the first equals sign, and the encoded value of the
# assignment is the first character after the first equals sign to the last
# character of the line (not including the newline).
#
# In each encoded value, each double quote, dollar sign, grave accent, and
# backslash will be escaped by a backslash, and each newline (present only if
# assigned via here document) will be represented by the escape sequence '\n'.
# This is necessary for representing a newline and useful for assigning both an
# encoded value and a decoded value to shell parameters. Assuming the shell
# parameter 'LINE' contains an output line, consider the following:
#
# NAME=`echo $LINE | cut -f 1 -d "="`
# VALUE_ENCODED=`echo $LINE | cut -f 2- -d "="`
# eval "VALUE_DECODED=\"$VALUE_ENCODED\""
#
# The script will output a line as each assignment in the configuration file is
# processed, until the end of the configuration file is reached or until a
# syntax error is encountered in the configuration file. If the end of the
# configuration file is reached, the last output line will be a status code that
# indicates success. If a syntax error is encountered in the configuration file,
# the last output line will be a status code that indicates the error. The
# status code will be one of the following:
#
# 0 success;
# 1 invalid name;
# 2 beginning of value is folded beyond end-of-file;
# 3 unquoted value is folded beyond end-of-file;
# 4 single-quoted value is folded beyond end-of-file;
# 5 single-quoted value is missing closing quote;
# 6 single-quoted value contains extraneous quote;
# 7 double-quoted value is folded beyond end-of-file;
# 8 double-quoted value is missing closing quote;
# 9 double-quoted value contains extraneous quote;
# 10 unindented here document continues beyond end-of-file;
# 11 indented here document continues beyond end-of-file.
#
# If the status code is greater than 0, the second-to-last output line will be
# the line number at which the syntax error was encountered. If the status code
# is 1, the third-to-last output line will be the line of the configuration file
# with the invalid name. If the status code is greater than 1, the third-to-last
# output line will be the name that was to be assigned an invalid value.
#
#
####### HISTORY ################################################################
#
# 2001.11.28 Nathan D. Ryan <ryannd@cs.colorado.edu>
# Initial implementation. This probably would have been easier in
# perl, but I got carried away...
#
#
# Skip lines that carry no assignment. Both tests branch to 'is_eof' so that
# a blank or comment line at the very end of the input still produces the
# final status line.
#
# Whitespace is matched with the POSIX class [[:blank:]] (space or tab), so
# tab-only lines and tab-indented comments are skipped as well instead of
# being reported as an invalid name.
#
# blank line: empty, or whitespace only
#
/^[[:blank:]]*$/ b is_eof
# comment: optional whitespace followed by '#'
#
/^[[:blank:]]*#/ b is_eof
# process an assignment via equals ('=')
#
# On entry the pattern space holds one raw input line. The name is moved to
# the hold space and the pattern space is reduced to the raw value. Control
# leaves this block through one of:
#
#   is_eof  no value (or only a comment) follows the operator
#   decode  unquoted or double-quoted value
#   encode  single-quoted value (taken literally, so no decoding)
#   quit    syntax error; the pattern space holds status code 2 through 9
#
# Whitespace is matched with [[:blank:]] (space or tab) throughout.
#
/^[[:blank:]]*[A-Za-z_][0-9A-Za-z\.\/:_-]*[[:blank:]]*=/ {
    # copy the name into the hold space, removing both extraneous whitespace
    # and the operator; the pattern space keeps everything after the
    # operator, minus leading whitespace
    #
    h; s/^[[:blank:]]*\([^=[:blank:]]*\).*$/\1/
    x; s/^[^=]*=[[:blank:]]*\(.*\)$/\1/
    # unfold lines before the value: a lone backslash means the value starts
    # on a following line
    #
    :unfold
    /^\\$/ {
        $ {
            s/^.*$/2/; b quit
        }
        N; s/\\\n[[:blank:]]*//; b unfold
    }
    # process a null value
    #
    /^$/ b is_eof
    # process a null value with a comment
    #
    /^#/ b is_eof
    # process an unquoted value
    #
    /^[^"']/ {
        :unfold0
        # reset the replacement test: 't' clears the "a substitution
        # succeeded" flag, so the 't trim0' below reacts only to the
        # comment substitution that immediately precedes it
        #
        t reset0
        :reset0
        # replace a trailing comment with a space (the space prevents
        # unintended line folding); '#' starts a comment only when it is
        # preceded by an even number of backslashes
        #
        s/\([^\\]\)\(\\*\)\2#.*$/\1\2\2 /; t trim0
        # unfold lines within the value (odd number of trailing
        # backslashes)
        #
        /[^\\]\(\\*\)\1\\$/ {
            $ {
                s/^.*$/3/; b quit
            }
            N; s/\\\n//; b unfold0
        }
        :trim0
        # remove trailing whitespace
        #
        s/[[:blank:]]*$//
        # remove each backslash used to escape a pound sign or a single
        # quote. The first command handles an escape at the very start of
        # the value. The second handles escapes elsewhere: group 1 is the
        # preceding non-backslash character and group 2 is half of the
        # even-length backslash run, so the run must be matched as
        # '\(\\*\)\2' (back-referencing group 1 here would demand a
        # repeated character and silently drop it). The loop is needed
        # because one /g pass cannot see an escape whose preceding
        # character was consumed by the previous match (e.g. 'a\#\#');
        # every successful pass removes a backslash, so it terminates.
        #
        s/^\(\\*\)\1\\\([#']\)/\1\1\2/
        :unesc0
        s/\([^\\]\)\(\\*\)\2\\\([#']\)/\1\2\2\3/g
        t unesc0
        b decode
    }
    # process a single-quoted value
    #
    /^'/ {
        :unfold1
        # reset the replacement test (see the unquoted case)
        #
        t reset1
        :reset1
        # remove a trailing comment that follows the closing quote
        #
        s/\(.'[[:blank:]]*\)#.*$/\1/; t trim1
        # handle an extraneous quote: a quote other than the opening one
        # that is followed by further non-whitespace text
        #
        /.'[[:blank:]]*[^[:blank:]]/ {
            s/^.*$/6/; b quit
        }
        # unfold lines within the value
        #
        /\\$/ {
            $ {
                s/^.*$/4/; b quit
            }
            N; s/\\\n//; b unfold1
        }
        :trim1
        # remove trailing whitespace
        #
        s/[[:blank:]]*$//
        # handle a missing closing quote; both the opening and a distinct
        # closing quote must be present, so a value consisting of the
        # opening quote alone is rejected too
        #
        /^'.*'$/ !{
            s/^.*$/5/; b quit
        }
        # remove the opening quote and the closing quote
        #
        s/^'\(.*\)'$/\1/
        b encode
    }
    # process a double-quoted value
    #
    /^"/ {
        :unfold2
        # reset the replacement test (see the unquoted case)
        #
        t reset2
        :reset2
        # remove a trailing comment that follows an unescaped closing quote
        #
        s/\([^\\]\)\(\\*\)\2"[[:blank:]]*#.*$/\1\2\2"/; t trim2
        # handle an extraneous quote: an unescaped quote other than the
        # opening one that is followed by further non-whitespace text
        #
        /[^\\]\(\\*\)\1"[[:blank:]]*[^[:blank:]]/ {
            s/^.*$/9/; b quit
        }
        # unfold lines within the value (odd number of trailing
        # backslashes)
        #
        /[^\\]\(\\*\)\1\\$/ {
            $ {
                s/^.*$/7/; b quit
            }
            N; s/\\\n//; b unfold2
        }
        :trim2
        # remove trailing whitespace
        #
        s/[[:blank:]]*$//
        # handle a missing closing quote; the value must end in a quote
        # that is distinct from the opening quote and preceded by an even
        # number of backslashes (an escaped final quote does not close
        # the value)
        #
        /^"\(.*[^\\]\)\{0,1\}\(\\*\)\2"$/ !{
            s/^.*$/8/; b quit
        }
        # remove the opening quote and the closing quote
        #
        s/^"\(.*\)"$/\1/
        b decode
    }
}
# process an assignment via here document ('<<' or '<<-')
#
# On entry the pattern space holds one raw input line. The name is moved to
# the hold space; the pattern space is reduced to the terminating word and
# then grows by one input line per iteration until a line equal to the word
# is read. Control leaves through 'is_eof' (no word given), 'encode' (value
# collected) or 'quit' (input ended first; status code 10 or 11).
#
# Whitespace is matched with [[:blank:]] (space or tab) throughout.
#
/^[[:blank:]]*[A-Za-z_][0-9A-Za-z\.\/:_-]*[[:blank:]]*<</ {
    # copy the name into the hold space, removing extraneous whitespace
    # while retaining the operator
    #
    h; s/^[[:blank:]]*\([^<[:blank:]]*\).*$/\1/
    x; s/^[^<]*\(<<.*\)$/\1/
    # process a null value (operator with no word)
    #
    /^<<-\{0,1\}[[:blank:]]*$/ b is_eof
    # process a value as an unindented here document
    #
    /^<<[^-]/ {
        # remove the operator and the whitespace on either side of the
        # word; trailing whitespace must go, otherwise it would have to be
        # repeated verbatim on the terminating line
        #
        s/^<<[[:blank:]]*\(.*[^[:blank:]]\)[[:blank:]]*$/\1/
        # add each line of the here document
        #
        :here0
        $ {
            s/^.*$/10/; b quit
        }
        N
        # empty here document: the terminating word directly follows the
        # opening line, so the value is the empty string
        #
        /^\(.*\)\n\1$/ {
            s/^.*$//; b encode
        }
        # keep reading until the last line equals the opening word
        #
        /^\(.*\)\n.*\n\1$/ !{
            b here0
        }
        # remove the opening word, the final newline, and the closing
        # word
        #
        s/^\(.*\)\n\(.*\)\n\1$/\2/
        b encode
    }
    # process a value as an indented here document
    #
    /^<<-.*$/ {
        # remove the operator and the whitespace on either side of the
        # word
        #
        s/^<<-[[:blank:]]*\(.*[^[:blank:]]\)[[:blank:]]*$/\1/
        # add each line of the here document, removing the leading
        # indentation of the line just read (tabs, and also spaces, which
        # this command already stripped before)
        #
        :here1
        $ {
            s/^.*$/11/; b quit
        }
        N; s/^\(.*\n\)[[:blank:]]*\(.*\)$/\1\2/
        # empty here document: the terminating word directly follows the
        # opening line, so the value is the empty string
        #
        /^\(.*\)\n\1$/ {
            s/^.*$//; b encode
        }
        # keep reading until the last line equals the opening word
        #
        /^\(.*\)\n.*\n\1$/ !{
            b here1
        }
        # remove the opening word, the final newline, and the closing
        # word
        #
        s/^\(.*\)\n\(.*\)\n\1$/\2/
        b encode
    }
}
# Fallback for a line that matched none of the forms above: keep the
# offending line in the hold space and load status code 1 into the pattern
# space, then fall through to the common error exit.
#
h
s/^.*$/1/
# Common error exit. Expects the hold space to contain the text to report
# (the offending line, or the name whose value was invalid) and the pattern
# space to contain the status code. Emits three lines -- reported text,
# current input line number, status code -- and stops without reading any
# further input.
#
:quit
x
p
x
=
p
q
# decode the value; remove each backslash used to escape a double quote, a
# dollar sign, a grave accent, or another backslash
#
# An escaping backslash is one that ends an odd-length run of backslashes
# directly in front of the escaped character. The first command handles such
# a run at the very start of the value. The second handles runs elsewhere:
# group 1 is the preceding non-backslash character and group 2 is half of the
# even-length part of the run, so that part must be matched as '\(\\*\)\2'.
# (Back-referencing group 1 at that point would instead require the preceding
# character to be doubled and would then drop one copy of it.)
#
# One /g pass cannot match an escape whose preceding character was consumed
# by the previous match (e.g. 'a\$\$\$'), hence the loop; every successful
# pass removes at least one backslash, so the loop terminates.
#
# Finally each remaining pair of backslashes collapses to a single one.
#
:decode
s/^\(\\*\)\1\\\(["$`]\)/\1\1\2/
:unesc
s/\([^\\]\)\(\\*\)\2\\\(["$`]\)/\1\2\2\3/g
t unesc
s/\\\\/\\/g
# Encode the value for output: put a backslash in front of every backslash,
# double quote, dollar sign and grave accent, then turn each embedded newline
# into the two-character escape sequence '\n'.
#
:encode
s/[\\"$`]/\\&/g
s/\n/\\n/g
# Emit the assignment. The hold space contains the name and the pattern
# space contains the encoded value: swap them, append '=' to the name, append
# the value on a new line, join the two lines, and print.
#
x
s/$/=/
G
s/\n//
p
# End-of-input check, reached after every line that did not end in an error.
# On the last input line only, replace the pattern space with status code 0
# (success) and print it; on any other line nothing happens here.
#
:is_eof
$ {
    s/^.*$/0/
    p
}
|