1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387
|
#!/usr/bin/awk -f
#****************************************************************************
# ## ## ##### ##### ## ** NoSQL RDBMS - cgi2sh *
# ### ## ####### ####### ## ** $Revision: 2.4.0 $ *
# #### ## ### ## ## ## ************************************
# ####### #### ##### ## ## ## ** Carlo Strozzi (c) 1998-2000 *
# ####### ###### ##### ## # ## ## ************************************
# ## #### ## ## ### ## ### ## ** Written by *
# ## ### ###### ####### ###### ###### ** Carlo Strozzi *
# ## ## #### ##### #### # ###### ** e-mail: carlos@linux.it *
#****************************************************************************
# NoSQL RDBMS, Copyright (C) 1998 Carlo Strozzi. *
# This program comes with ABSOLUTELY NO WARRANTY; for details *
# refer to the GNU General Public License. *
#****************************************************************************
# NOTE: to edit, set ts=8 in 'vi' (or equivalent)
# to print, pipe through 'pr -t -e8'
#****************************************************************************
#
# Translates special characters in CGI environment variables into their ISO
# entities. Alternatively, encoding can be done as per RFC 1738
# (URI-encoding). Prints the new assignments to STDOUT in a (safe) format,
# suitable for CGI programs written in the Bourne shell.
#
# Usage: cgi2sh
#
# Note: options must be passed through the environment variable _awk_args
#
# Options:
# -p|--prefix P
# Prefix each default output variable name with string 'P'.
#
# -e|--export
# Prefix each assignment with the string 'export '.
#
# -m|--match R
# Only do those variables that match the given regular expression.
# R must be a valid awk(1) pattern, without surrounding slashes.
#
# -R|--readonly
# For each output assignment print also a corresponding
# 'readonly' statement. See sh(1).
#
# -u|--uri [uri_prefix]
# By default, only the NoSQL special characters \t and \n, plus
# single-quotes (that may be harmful to the shell) and a small
# subset of other characters are ISO encoded. With this option,
# a much wider set of characters are encoded, and in this case
# encoding is done according to RFC 1738 instead. The full set of
# characters that are URI-encoded if '-u' is specified is :
#
# % # & ? + SPACE = " \t ' \n \r $ ( ) [ ] ` < > \ / . | ; ! *
# { } : ~
#
# Note how '%' must be escaped first, and SPACE must be done
# after '+'.
# This option is mainly for sending data back to the client inside
# a URL string.
#
# -U|--also-uri [uri_prefix]
# Same as '-u' but does also the default encoding.
#
# -d|--delete R
# Delete anything that matches the regular expression R in variable
# values. R must be a valid awk(1) pattern, without surrounding
# slashes. If both '-d' and '-b' are specified, they can affect
# each other, in that '-d' is done before '-b'.
#
# -b|--blank R
# Anything that matches the regular expression R in variable
# values is replaced with one single blank. R must be a valid
# awk(1) pattern, without surrounding slashes. If both '-d' and
# '-b' are specified, they can affect each other, in that '-d'
# is done before '-b'.
#
# -r|--rewind
# Before applying the forward-escaping mechanism, do the
# inverse transformation. This is useful to re-process
# data that are already escaped in the input values, not
# to escape it twice.
#
# Note: '-d', '-b' and '-r' do not apply to the 'uri' mode.
#
# -c|--cookie [cookie_prefix]
# Beside encoding, also parse the HTTP_COOKIE header and
# print the corresponding shell assignments.
#
# -x|--debug
# Print all the regular expressions given with command line
# arguments to STDERR.
#
#
# Encode special characters in environment variables. Prints the results to
# STDOUT in the form of VARIABLE='encoded_value' pairs, suitable for being
# reused by the invoking shell to make new assignments.
#
# This operator reads the environment and prints the encoded assignments
# to STDOUT.
#
# An example of usage of this operator from within a shell script is :
#
# export _awk_args='-m ^WWW_' ; eval "$(cgi2sh)"
#
# Note how the command should be enclosed in double quotes, to
# preserve any newlines in variable values.
#
# The program operates on environment variables rather than on STDIN,
# because this makes it much easier to handle variables containing embedded
# newlines (\n), i.e. multi-line input records.
#
# If no variables match the given pattern, then the program prints the
# pseudo-noop shell instruction { :; } , to make sure that a non-empty
# list is returned to the calling program.
#
# Environment variable names that do not match the regular expression
# /^[A-Za-z0-9_]+$/ are skipped, and a warning message is printed
# to STDERR.
#
########################################################################
BEGIN {
    # Whole program: parse the options found in the _awk_args environment
    # variable, then walk ENVIRON and print one shell assignment per
    # selected variable, all wrapped in a single "{ :; ... }" list so that
    # the output is never empty.  Nothing is read from STDIN.
    #
    # Globals set from options:
    #   export, readonly         - emit 'export' / 'readonly' statements
    #   m_pattern                - only names matching this regexp are done
    #   prefix, uri_prefix, c_prefix - name prefixes for the default, URI
    #                              and cookie assignments respectively
    #   do_default, do_uri, do_cookie - which encodings/sections to run
    #   remove/d_pattern, blank/b_pattern, rewind - value pre-processing
    #   debug                    - dump the regexps to STDERR
    NULL = ""; do_default = 1; printf("{ :;")

    # Use environment, as args may contain backslash-escapes.
    # split() on " " never yields empty elements, so the count it returns
    # is a safe loop bound.
    nargs = split( ENVIRON["_awk_args"], args, " " )
    for ( i = 1; i <= nargs; i++ )
    {
        if ( args[i] == "-e" || args[i] == "--export" ) export = 1
        else if ( args[i] == "-m" || args[i] == "--match" ) m_pattern = args[++i]
        else if ( args[i] == "-p" || args[i] == "--prefix" ) prefix = args[++i]
        else if ( args[i] == "-R" || args[i] == "--readonly" ) readonly = 1
        else if ( args[i] == "-r" || args[i] == "--rewind" ) rewind = 1
        else if ( args[i] == "-u" || args[i] == "--uri" )
        {
            do_uri = 1
            # The prefix is optional: take the next word only if it does
            # not look like another option.
            if ( args[i+1] !~ /^-/ ) uri_prefix = args[++i]
            # This is for backward compatibility.
            do_default = 0
        }
        else if ( args[i] == "-U" || args[i] == "--also-uri" )
        {
            do_uri = 1
            if ( args[i+1] !~ /^-/ ) uri_prefix = args[++i]
        }
        else if ( args[i] == "-d" || args[i] == "--delete" )
        {
            remove = 1; d_pattern = args[++i]
        }
        else if ( args[i] == "-b" || args[i] == "--blank" )
        {
            blank = 1; b_pattern = args[++i]
        }
        else if ( args[i] == "-x" || args[i] == "--debug" ) debug = 1
        else if ( args[i] == "-c" || args[i] == "--cookie" )
        {
            do_cookie = 1
            if ( args[i+1] !~ /^-/ ) c_prefix = args[++i]
        }
    }

    if ( m_pattern == NULL ) m_pattern = ".*"
    if ( prefix != NULL ) do_default = 1    # For backward compat.
    if ( uri_prefix == NULL ) uri_prefix = prefix

    if ( debug )
    {
        print "-m pattern: " m_pattern > "/dev/stderr"
        print "-d pattern: " d_pattern > "/dev/stderr"
        print "-b pattern: " b_pattern > "/dev/stderr"
    }

    for ( env in ENVIRON )
    {
        if ( env !~ m_pattern ) continue

        # Always skip rc(1) functions, options and NoSQL-specific stuff.
        if ( env ~ /^fn_/ || env ~ /^_nosql_/ || env == "_awk_args" ) continue

        if ( env == "HTTP_COOKIE" ) continue    # Cookies handled separately.

        # Skip invalid variable names. They may occur when we receive
        # them from a WWW Browser.
        if ( env !~ /^[A-Za-z0-9_]+$/ )
        {
            print "cgi2sh: bad variable name " env > "/dev/stderr"
            continue
        }

        var = ENVIRON[ env ]

        if ( do_uri )
        {
            # Encode a subset of RFC 1738 (partial encoding).
            #
            # Regexp constants are used on purpose: in a string constant
            # an escape such as "\." is undefined, and awks that strip the
            # backslash would turn it into the regexp '.', i.e. "any
            # character".
            #
            # Order matters here: '%' first, then '+', then SPACE.
            gsub(/%/, "%25", var)
            gsub(/\+/, "%2B", var)
            gsub(/ /, "+", var)         # one '+' per blank

            # Order does not matter here.
            gsub(/\t/, "%09", var)
            gsub(/\n/, "%0A", var)
            gsub(/'/, "%27", var)
            gsub(/#/, "%23", var)
            gsub(/&/, "%26", var)
            gsub(/\?/, "%3F", var)
            gsub(/[=]/, "%3D", var)     # '/=' alone could be lexed as an operator
            gsub(/"/, "%22", var)
            gsub(/\r/, "%0D", var)
            gsub(/\$/, "%24", var)
            gsub(/\(/, "%28", var)
            gsub(/\)/, "%29", var)
            gsub(/\[/, "%5B", var)
            gsub(/\]/, "%5D", var)
            gsub(/`/, "%60", var)
            gsub(/</, "%3C", var)
            gsub(/>/, "%3E", var)
            gsub(/\\/, "%5C", var)
            gsub(/\//, "%2F", var)
            gsub(/\./, "%2E", var)
            gsub(/\|/, "%7C", var)
            gsub(/;/, "%3B", var)
            gsub(/!/, "%21", var)
            gsub(/\*/, "%2A", var)
            gsub(/[{]/, "%7B", var)
            gsub(/[}]/, "%7D", var)
            gsub(/:/, "%3A", var)
            gsub(/~/, "%7E", var)
            # Add more encodings here, if necessary.

            # Print the new assignment.  No SH_Escape() is needed: every
            # character special inside shell double quotes (" $ ` \) has
            # been %-encoded above.
            if ( export ) printf(" export")
            printf(" %s%s=\"%s\";", uri_prefix, env, var)
            if ( readonly ) printf(" readonly %s%s;", uri_prefix, env)

            # Restore original value of var.
            var = ENVIRON[ env ]
        }

        if ( do_default )
        {
            # Honour '-d' and '-b' first.
            if ( remove ) gsub( d_pattern, NULL, var )
            if ( blank ) gsub( b_pattern, " ", var )

            # NOTE(review): the entity spellings used below (&#9; &#10;
            # &#39; &#96; &#35;) are a reconstruction -- confirm them
            # against the set used by 'envto*'.
            #
            # In a replacement string a bare '&' means "the matched text",
            # hence the "\\&" wherever a literal ampersand is wanted.

            # The set of characters un-escaped here must be kept synchronized
            # with the one in 'envto*', and they must be in the reverse
            # order with respect to the ones escaped below.
            if ( rewind ) {
                gsub( "&#9;", "\t", var )       # tab
                gsub( "&#10;", "\n", var )      # newline
                gsub( "&#39;", "'", var )       # single quote
                gsub( "&#96;", "`", var )       # backtick
                gsub( "&quot;", "\"", var )     # double quote
                gsub( "&gt;", ">", var )        # Close tag
                gsub( "&lt;", "<", var )        # Open tag
                gsub( "&#35;", "#", var )       # Hash mark
                gsub( "&amp;", "\\&", var )     # Ampersand
            }

            # Escape HTML special characters in input data.
            # Warning: '&' must be escaped first, then '#'. They need to
            # be escaped to prevent the data from containing statements
            # (both numeric and literal) that could be parsed by a Web server
            # as valid SSI calls, or ampersand-escaped sequences that would
            # act as formatting instructions to the Web browser.
            gsub( "&", "\\&amp;", var )         # Ampersand
            gsub( "#", "\\&#35;", var )         # Hash mark
            gsub( "<", "\\&lt;", var )          # Open tag
            gsub( ">", "\\&gt;", var )          # Close tag
            gsub( "\"", "\\&quot;", var )       # double quote
            gsub( "'", "\\&#39;", var )         # single quote
            gsub( "`", "\\&#96;", var )         # backtick
            gsub( "\n", "\\&#10;", var )        # newline
            gsub( "\t", "\\&#9;", var )         # tab

            # Print the new assignment.
            if ( export ) printf(" export")
            printf(" %s%s=\"%s\";", prefix, env, SH_Escape(var))
            if ( readonly ) printf(" readonly %s%s;", prefix, env)
        }
    }

    if ( do_cookie && ENVIRON["HTTP_COOKIE"] != NULL ) {
        ncookies = split( ENVIRON["HTTP_COOKIE"], c, "; " )    # Split cookies.
        for ( j = 1; j <= ncookies; j++ ) {
            if ( c[j] == NULL ) continue        # Tolerate empty elements.

            # Cut at the FIRST '=' only, so that values which themselves
            # contain '=' are kept whole.
            eq = index( c[j], "=" )
            if ( eq > 0 ) {
                c_name = substr( c[j], 1, eq - 1 )
                c_value = substr( c[j], eq + 1 )
            }
            else {
                c_name = c[j]
                c_value = NULL
            }

            # '-' is not valid in a shell variable name.
            gsub( "-", "_", c_name )
            if ( c_name !~ /^[A-Za-z0-9_]+$/ ) {
                print "cgi2sh: bad cookie name " c_name > "/dev/stderr"
                continue
            }

            # Decode a subset of RFC 1738 (partial decoding).
            # Order does not matter here.
            gsub("%09", "\t", c_value)
            gsub("%0A", "\n", c_value)
            gsub("%27", "'", c_value)
            gsub("%23", "#", c_value)
            gsub("%26", "\\&", c_value)         # literal '&', not the match
            gsub("%3F", "?", c_value)
            gsub("%3D", "=", c_value)
            gsub("%22", "\"", c_value)
            gsub("%0D", "\r", c_value)
            gsub("%24", "$", c_value)
            gsub("%28", "(", c_value)
            gsub("%29", ")", c_value)
            gsub("%5B", "[", c_value)
            gsub("%5D", "]", c_value)
            gsub("%60", "`", c_value)
            gsub("%3C", "<", c_value)
            gsub("%3E", ">", c_value)
            gsub("%5C", "\\", c_value)
            gsub("%2F", "/", c_value)
            gsub("%2E", ".", c_value)
            gsub("%7C", "|", c_value)
            gsub("%3B", ";", c_value)
            gsub("%21", "!", c_value)
            gsub("%2A", "*", c_value)
            gsub("%7B", "{", c_value)
            gsub("%7D", "}", c_value)
            gsub("%3A", ":", c_value)
            gsub("%7E", "~", c_value)

            # Order matters here: '+' means SPACE only while it is still
            # un-decoded, and '%' must come last so that a decoded '%' is
            # never mistaken for the start of another escape.
            gsub(/\+/, " ", c_value)
            gsub("%2B", "+", c_value)
            gsub("%25", "%", c_value)

            # Print the new assignment.
            if ( export ) printf(" export")
            printf(" %s%s=\"%s\";", c_prefix, c_name, SH_Escape(c_value))
            if ( readonly ) printf(" readonly %s%s;", c_prefix, c_name)
        }
    }

    # Close the shell { list; } statement.
    printf(" }\n")
}
########################################################################
# SH_Escape(string)
########################################################################
function SH_Escape(s) {
    # Make 's' safe to place between shell double quotes: the characters
    # \ " $ and ` each get a backslash in front of them.

    # Backslashes go first, otherwise the ones added below would be
    # doubled as well.
    s = Mawk_Bug(s)

    # The remaining passes touch disjoint characters, so their relative
    # order is irrelevant.
    gsub(/`/,"\\`",s)
    gsub(/\$/,"\\$",s)
    gsub(/"/,"\\\"",s)

    return s
}
########################################################################
# Mawk_Bug(string)
#
# Takes a string and turns all '\' characters into their escaped form
# '\\'. Returns the escaped string. This could be done with just a gsub(),
# but mawk(1) has a bug that makes it behave differently from other awk
# implementations:
#
# gsub( /\\/, "\\\\", field ) # This works with both gawk(1)
# # and the original nawk(1).
#
# gsub( /\\/, "\\\\\\", field ) # This works just with mawk(1),
# # otherwise it produces more
# # backslashes than necessary,
# # which looks rather obvious.
#
########################################################################
function Mawk_Bug( s,    pos, out ) {
    # Double every backslash in 's' without using gsub(): scan for the
    # next '\', copy what precedes it followed by '\\', and carry on with
    # the remainder.  'pos' and 'out' are locals.
    out = ""
    while ( (pos = index( s, "\\" )) > 0 ) {
        out = out substr( s, 1, pos - 1 ) "\\\\"
        s = substr( s, pos + 1 )
    }
    # Whatever is left holds no backslash.
    return out s
}
########################################################################
# End of program.
########################################################################
|