
|
#!/usr/bin/awk -f
#****************************************************************************
# ## ## ##### ##### ## ** NoSQL RDBMS - cgi2sh *
# ### ## ####### ####### ## ** $Revision: 2.4.0 $ *
# #### ## ### ## ## ## ************************************
# ####### #### ##### ## ## ## ** Carlo Strozzi (c) 1998-2000 *
# ####### ###### ##### ## # ## ## ************************************
# ## #### ## ## ### ## ### ## ** Written by *
# ## ### ###### ####### ###### ###### ** Carlo Strozzi *
# ## ## #### ##### #### # ###### ** e-mail: carlos@linux.it *
#****************************************************************************
# NoSQL RDBMS, Copyright (C) 1998 Carlo Strozzi. *
# This program comes with ABSOLUTELY NO WARRANTY; for details *
# refer to the GNU General Public License. *
#****************************************************************************
# NOTE: to edit, set ts=8 in 'vi' (or equivalent)
# to print, pipe through 'pr -t -e8'
#****************************************************************************
#
# Translates special characters in CGI environment variables into their ISO
# entities. Alternatively, encoding can be done as per RFC 1738
# (URI-encoding). Prints the new assignments to STDOUT in a (safe) format,
# suitable for CGI programs written in the Bourne shell.
#
# Usage: cgi2sh
#
# Note: options must be passed through the environment variable _awk_args
#
# Options:
# -p|--prefix P
# Prefix each default output variable name with string 'P'.
#
# -e|--export
# Prefix each assignment with the string 'export '.
#
# -m|--match R
# Only do those variables that match the given regular expression.
# R must be a valid awk(1) pattern, without surrounding slashes.
#
# -R|--readonly
# For each output assignment print also a corresponding
# 'readonly' statement. See sh(1).
#
# -u|--uri [uri_prefix]
# By default, only the NoSQL special characters \t and \n, plus
# single-quotes (that may be harmful to the shell) and a small
# subset of other characters are ISO encoded. With this option,
# a much wider set of characters are encoded, and in this case
# encoding is done according to RFC 1738 instead. The full set of
# characters that are URI-encoded if '-u' is specified is :
#
# % # & ? + SPACE = " \t ' \n \r $ ( ) [ ] ` < > \ / . | ; ! *
# { } : ~
#
# Note how '%' must be escaped first, and SPACE must be done
# after '+'.
# This option is mainly for sending data back to the client inside
# a URL string.
#
# -U|--also-uri [uri_prefix]
# Same as '-u' but does also the default encoding.
#
# -d|--delete R
# Delete anything that matches the regular expression R in variable
# values. R must be a valid awk(1) pattern, without surrounding
# slashes. If both '-d' and '-b' are specified, they can affect
# each other, in that '-d' is done before '-b'.
#
# -b|--blank R
# Anything that matches the regular expression R in variable
# values is replaced with one single blank. R must be a valid
# awk(1) pattern, without surrounding slashes. If both '-d' and
# '-b' are specified, they can affect each other, in that '-d'
# is done before '-b'.
#
# -r|--rewind
# Before applying the forward-escaping mechanism, do the
# inverse transformation. This is useful to re-process
# data that are already escaped in the input values, not
# to escape it twice.
#
# Note: '-d', '-b' and '-r' do not apply to the 'uri' mode.
#
# -c|--cookie [cookie_prefix]
# Beside encoding, also parse the HTTP_COOKIE header and
# print the corresponding shell assignments.
#
# -x|--debug
# Print all the regular expressions given with command line
# arguments to STDERR.
#
#
# Encode special characters in environment variables. Prints the results to
# STDOUT in the form of VARIABLE='encoded_value' pairs, suitable for being
# reused by the invoking shell to make new assignments.
#
# This operator reads the environment and prints the encoded assignments
# to STDOUT.
#
# An example of usage of this operator from within a shell script is :
#
# export _awk_args='-m ^WWW_' ; eval "$(cgi2sh)"
#
# Note how the command should be enclosed in double quotes, to
# preserve any newlines in variable values.
#
# The program operates on environment variables rather than on STDIN,
# because this makes it much easier to handle variables containing embedded
# newlines (\n), i.e. multi-line input records.
#
# If no variables match the given pattern, then the program prints the
# pseudo-noop shell instruction { :; } , to make sure that a non-empty
# list is returned to the calling program.
#
# Environment variable names that do not match the regular expression
# /^[A-Za-z0-9_]+$/ are skipped, and a warning message is printed
# to STDERR.
#
########################################################################
BEGIN {
  # The whole output is one shell "{ list; }" compound command. The leading
  # no-op ':' guarantees a non-empty list even if no variable is printed.
  NULL = ""; do_default = 1; printf("{ :;")

  # Use environment, as args may contain backslash-escapes.
  split( ENVIRON["_awk_args"], args, " " )

  while ( args[++i] != NULL )
  {
    if ( args[i] == "-e" || args[i] == "--export" ) export = 1
    else if ( args[i] == "-m" || args[i] == "--match" ) m_pattern = args[++i]
    else if ( args[i] == "-p" || args[i] == "--prefix" ) prefix = args[++i]
    else if ( args[i] == "-R" || args[i] == "--readonly" ) readonly = 1
    else if ( args[i] == "-r" || args[i] == "--rewind" ) rewind = 1
    else if ( args[i] == "-u" || args[i] == "--uri" )
    {
      do_uri = 1
      # The prefix is optional: take the next word unless it is an option.
      if ( args[i+1] !~ /^-/ ) uri_prefix = args[++i]
      # This is for backward compatibility.
      do_default = 0
    }
    else if ( args[i] == "-U" || args[i] == "--also-uri" )
    {
      do_uri = 1
      if ( args[i+1] !~ /^-/ ) uri_prefix = args[++i]
    }
    else if ( args[i] == "-d" || args[i] == "--delete" )
    {
      remove = 1; d_pattern = args[++i]
    }
    else if ( args[i] == "-b" || args[i] == "--blank" )
    {
      blank = 1; b_pattern = args[++i]
    }
    else if ( args[i] == "-x" || args[i] == "--debug" ) debug = 1
    else if ( args[i] == "-c" || args[i] == "--cookie" )
    {
      do_cookie = 1
      if ( args[i+1] !~ /^-/ ) c_prefix = args[++i]
    }
  }

  if ( m_pattern == NULL ) m_pattern = ".*"
  if ( prefix != NULL ) do_default = 1		# For backward compat.
  if ( uri_prefix == NULL ) uri_prefix = prefix

  if ( debug )
  {
    print "-m pattern: " m_pattern > "/dev/stderr"
    print "-d pattern: " d_pattern > "/dev/stderr"
    print "-b pattern: " b_pattern > "/dev/stderr"
  }

  for ( env in ENVIRON )
  {
    if ( env !~ m_pattern ) continue

    # Always skip rc(1) functions, options and NoSQL-specific stuff.
    if ( env ~ /^fn_/ || env ~ /^_nosql_/ || env == "_awk_args" ) continue

    if ( env == "HTTP_COOKIE" ) continue	# Cookies handled separately.

    # Skip invalid variable names. They may occur when we receive
    # them from a WWW Browser.
    if ( env !~ /^[A-Za-z0-9_]+$/ )
    {
      print "cgi2sh: bad variable name " env > "/dev/stderr"
      continue
    }

    var = ENVIRON[ env ]

    # URI mode works on the raw value: '-d', '-b' and '-r' do not apply.
    # No SH_Escape() is needed, since every shell-special character
    # (" $ ` \) is percent-encoded by URI_Encode().
    if ( do_uri ) Print_Assignment(uri_prefix, env, URI_Encode(var))

    if ( do_default )
    {
      # Honour '-d' and '-b' first ('-d' is done before '-b').
      if ( remove ) gsub( d_pattern, NULL, var )
      if ( blank ) gsub( b_pattern, " ", var )

      # Undo any previous escaping, so that data are not escaped twice.
      if ( rewind ) var = ISO_Decode(var)

      Print_Assignment(prefix, env, SH_Escape(ISO_Encode(var)))
    }
  }

  if ( do_cookie && ENVIRON["HTTP_COOKIE"] != NULL ) {
    split( ENVIRON["HTTP_COOKIE"], c, "; " )	# Split cookies.
    while ( c[++j] != NULL ) {
      # Split at the FIRST '=' only: cookie values may themselves
      # contain '=' characters, which must not truncate the value.
      eq = index( c[j], "=" )
      if ( eq ) {
	c_name = substr( c[j], 1, eq - 1 )
	c_value = substr( c[j], eq + 1 )
      }
      else {
	c_name = c[j]
	c_value = NULL
      }

      # Dashes are not valid in shell variable names.
      gsub( "-", "_", c_name )

      if ( c_name !~ /^[A-Za-z0-9_]+$/ ) {
	print "cgi2sh: bad cookie name " c_name > "/dev/stderr"
	continue
      }

      # Print the new assignment.
      Print_Assignment(c_prefix, c_name, SH_Escape(URI_Decode(c_value)))
    }
  }

  # Close the shell { list; } statement.
  printf(" }\n")
}
########################################################################
# Print_Assignment(pfx, name, value)
#
# Prints the shell assignment  pfx name="value";  to STDOUT, preceded by
# 'export' and/or followed by a 'readonly' statement if the corresponding
# options (global flags 'export' and 'readonly') were requested. 'value'
# must already be safe for use inside shell double-quotes.
########################################################################
function Print_Assignment(pfx, name, value) {
  if ( export ) printf(" export")
  printf(" %s%s=\"%s\";", pfx, name, value)
  if ( readonly ) printf(" readonly %s%s;", pfx, name)
}
########################################################################
# URI_Encode(string)
#
# Encodes a subset of RFC 1738 (partial encoding) and returns the
# encoded string. Regular expression constants are used instead of
# strings, because backslash-escapes such as "\." inside a string are
# not handled the same way by all awk implementations.
########################################################################
function URI_Encode(s) {
  # Order matters here: '%' first, and SPACE after '+'.
  gsub(/%/, "%25", s)
  gsub(/\+/, "%2B", s)
  gsub(/ /, "+", s)

  # Order does not matter here.
  gsub(/\t/, "%09", s)
  gsub(/\n/, "%0A", s)
  gsub(/'/, "%27", s)
  gsub(/#/, "%23", s)
  gsub(/&/, "%26", s)
  gsub(/\?/, "%3F", s)
  gsub(/[=]/, "%3D", s)		# '/=/' may be parsed as the '/=' operator.
  gsub(/"/, "%22", s)
  gsub(/\r/, "%0D", s)
  gsub(/\$/, "%24", s)
  gsub(/\(/, "%28", s)
  gsub(/\)/, "%29", s)
  gsub(/\[/, "%5B", s)
  gsub(/\]/, "%5D", s)
  gsub(/`/, "%60", s)
  gsub(/</, "%3C", s)
  gsub(/>/, "%3E", s)
  gsub(/\\/, "%5C", s)
  gsub(/\//, "%2F", s)
  gsub(/\./, "%2E", s)
  gsub(/\|/, "%7C", s)
  gsub(/;/, "%3B", s)
  gsub(/!/, "%21", s)
  gsub(/\*/, "%2A", s)
  gsub(/[{]/, "%7B", s)
  gsub(/[}]/, "%7D", s)
  gsub(/:/, "%3A", s)
  gsub(/~/, "%7E", s)

  # Add more encodings here, if necessary.
  return s
}
########################################################################
# URI_Decode(string)
#
# Decodes a subset of RFC 1738 (partial decoding), i.e. the inverse of
# URI_Encode(), and returns the decoded string.
########################################################################
function URI_Decode(s) {
  # Order does not matter here.
  gsub("%09", "\t", s)
  gsub("%0A", "\n", s)
  gsub("%27", "'", s)
  gsub("%23", "#", s)
  gsub("%26", "\\&", s)		# A bare '&' would mean "the matched text".
  gsub("%3F", "?", s)
  gsub("%3D", "=", s)
  gsub("%22", "\"", s)
  gsub("%0D", "\r", s)
  gsub("%24", "$", s)
  gsub("%28", "(", s)
  gsub("%29", ")", s)
  gsub("%5B", "[", s)
  gsub("%5D", "]", s)
  gsub("%60", "`", s)
  gsub("%3C", "<", s)
  gsub("%3E", ">", s)
  gsub("%5C", "\\", s)
  gsub("%2F", "/", s)
  gsub("%2E", ".", s)
  gsub("%7C", "|", s)
  gsub("%3B", ";", s)
  gsub("%21", "!", s)
  gsub("%2A", "*", s)
  gsub("%7B", "{", s)
  gsub("%7D", "}", s)
  gsub("%3A", ":", s)
  gsub("%7E", "~", s)

  # Order matters here: '+' must become SPACE before '%2B' becomes '+',
  # and '%25' must be decoded last.
  gsub(/\+/, " ", s)
  gsub("%2B", "+", s)
  gsub("%25", "%", s)
  return s
}
########################################################################
# ISO_Encode(string)
#
# Escapes HTML special characters, plus the NoSQL special characters
# \t and \n, into their ISO entities. Returns the escaped string.
#
# Warning: '&' must be escaped first, then '#'. They need to be escaped
# to prevent the data from containing statements (both numeric and
# literal) that could be parsed by a Web server as valid SSI calls, or
# ampersand-escaped sequences that would act as formatting instructions
# to the Web browser.
#
# In the replacement text '&' stands for the matched string, so a
# literal ampersand must be written as "\\&".
########################################################################
function ISO_Encode(s) {
  gsub( /&/, "\\&amp;", s )		# Ampersand
  gsub( /#/, "\\&#35;", s )		# Hash mark
  gsub( /</, "\\&lt;", s )		# Open tag
  gsub( />/, "\\&gt;", s )		# Close tag
  gsub( /"/, "\\&quot;", s )		# double quote
  gsub( /'/, "\\&#39;", s )		# single quote
  gsub( /`/, "\\&#96;", s )		# backtick
  gsub( /\n/, "\\&#10;", s )		# newline
  gsub( /\t/, "\\&#9;", s )		# tab
  return s
}
########################################################################
# ISO_Decode(string)
#
# Inverse of ISO_Encode(): turns the ISO entities back into the original
# characters. The set of characters un-escaped here must be kept
# synchronized with the one in 'envto*', and they must be in the reverse
# order with respect to the ones escaped by ISO_Encode().
########################################################################
function ISO_Decode(s) {
  gsub( /&#9;/, "\t", s )		# tab
  gsub( /&#10;/, "\n", s )		# newline
  gsub( /&#39;/, "'", s )		# single quote
  gsub( /&#96;/, "`", s )		# backtick
  gsub( /&quot;/, "\"", s )		# double quote
  gsub( /&gt;/, ">", s )		# Close tag
  gsub( /&lt;/, "<", s )		# Open tag
  gsub( /&#35;/, "#", s )		# Hash mark
  gsub( /&amp;/, "\\&", s )		# Ampersand
  return s
}
########################################################################
# SH_Escape(string)
########################################################################
function SH_Escape(s) {
  # Makes a string safe for use between shell double-quotes, by putting
  # a backslash in front of each of the four characters that the shell
  # still interprets there:  \  `  $  "
  #
  # Backslashes must be doubled first, otherwise the ones added by the
  # substitutions below would get doubled as well.
  s = Mawk_Bug(s)

  # The remaining three substitutions are independent of each other.
  gsub(/`/,"\\`",s)
  gsub(/\$/,"\\$",s)
  gsub(/"/,"\\\"",s)

  return s
}
########################################################################
# Mawk_Bug(string)
#
# Takes a string and turns all '\' characters into their escaped form
# '\\'. Returns the escaped string. This could be done with just a gsub(),
# but mawk(1) has a bug that makes it behave differently from other awk
# implementations:
#
# gsub( /\\/, "\\\\", field ) # This works with both gawk(1)
# # and the original nawk(1).
#
# gsub( /\\/, "\\\\\\", field ) # This works just with mawk(1),
# # otherwise it produces more
# # backslashes than necessary,
# # which looks rather obvious.
#
########################################################################
function Mawk_Bug( s,    parts, n, k, out ) {
  # Cut the string at every backslash, then glue the pieces back
  # together with a doubled backslash between each pair of pieces.
  # No gsub() is involved, so the result does not depend on how the
  # awk implementation handles backslashes in replacement text.
  n = split( s, parts, "\\" )
  out = parts[1]
  k = 1
  while ( ++k <= n )
    out = out "\\\\" parts[k]
  return out
}
########################################################################
# End of program.
########################################################################
|