File: cgi2rc

package info (click to toggle)
nosql 3.1-4
  • links: PTS
  • area: main
  • in suites: woody
  • size: 1,448 kB
  • ctags: 267
  • sloc: cpp: 1,028; ansic: 915; awk: 732; perl: 502; tcl: 292; sh: 289; makefile: 44
file content (339 lines) | stat: -rwxr-xr-x 11,989 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
#!/usr/bin/awk -f
#****************************************************************************
#  ##   ##         #####   #####  ##     **       NoSQL RDBMS - cgi2rc      *
#  ###  ##        ####### ####### ##     **        $Revision: 2.4.0 $       *
#  #### ##        ###     ##   ## ##     ************************************
#  #######  ####  #####   ##   ## ##     **   Carlo Strozzi (c) 1998-2000   *
#  ####### ######   ##### ## # ## ##     ************************************
#  ## #### ##  ##     ### ##  ### ##     **           Written by            *
#  ##  ### ###### ####### ######  ###### **          Carlo Strozzi          *
#  ##   ##  ####   #####   #### # ###### **     e-mail: carlos@linux.it     *
#****************************************************************************
#   NoSQL RDBMS, Copyright (C) 1998 Carlo Strozzi.                          *
#   This program comes with ABSOLUTELY NO WARRANTY; for details             *
#   refer to the GNU General Public License.                                *
#****************************************************************************
# NOTE: to edit, set ts=8 in 'vi' (or equivalent)
#       to print, pipe through 'pr -t -e8'
#****************************************************************************
#
# Translates special characters in CGI environment variables into their ISO
# entities. Alternatively, encoding can be done as per RFC 1738
# (URI-encoding). Prints the new assignments to STDOUT in a (safe) format,
# suitable for CGI programs written in the Rc shell.
#
# Usage:  cgi2rc
# 
# Note: options must be passed through the environment variable _awk_args
#
# Options:
#     -p|--prefix P
#           Prefix each default output variable name with string 'P'.
# 
#     -m|--match R
#           Only do those variables that match the given regular expression.
#           R must be a valid awk(1) pattern, without surrounding slashes.
#
#     -u|--uri [uri_prefix]
#           By default, only the NoSQL special characters \t and \n, plus
#           single-quotes (that may be harmful to the shell) and a small
#           subset of other characters are ISO encoded. With this option,
#           a much wider set of characters are encoded, and in this case
#           encoding is done according to RFC 1738 instead. The full set of
#           characters that are URI-encoded if '-u' is specified is :
#
#           % # & ? + SPACE = " \t ' \n \r $ ( ) [ ] ` < > \ / . | ; ! *
#           { } : ~
#
#           Note how '%' must be escaped first, and SPACE must be done
#           after '+'.
#           This option is mainly for sending data back to the client inside
#           a URL string.
#
#     -U|--also-uri [uri_prefix]
#           Same as '-u' but also does the default encoding.
# 
#     -d|--delete R
#           Delete anything that matches the regular expression R in variable
#           values. R must be a valid awk(1) pattern, without surrounding
#           slashes. If both '-d' and '-b' are specified, they can affect 
#           each other, in that '-d' is done before '-b'.
#
#     -b|--blank R
#           Anything that matches the regular expression R in variable
#           values is replaced with one single blank. R must be a valid
#           awk(1) pattern, without surrounding slashes. If both '-d' and
#           '-b' are specified, they can affect each other, in that '-d'
#           is done before '-b'.
#
#     -r|--rewind
#           Before applying the forward-escaping mechanism, do the
#           inverse transformation. This is useful to re-process
#           data that are already escaped in the input values, so
#           that they are not escaped twice.
#
#           Note: '-d', '-b' and '-r' do not apply to the 'uri' mode.
#
#     -c|--cookie [cookie_prefix]
#           Beside encoding, also parse the HTTP_COOKIE header and
#	    print the corresponding shell assignments.
#
#     -x|--debug
#           Print all the regular expressions given with command line
#           arguments to STDERR.
#
#
# Encode special characters in environment variables. Prints the results to
# STDOUT in the form of VARIABLE='encoded_value' pairs, suitable for being
# reused by the invoking shell to make new assignments.
#
# This operator reads the environment and prints the encoded assignments
# to STDOUT.
# 
# An example of usage of this operator from within an Rc shell script is :
# 
#	    _awk_args='-m ^WWW_' ; eval `cgi2rc
#
# Note how the command should be enclosed in double quotes, to
# Preserve any newlines in variable values.
#
# The program operates on environment variables rather than on STDIN,
# because this makes it much easier to handle variables containing embedded
# newlines (\n), i.e. multi-line input records.
#
# If no variables match the given pattern, then the program prints the
# pseudo-noop shell instruction { } , to make sure that a non-empty
# list is returned to the calling program.
#
# Environment variable names that do not match the regular expression
# /^[A-Za-z0-9_]+$/ are skipped, and a warning message is printed
# to STDERR.
# 
########################################################################

BEGIN {
  # The whole program runs in BEGIN: it reads ENVIRON (never STDIN) and
  # prints one rc(1) list  { NAME='value'; ... }  to STDOUT.
  NULL = ""; do_default = 1; printf("{")

  # Use environment, as args may contain backslash-escapes.
  split( ENVIRON["_awk_args"], args, " " )

  # Parse the options. '-u', '-U' and '-c' take an optional operand: the
  # next word is consumed as the operand unless it begins with '-'.
  while ( args[++i] != NULL )
  {
    if ( args[i] == "-m" || args[i] == "--match" ) m_pattern = args[++i]
    else if ( args[i] == "-p" || args[i] == "--prefix" ) prefix = args[++i]
    else if ( args[i] == "-r" || args[i] == "--rewind" ) rewind = 1
    else if ( args[i] == "-u" || args[i] == "--uri" )
    {
      do_uri = 1
      if ( args[i+1] !~ /^-/ ) uri_prefix = args[++i]
      # This is for backward compatibility.
      do_default = 0
    }
    else if ( args[i] == "-U" || args[i] == "--also-uri" )
    {
      do_uri = 1
      if ( args[i+1] !~ /^-/ ) uri_prefix = args[++i]
    }
    else if ( args[i] == "-d" || args[i] == "--delete" )
    {
      remove = 1; d_pattern = args[++i]
    }
    else if ( args[i] == "-b" || args[i] == "--blank" )
    {
      blank = 1; b_pattern = args[++i]
    }
    else if ( args[i] == "-x" || args[i] == "--debug" ) debug = 1
    else if ( args[i] == "-c" || args[i] == "--cookie" )
    {
      do_cookie = 1
      if ( args[i+1] !~ /^-/ ) c_prefix = args[++i]
    }
  }

  if ( m_pattern == NULL ) m_pattern = ".*"
  if ( prefix != NULL ) do_default = 1           # For backward compat.
  if ( uri_prefix == NULL ) uri_prefix = prefix

  if ( debug )
  {
    print "-m pattern: " m_pattern    > "/dev/stderr"
    print "-d pattern: " d_pattern    > "/dev/stderr"
    print "-b pattern: " b_pattern    > "/dev/stderr"
  }

  for ( env in ENVIRON )
  {
    if ( env !~ m_pattern ) continue

    # Always skip rc(1) functions, options and NoSQL-specific stuff.
    if ( env ~ /^fn_/ || env ~ /^_nosql_/ || env == "_awk_args" ) continue

    if ( env == "HTTP_COOKIE" ) continue	# Cookies handled separately.

    # Skip invalid variable names. They may occur when we receive
    # them from a WWW Browser.

    if ( env !~ /^[A-Za-z0-9_]+$/ )
    {
      print "cgi2rc: bad variable name " env > "/dev/stderr"
      continue
    }

    # URI mode always works on the untouched environment value:
    # '-d', '-b' and '-r' do not apply to it.
    if ( do_uri )
      printf(" %s%s='%s';", uri_prefix,  env, uri_encode( ENVIRON[ env ] ))

    if ( do_default )
    {
      var = ENVIRON[ env ]

      # Honour '-d' and '-b' first ('-d' is done before '-b').
      if ( remove ) gsub( d_pattern, NULL, var )
      if ( blank ) gsub( b_pattern, " ", var )

      # Undo a previous escaping pass, so that data are not escaped twice.
      if ( rewind ) var = iso_unescape( var )

      # Print the new assignment.
      printf(" %s%s='%s';", prefix,  env, iso_escape( var ))
    }
  }

  if ( do_cookie && ENVIRON["HTTP_COOKIE"] != NULL ) {

     # Split cookies. The element count returned by split() drives the
     # loop, so an empty element does not hide the cookies that follow it.
     n_cookies = split( ENVIRON["HTTP_COOKIE"], cookie, "; " )

     for ( j = 1; j <= n_cookies; j++ ) {
        if ( cookie[j] == NULL ) continue

        # Only the first '=' separates name from value; any further '='
        # characters belong to the value.
        eq = index( cookie[j], "=" )
        if ( eq > 0 ) {
           c_name  = substr( cookie[j], 1, eq - 1 )
           c_value = substr( cookie[j], eq + 1 )
        } else {
           c_name  = cookie[j]
           c_value = NULL
        }

        gsub( "-", "_", c_name )
        if ( c_name !~ /^[A-Za-z0-9_]+$/ ) {
           print "cgi2rc: bad cookie name " c_name > "/dev/stderr"
           continue
        }

        c_value = uri_decode( c_value )

        # Single-quotes must be doubled for Rc.
        gsub( "'", "''", c_value )

        # Print the new assignment.
        printf(" %s%s='%s';", c_prefix,  c_name, c_value)
     }
  }

  # Close the shell {list} statement.
  printf("}\n")
}

# Return 's' with a subset of characters %XX-encoded as per RFC 1738
# (partial encoding). Regular-expression metacharacters are written as
# ERE literals (/\./) rather than as string constants ("\."), because the
# meaning of an unknown backslash escape inside a string constant is
# undefined in POSIX awk and differs between implementations.
function uri_encode(s) {
  # Order matters here: '%' first, and SPACE after '+'.
  gsub(/%/, "%25", s)
  gsub(/\+/, "%2B", s)
  gsub(/  */, "+", s)		# A run of blanks becomes one single '+'.

  # Order does not matter here.
  gsub("\t", "%09", s)
  gsub("\n", "%0A", s)
  gsub("'", "%27", s)
  gsub("#", "%23", s)
  gsub("&", "%26", s)
  gsub(/\?/, "%3F", s)
  gsub("=", "%3D", s)
  gsub("\"", "%22", s)
  gsub("\r", "%0D", s)
  gsub(/\$/, "%24", s)
  gsub(/\(/, "%28", s)
  gsub(/\)/, "%29", s)
  gsub(/\[/, "%5B", s)
  gsub("]", "%5D", s)
  gsub("`", "%60", s)
  gsub("<", "%3C", s)
  gsub(">", "%3E", s)
  gsub(/\\/, "%5C", s)
  gsub("/", "%2F", s)
  gsub(/\./, "%2E", s)
  gsub(/\|/, "%7C", s)
  gsub(";", "%3B", s)
  gsub("!", "%21", s)
  gsub(/\*/, "%2A", s)
  gsub(/\{/, "%7B", s)
  gsub("}", "%7D", s)
  gsub(":", "%3A", s)
  gsub("~", "%7E", s)

  # Add more encodings here, if necessary.

  return s
}

# Return 's' with the %XX sequences produced by uri_encode() turned back
# into the characters they stand for (partial decoding, RFC 1738). Only
# the upper-case hexadecimal forms listed here are recognized.
function uri_decode(s) {
  # Order does not matter here.
  gsub("%09", "\t", s)
  gsub("%0A", "\n", s)
  gsub("%27", "'", s)
  gsub("%23", "#", s)
  gsub("%26", "\\&", s)		# A bare "&" would mean 'the matched text'.
  gsub("%3F", "?", s)
  gsub("%3D", "=", s)
  gsub("%22", "\"", s)
  gsub("%0D", "\r", s)
  gsub("%24", "$", s)
  gsub("%28", "(", s)
  gsub("%29", ")", s)
  gsub("%5B", "[", s)
  gsub("%5D", "]", s)
  gsub("%60", "`", s)
  gsub("%3C", "<", s)
  gsub("%3E", ">", s)
  gsub("%5C", "\\", s)
  gsub("%2F", "/", s)
  gsub("%2E", ".", s)
  gsub("%7C", "|", s)
  gsub("%3B", ";", s)
  gsub("%21", "!", s)
  gsub("%2A", "*", s)
  gsub("%7B", "{", s)
  gsub("%7D", "}", s)
  gsub("%3A", ":", s)
  gsub("%7E", "~", s)

  # Order matters here: '+' must become SPACE before '%2B' becomes '+',
  # and '%25' must be decoded last.
  gsub(/\+/, " ", s)
  gsub("%2B", "+", s)
  gsub("%25", "%", s)

  return s
}

# Return 's' with HTML special characters replaced by their ISO entities.
# Warning: '&' must be escaped first, then '#'. They need to be escaped
# to prevent the data from containing statements (both numeric and
# literal) that could be parsed by a Web server as valid SSI calls, or
# ampersand-escaped sequences that would act as formatting instructions
# to the Web browser.
# In the replacement strings "\\&" yields a literal '&'.
function iso_escape(s) {
  gsub( "&", "\\&amp;", s )                # Ampersand
  gsub( "#", "\\&#35;", s )                # Hash mark
  gsub( "<", "\\&#60;", s )                # Open tag
  gsub( ">", "\\&#62;", s )                # Close tag
  gsub( "\"", "\\&#34;", s )               # double quote
  gsub( "'", "\\&#39;", s )                # single quote
  gsub( "`", "\\&#96;", s )                # backtick
  gsub( "\n", "\\&#10;", s )               # newline
  gsub( "\t", "\\&#9;", s )                # tab
  return s
}

# Inverse of iso_escape(), used by '-r'. The set of characters un-escaped
# here must be kept synchronized with the one in 'envto*'; '#' and '&'
# are restored last, i.e. in the reverse order with respect to the one
# in which iso_escape() escapes them.
function iso_unescape(s) {
  gsub( "&#9;", "\t", s )                  # tab
  gsub( "&#10;", "\n", s )                 # newline
  gsub( "&#39;", "'", s )                  # single quote
  gsub( "&#96;", "`", s )                  # backtick
  gsub( "&#34;", "\"", s )                 # double quote
  gsub( "&#62;", ">", s )                  # Close tag
  gsub( "&#60;", "<", s )                  # Open tag
  gsub( "&#35;", "#", s )                  # Hash mark
  gsub( "&amp;", "\\&", s )                # Ampersand
  return s
}

########################################################################
# End of program.
########################################################################