File: env2sed

package info (click to toggle)
nosql 3.1-4
  • links: PTS
  • area: main
  • in suites: woody
  • size: 1,448 kB
  • ctags: 267
  • sloc: cpp: 1,028; ansic: 915; awk: 732; perl: 502; tcl: 292; sh: 289; makefile: 44
file content (271 lines) | stat: -rwxr-xr-x 10,074 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
#!/usr/bin/awk -f
#****************************************************************************
#  ##   ##         #####   #####  ##     **       NoSQL RDBMS - env2sed     *
#  ###  ##        ####### ####### ##     **        $Revision: 2.4.0 $       *
#  #### ##        ###     ##   ## ##     ************************************
#  #######  ####  #####   ##   ## ##     **   Carlo Strozzi (c) 1998-2000   *
#  ####### ######   ##### ## # ## ##     ************************************
#  ## #### ##  ##     ### ##  ### ##     **           Written by            *
#  ##  ### ###### ####### ######  ###### **          Carlo Strozzi          *
#  ##   ##  ####   #####   #### # ###### **     e-mail: carlos@linux.it     *
#****************************************************************************
#   NoSQL RDBMS, Copyright (C) 1998 Carlo Strozzi.                          *
#   This program comes with ABSOLUTELY NO WARRANTY; for details             *
#   refer to the GNU General Public License.                                *
#****************************************************************************
# NOTE: to edit, set ts=8 in 'vi' (or equivalent)
#       to print, pipe through 'pr -t -e8'
#****************************************************************************
#
# Converts environment variables into sed(1) replacement statements.
#
# Usage:  env2sed
# 
# Note: options must be passed through the environment variable _awk_args,
#       i.e.: _awk_args='[options]'
#
# Options:
#     -m|--match R
#           Only do those variables that match the given regular expression.
#           R must be a valid awk(1) pattern, without surrounding slashes.
#
#     -d|--delete R
#           Delete anything that matches the regular expression R in variable
#           values. R must be a valid awk(1) pattern, without surrounding
#           slashes. If both '-d' and '-b' are specified, they can affect
#           each other, in that '-d' is done before '-b'.
#
#     -b|--blank R
#           Anything that matches the regular expression R in variable
#           values is replaced with one single blank. R must be a valid
#           awk(1) pattern, without surrounding slashes. If both '-d' and
#           '-b' are specified, they can affect each other, in that '-d'
#           is done before '-b'.
#
#     -c|--cgi
#           Reverse the encoding done by the 'cgi2*' operator on Common
#           Gateway Interface (CGI) variables. This is done on what is
#           left after any '-b' and '-d' processing.
#
#     -x|--debug
#           Print command-line regular expressions to STDERR.
#
#     -u|--unescape
#           Turn NoSQL escapes \t and \n into horizontal tabs and newlines
#           respectively. This is not done by default, as the environment
#           is already supposed to contain those characters in their
#           unescaped form.
#
#     -s|--strip-names R
#           Strip anything that matches the regular expression R from
#           variable _names_. This can be useful, for instance, to remove
#           the leading 'WWW_' from CGI variables set by the 'uncgi' program.
#
#     -p|--prefix P
#           Prefix the left-hand side of each output assignment
#           with the string 'P'.
#
#     -h|--html
#           The output data must be suitable for inclusion in an HTML
#           page. This causes TABs and newlines to be converted into
#           the HTML character references &#9; and &#10; respectively.
#
#     -C|--strip-comments
#          Print the statements necessary to remove comment-lines,
#          i.e. lines that start with a '#' character.
#
# Environment variable names that do not match the regular expression
# /^[A-Za-z0-9_]+$/ are skipped, and a warning message is printed
# to STDERR.
#
# The program operates on environment variables rather than on STDIN,
# and this makes it possible to operate on variables that contain physical
# newlines and tabs rather than their NoSQL escapes \n and \t.
#
# Hint: to pass extra commands to sed(1) in one single invocation you
# can use the form:
#
#	     export _awk_args='[options]'
#            sed -e "$(env2sed)" -e "..." ...
#
# Warning: not all shells are robust enough to allow for large amounts
# of data in one single shell variable or string. If that's the case,
# then it is preferable to change the above usage example as follows:
#
#	     env2sed > temp_file
#	     echo another_sed_command >> temp_file
#	     echo ... and_so_on ... >> temp_file
#
#	     sed -f temp_file
#
#****************************************************************************
# NOTE: to edit, set ts=8 in 'vi' (or equivalent)
#       to print, pipe through 'pr -t -e8'
#****************************************************************************

# Main program. Everything runs inside BEGIN because the data source is the
# process environment (ENVIRON), not STDIN. The steps are:
#   1. parse the options found in the environment variable _awk_args;
#   2. optionally print the parsed patterns to STDERR (-x);
#   3. optionally emit the sed(1) statement that deletes comment lines (-C);
#   4. for every accepted environment variable, emit one statement of the
#      form  s/<prefix><name>/<escaped value>/g  on STDOUT.
BEGIN {
  NULL = ""; OFS = "\t"

  # I need to use the environment for passing args, as otherwise escapes
  # must be doubled, i.e. \n -> \\n, \t -> \\t, etc.

  split( ENVIRON["_awk_args"], args, " " )

  # Walk the blank-separated option words. Options that take a value
  # consume the following word with args[++i].
  while ( args[++i] != NULL )
  {
    if ( args[i] == "-m" || args[i] == "--match" ) m_pattern = args[++i]
    else if ( args[i] == "-d" || args[i] == "--delete" )
    {
      remove = 1; d_pattern = args[++i]
    }
    else if ( args[i] == "-b" || args[i] == "--blank" )
    {
      blank = 1; b_pattern = args[++i]
    }
    else if ( args[i] == "-x" || args[i] == "--debug" ) debug = 1
    else if ( args[i] == "-c" || args[i] == "--cgi" ) cgi = 1
    else if ( args[i] == "-u" || args[i] == "--unescape" ) unescape = 1
    else if ( args[i] == "-s" || args[i] == "--strip-names" )
    {
      strip_names = 1; s_pattern = args[++i]
    }
    else if ( args[i] == "-p" || args[i] == "--prefix" ) prefix = args[++i]
    else if ( args[i] == "-h" || args[i] == "--html" ) html = 1
    else if ( args[i] == "-C" || args[i] == "--strip-comments" )
    {
      no_comments = 1
    }
  }

  if ( debug )
  {
    # The raw option string lives in the environment; there is no awk
    # variable called _awk_args, so it must be read through ENVIRON.
    print "args:       " ENVIRON["_awk_args"]    > "/dev/stderr"
    print "-m pattern: " m_pattern    > "/dev/stderr"
    print "-d pattern: " d_pattern    > "/dev/stderr"
    print "-b pattern: " b_pattern    > "/dev/stderr"
    print "-s pattern: " s_pattern    > "/dev/stderr"
  }

  # Without '-m' every variable name is a candidate.
  if ( m_pattern == NULL ) m_pattern = ".*"

  # Handle comments.
  if ( no_comments ) printf("/^#.*$/d\n")

  for ( env in ENVIRON )
  {
    if ( env !~ m_pattern ) continue

    # Always skip rc(1) functions, options and NoSQL-specific stuff.
    if ( env ~ /^fn_/ || env ~ /^_nosql_/ || env == "_awk_args" ) continue

    # Skip invalid variable names. They may occur when we receive
    # them from a WWW Browser.
    if ( env !~ /^[A-Za-z0-9_]+$/ )
    {
      print "env2sed: bad variable name " env > "/dev/stderr"
      continue
    }

    # Honour the '-s' switch.
    dd = env
    if ( strip_names ) gsub( s_pattern, NULL, dd )

    # In case we stripped the whole name.
    if ( dd == NULL ) continue

    var = ENVIRON[ env ]

    # Honour '-d' and '-b' first.
    if ( remove ) gsub( d_pattern, NULL, var)
    if ( blank ) gsub( b_pattern, " ", var)

    if ( cgi )
    {
      # Revert the encoding done by the 'cgi2sh' utility.
      # These codes must appear exactly in the same order as they
      # appear inside the 'rewind' option of that utility.
      # NOTE(review): the first six entity strings were restored from
      # their per-line comments and from the &#NN; form used by the
      # remaining entries -- confirm them against 'cgi2sh'.

      gsub( "&#9;", "\t", var )                # tab
      gsub( "&#10;", "\n", var )               # newline
      gsub( "&#39;", "'", var )                # single quote
      gsub( "&#96;", "`", var )                # backtick
      gsub( "&#34;", "\"", var )               # double quote
      gsub( "&#62;", ">", var )                # Close tag
      gsub( "&#60;", "<", var )                # Open tag
      gsub( "&#35;", "#", var )                # Hash mark

      # "\\&" reaches gsub() as \& , i.e. a literal ampersand. A bare
      # "\&" is not portable: awk implementations that drop the unknown
      # escape would hand gsub() a plain & (= the matched text).
      gsub( "&amp;", "\\&", var )              # Ampersand
    }

    # Unescape NoSQL special characters if requested.
    if ( unescape ) var = NoSQL_Unescape( var )

    # Escape TABs and newlines in output for HTML if requested.
    # The leading "\\&" again yields a literal ampersand (see above).
    if ( html ) {
       gsub( /\t/, "\\&#9;", var )
       gsub( /\n/, "\\&#10;", var )
    }

    # Apply actual sed(1) escaping. Do '\' first!

    var = Mawk_Bug( var )
    gsub( "\n", "\\\n", var )
    gsub( "&", "\\\\&", var )
    gsub( "#", "\\\\#", var )
    gsub( "/", "\\\/", var )

    # Add more sed(1) escapes here if necessary.

    printf("s/%s%s/%s/g", prefix, dd, var)

    printf("\n")
  }
}

########################################################################
# NoSQL_Unescape(string)
#
# Scans the string one character at a time and rewrites the two-character
# sequences '\t' and '\n' as a physical tab and a physical newline.
# A backslash followed by any other character (another backslash
# included) is copied through unchanged, and so is a lone backslash at
# the very end of the string. Returns the converted string.
#
# Only 's' is a real argument; the remaining names are local variables.
########################################################################
function NoSQL_Unescape(s,		out,pos,last,ch,pending) {
  # Go one position past the end: the extra, empty "character" flushes
  # a backslash that is still pending when the string runs out.
  last = length(s) + 1
  for ( pos = 1; pos <= last; pos++ ) {
    ch = substr(s, pos, 1)
    if ( pending ) {
      # Previous character was an unconsumed backslash.
      pending = 0
      if ( ch == "n" ) out = out "\n"
      else if ( ch == "t" ) out = out "\t"
      else out = out "\\" ch
    }
    else if ( ch == "\\" ) pending = 1
    else out = out ch
  }
  return out
}

########################################################################
# Mawk_Bug(string)
#
# Returns a copy of the string in which every '\' has been doubled
# into '\\'. The string is cut at each backslash and the pieces are
# glued back together with two backslashes in between.
#
# A single gsub() would be the natural way to do this, but mawk(1)
# has a bug that makes it treat the replacement text differently from
# other awk implementations:
#
# gsub( /\\/, "\\\\", field )		# Fine with gawk(1) and with
#					# the original nawk(1).
#
# gsub( /\\/, "\\\\\\", field )		# Fine with mawk(1) only;
#					# elsewhere it yields more
#					# backslashes than wanted.
#
# Only 's' is a real argument; the remaining names are local variables.
########################################################################
function Mawk_Bug( s,		piece,n,k,out ) {

   n = split( s, piece, "\\" )

   # Start from the text before the first backslash, then append each
   # following piece preceded by a doubled backslash.
   out = piece[1]
   k = 1
   while ( k < n ) out = out "\\\\" piece[++k]

   return out
}

########################################################################
# End of program.
########################################################################