File: wvtclstring.cc

package info (click to toggle)
wvstreams 4.0.2-4
  • links: PTS
  • area: main
  • in suites: sarge
  • size: 6,420 kB
  • ctags: 6,518
  • sloc: cpp: 52,544; sh: 5,770; ansic: 810; makefile: 461; tcl: 114; perl: 18
file content (252 lines) | stat: -rw-r--r-- 5,761 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
/*
 * Worldvisions Weaver Software:
 *   Copyright (C) 1997-2002 Net Integration Technologies, Inc.
 */
#include "wvtclstring.h"
#include "wvbackslash.h"
#include "wvbuf.h"
#include <wvstream.h>

/**
 * Escape a string so that it can stand as a single word in a tcl-style list.
 *
 * 'nasties' names extra characters, on top of WVTCL_ALWAYS_NASTY, whose
 * presence means the string cannot be emitted verbatim.
 *
 * Returns:
 *  - s unchanged when it is NULL, or when it holds no nasty characters and
 *    its braces are balanced;
 *  - "{}" when s is empty;
 *  - s wrapped in braces when it holds nasty characters but is safe to wrap;
 *  - a backslash-encoded copy (produced by WvBackslashEncoder) when the
 *    braces are unbalanced or the string ends in an unpaired backslash.
 */
WvString wvtcl_escape(WvStringParm s, const char *nasties)
{
    // NULL strings remain such
    if (!(const char *)s)
        return s;

    // empty strings are just {}
    if (!s)
        return "{}";

    // the full set of characters that force escaping: the built-in ones
    // plus whatever the caller asked for
    WvString allnasties(WVTCL_ALWAYS_NASTY);
    allnasties.append(nasties);
    const char *nastychars = allnasties.cstr();

    bool backslashify = false, inescape = false;
    int unprintables = 0, bracecount = 0;

    // figure out which method we need to use: backslashify or embrace.
    // also count the nasty characters, so we know whether any escaping
    // is needed at all.
    for (const char *cptr = s; *cptr; cptr++)
    {
        // braces preceded by an unpaired backslash do not count
        if (!inescape)
        {
            if (*cptr == '{')
                bracecount++;
            else if (*cptr == '}')
                bracecount--;
        }

        // a '}' with no matching '{' before it can never be embraced
        if (bracecount < 0)
            backslashify = true;

        if (strchr(nastychars, *cptr))
            unprintables++;

        // a backslash toggles the escape state (so "\\\\" is a complete
        // pair); any other character ends it
        if (*cptr == '\\')
            inescape = !inescape;
        else
            inescape = false;
    }

    // if the braces aren't balanced, backslashify.  The same goes for a
    // string that ends in an unpaired backslash: wrapping it would yield
    // "{...\}", where the backslash escapes the closing brace and the word
    // would never be seen as terminated when it is parsed back.
    if (bracecount != 0 || inescape)
        backslashify = true;

    if (!backslashify && !unprintables)
        return s; // no work needed!

    if (backslashify)
    {
        // the backslashify method: backslash-escape _all_ suspicious chars.
        return WvBackslashEncoder(allnasties).strflushstr(s, true);
    }

    // the embrace method: just take the string and put braces around it
    return WvString("{%s}", s);
}


/**
 * Undo the escaping applied to a single tcl-style word.
 *
 *  - NULL and empty strings are returned as they are.
 *  - A word that starts with '{' and ends with '}' has that outer pair of
 *    braces removed; its contents are returned untouched.
 *  - A word that starts and ends with '"' has the quotes dropped and the
 *    remainder run through WvBackslashDecoder.
 *  - Any other word is run through WvBackslashDecoder only if it contains a
 *    backslash; otherwise it is returned as it is.
 */
WvString wvtcl_unescape(WvStringParm s)
{
    // empty or NULL strings remain themselves
    if (!s)
        return s;

    int slen = s.len();

    // A word needs at least two characters to carry both an opening and a
    // closing delimiter.  Without this check a lone '"' would match as both
    // the first and the last character, and the decoder below would be
    // handed a length of -1.
    const bool can_be_delimited = (slen >= 2);

    // deal with embraced strings by simply removing the braces
    if (can_be_delimited && s[0] == '{' && s[slen-1] == '}')
    {
        WvString out;
        out = s+1;                      // copy everything after the '{'
        char *optr = out.edit() + slen - 2;
        *optr = 0;                      // cut the copy off at the final '}'
        return out;
    }

    // deal with quoted strings by ignoring the quotes _and_ unbackslashifying.
    const bool skipquotes =
        can_be_delimited && s[0] == '"' && s[slen-1] == '"';

    // strings without backslashes don't need to be unbackslashified!
    if (!skipquotes && !strchr(s, '\\'))
        return s;

    // otherwise, unbackslashify it, leaving out one character at each end
    // if the word was quoted.
    return WvBackslashDecoder().strflushmem(
        s.cstr() + int(skipquotes),
        slen - int(skipquotes) * 2, true);
}


/**
 * Build a single tcl-style list string out of the elements of 'l'.
 *
 * Every element is passed through wvtcl_escape() with the given 'nasties'.
 * Elements are joined using one byte taken from the start of 'splitchars'.
 */
WvString wvtcl_encode(WvList<WvString> &l, const char *nasties,
                      const char *splitchars)
{
    WvDynBuf encoded;

    WvList<WvString>::Iter elem(l);
    elem.rewind();
    while (elem.next())
    {
        // a separator is only emitted once the buffer already holds data
        const bool need_separator = encoded.used();
        if (need_separator)
            encoded.put(splitchars, 1);

        // escape and add the element
        encoded.putstr(wvtcl_escape(*elem, nasties));
    }

    return encoded.getstr();
}

/*
 * Pull the next tcl-style word out of 'buf'.
 *
 * buf:         buffer to read from.
 * splitchars:  NUL-terminated set of characters that separate words.
 * do_unescape: if true, the word is passed through wvtcl_unescape() before
 *              being returned; otherwise it is returned exactly as found.
 *
 * Returns a default-constructed WvString (wvtcl_decode() checks it with
 * isnull()) when the buffer is empty, holds nothing but separators, or does
 * not yet hold a complete word: unbalanced braces, an unterminated quote, a
 * dangling backslash, or a line continuation with nothing after it.  In
 * those cases every byte taken from the buffer is handed back with unget().
 *
 * On success, the bytes following the word (starting with the separator
 * that ended it, if any) are handed back with unget(); the leading
 * separators and the word itself are not.
 *
 * NOTE(review): this relies on WvBuf::unget(n) restoring the last n bytes
 * obtained through get() -- confirm against WvBuf.
 */
WvString wvtcl_getword(WvBuf &buf, const char *splitchars, bool do_unescape)
{
    int origsize = buf.used();
    //printf("      used=%d\n", origsize);
    if (!origsize) return WvString();

    // parser state:
    //   inescape       - the previous character was an unpaired backslash
    //   inquote        - the word started with '"' and has not been closed
    //   incontinuation - the last character seen was the newline of a
    //                    backslash-newline pair
    //   bracecount     - number of '{' seen minus number of '}' seen
    bool inescape = false, inquote = false, incontinuation = false;
    int bracecount = 0;
    // take everything the buffer has; whatever is not used is ungot below
    const char *origptr = (const char *)buf.get(origsize), 
	       *origend = origptr + origsize;
    // sptr will mark the start of the word, eptr one past its end
    const char *sptr = origptr, *eptr;

    // skip leading separators
    for (sptr = origptr; sptr < origend; sptr++)
    {
	if (!strchr(splitchars, *sptr))
	    break;
    }

    if (sptr >= origend) // nothing left
    {
        buf.unget(origsize);
	//printf("ungot %d\n", origsize);
        return WvString();
    }

    // detect initial quote
    if (*sptr == '"')
    {
        inquote = true;
	// start scanning after the opening quote so it is not mistaken for
	// the closing one
	eptr = sptr+1;
    }
    else
	eptr = sptr;
    
    // loop over string until something satisfactory is found
    for (; (eptr-origptr) < origsize; eptr++)
    {
	char ch = *eptr;
	
	// only the newline of a backslash-newline sets this again below
        incontinuation = false;
	
        if (inescape)
        {
            if (ch == '\n')
	    {
		// technically we've finished the line-continuation
		// sequence, but we require at least one more character
		// in order to prove that there's a next line somewhere
		// in the buffer.  Otherwise we might stop parsing before
		// we're "really" done if we're given input line-by-line.
		//
		// note that inescape stays set here, so the character
		// after the newline is handled by this branch as well.
                incontinuation = true;
	    }
            else
                inescape = false;
        }
        else if (ch == '\\')
	{
	    inescape = true;
	    // now we need a character to complete the escape
        }
	else // not an escape sequence
	{
	    // detect end of a quoted/unquoted string
	    // (separators and quotes inside braces do not end the word)
	    if (bracecount == 0)
	    {
		if (inquote)
		{
		    if (ch == '"')
		    {
			// include the closing quote in the word
			eptr++;
			break;
		    }
		}
		else if (strchr(splitchars, ch))
		    break;
	    }
	    
	    // match braces
	    // (braces are not counted inside a quoted word)
	    if (!inquote)
	    {
		if (ch == '{')
		    bracecount++;
		else if (ch == '}')
		    bracecount--;
	    }
	}
    }
    
    // inquote is never cleared, so a quoted word is only accepted when the
    // loop was left through the closing-quote break above; note that this
    // check then still sees inquote == true.
    // NOTE(review): as written, a word that starts with '"' therefore always
    // takes the "not there yet" path -- confirm whether that is intended.
    if (bracecount || sptr==eptr || inquote || inescape || incontinuation)
    {
	// not there yet...
	buf.unget(origsize);
	return WvString();
    }

    // copy the word [sptr, eptr) into a fresh string and NUL-terminate it
    WvString ret;
    ret.setsize(eptr - sptr + 1);
    char *retptr = ret.edit();
    memcpy(retptr, sptr, eptr-sptr);
    retptr[eptr-sptr] = 0;
    
    //printf("len=%d, unget=%d\n", eptr - sptr, origend - eptr);
    // hand back everything that follows the word
    buf.unget(origend - eptr);

    if (do_unescape)
        return wvtcl_unescape(ret);
    else
	return ret;
}


/**
 * Split the tcl-style list string '_s' into words and append each one to 'l'.
 *
 * Words are extracted with wvtcl_getword(), using 'splitchars' and
 * 'do_unescape' as given.  Extraction stops when the input is used up or
 * when wvtcl_getword() returns a null string.
 */
void wvtcl_decode(WvList<WvString> &l, WvStringParm _s,
                  const char *splitchars, bool do_unescape)
{
    // empty or null strings are empty lists
    if (!_s)
        return;

    WvConstStringBuffer input(_s);
    for (;;)
    {
        // stop once the input has been used up
        if (!(input.used() > 0))
            return;

        WvString word = wvtcl_getword(input, splitchars, do_unescape);

        // a null word means no further complete word could be extracted
        if (word.isnull())
            return;

        // 'true' is the second argument the original passed to append()
        // along with the heap-allocated copy
        l.append(new WvString(word), true);
    }
}