File: wvtclstring.h

package info (click to toggle)
wvstreams 4.6.1-5
  • links: PTS
  • area: main
  • in suites: wheezy
  • size: 6,972 kB
  • sloc: cpp: 64,200; ansic: 4,154; sh: 4,094; makefile: 545; perl: 402
file content (120 lines) | stat: -rw-r--r-- 4,435 bytes parent folder | download | duplicates (10)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
/* -*- Mode: C++ -*-
 * Worldvisions Weaver Software:
 *   Copyright (C) 1997-2002 Net Integration Technologies, Inc.
 * 
 * FIXME:
 *   It would be possible to represent arbitrary binary blobs using this
 *   technique, but we'd have to avoid using null-terminated strings in a few
 *   places, particularly in the input to wvtcl_escape().
 * 
 *   We could even make encoded binary blobs printable (although that's not
 *   _strictly_ necessary in all cases) by encoding non-printable characters
 *   using \x## notation, if wvtcl_escape() or wvtcl_unescape() supported it.
 */
/** \file
 * Functions to handle "tcl-style" strings and lists.
 * 
 * Using wvtcl_encode(), you can encode _any_ list of strings into a single
 * string, then reliably split the single string back into the list using
 * wvtcl_decode().
 * 
 * You can create recursive lists of lists by simply running wvtcl_encode()
 * on a list of strings returned from wvtcl_encode().
 * 
 * Example list encodings (all of the following lists have exactly 3 elements):
 *     foo blah weasels
 *     e1 elem2 {element 3}
 *     x1 {} "element 3"
 *     w x y\ z
 * 
 * Example list of lists:
 *     foo\ blah\ weasels {e1 elem2 {element 3}} {w x y\ z}
 */

#ifndef __WVTCLSTRING_H
#define __WVTCLSTRING_H

#include "wvbuf.h"
class WvStringMask;

// The default set of "nasties", i.e. characters that need to be escaped if
// they occur somewhere in a string: space, tab, line feed and carriage return.
// WVTCL_NASTY_SPACES_STR is the plain string-literal form of the set;
// WVTCL_NASTY_SPACES is a WvStringMask object defined elsewhere (presumably
// built from the same characters -- confirm against its definition).
// WVTCL_NASTY_SPACES is the default 'nasties' argument of wvtcl_escape() and
// wvtcl_encode().
#define WVTCL_NASTY_SPACES_STR    " \t\n\r"
extern const WvStringMask WVTCL_NASTY_SPACES;

// An alternative set of nasties that contains only the line-ending
// characters (line feed and carriage return), so that spaces and tabs do not
// by themselves force a string to be escaped.
// WVTCL_NASTY_NEWLINES is a WvStringMask object defined elsewhere (presumably
// built from WVTCL_NASTY_NEWLINES_STR -- confirm against its definition).
#define WVTCL_NASTY_NEWLINES_STR  "\n\r"
extern const WvStringMask WVTCL_NASTY_NEWLINES;

// {, }, \, and " are always considered "nasty."
// This macro expands to a run of case labels with the leading "case" keyword
// and the trailing ':' left off, so it is meant to be written inside a switch
// statement as:
//     case WVTCL_ALWAYS_NASTY_CASE:
#define WVTCL_ALWAYS_NASTY_CASE '{': case '}': case '\\': case '"'


// The default set of split characters, i.e. characters that separate elements
// in a list: space, tab, line feed and carriage return.  If one of these
// characters appears unescaped and not between {} or "" in a list, it
// signifies the end of the current element.
// WVTCL_SPLITCHARS is the default 'splitchars' argument of wvtcl_encode(),
// wvtcl_getword() and wvtcl_decode().
#define WVTCL_SPLITCHARS_STR " \t\n\r"
extern const WvStringMask WVTCL_SPLITCHARS;


/**
 * tcl-escape a string.  There are three ways to do this:
 *   1) Strings that need no escaping are unchanged.
 *   2) Strings containing characters in 'nasties' are usually encoded just
 *         by enclosing the unmodified string in braces.
 *         (For example, "foo blah" becomes "{foo blah}".)
 *   3) Strings containing nasties _and_ unmatched braces are encoded using
 *         backslash notation.  (For example, " foo} " becomes "\ foo\}\ ".)
 *
 * \param s        the string to escape.
 * \param nasties  the set of characters that require the string to be
 *                 escaped; defaults to WVTCL_NASTY_SPACES.
 * \return the escaped form of 's'.
 */
WvString wvtcl_escape(WvStringParm s,
		      const WvStringMask &nasties = WVTCL_NASTY_SPACES);


/**
 * tcl-unescape a string.  This is generally the reverse of wvtcl_escape(),
 * except that it can reverse any backslashified or embraced string, even if
 * the string doesn't follow the "simplest encoding" rules used by
 * wvtcl_escape().  Strings in double quotes are handled too, i.e. '"foo"'
 * becomes 'foo'.
 *
 * \param s  the escaped string to decode.
 * \return the unescaped form of 's'.
 */
WvString wvtcl_unescape(WvStringParm s);


/**
 * Encode a tcl-style list.  This is done by tcl-escaping each string in 'l',
 * then appending the escaped strings together, separated by the first char
 * in splitchars.
 *
 * \param l           the list of strings to encode.
 * \param nasties     the set of characters that require an element to be
 *                    escaped; defaults to WVTCL_NASTY_SPACES.
 * \param splitchars  the set of element separators; defaults to
 *                    WVTCL_SPLITCHARS.
 * \return a single string containing the encoded list.
 */
WvString wvtcl_encode(WvList<WvString> &l,
		      const WvStringMask &nasties = WVTCL_NASTY_SPACES,
		      const WvStringMask &splitchars = WVTCL_SPLITCHARS);

/**
 * Get a single tcl word from an input buffer, leaving the rest of the buffer
 * untouched.  If no word can be created from the buffer, return a null
 * string and leave the buffer unmodified.
 *
 * \param buf          the input buffer to extract the word from.
 * \param splitchars   the set of characters that separate words; defaults to
 *                     WVTCL_SPLITCHARS.
 * \param do_unescape  NOTE(review): presumably selects whether the extracted
 *                     word is tcl-unescaped before being returned -- confirm
 *                     against the implementation.  Defaults to true.
 * \return the extracted word, or a null string if no word could be created.
 */
WvString wvtcl_getword(WvBuf &buf,
		       const WvStringMask &splitchars = WVTCL_SPLITCHARS,
		       bool do_unescape = true);

/**
 * Split a tcl-style list.  There are some special "convenience" features
 * here, which allow users to create lists more flexibly than wvtcl_encode()
 * would do.
 * 
 * Elements of the list are separated by any number of any characters from
 * the 'splitchars' list.
 * 
 * Quotes are allowed around elements: '"foo"' becomes 'foo'.  These work
 * mostly like braces, except the string is assumed to be backslashified.
 * That is, '"\ "' becomes ' ', whereas '{\ }' becomes '\ ' (i.e. the
 * backslash wouldn't be removed).
 * 
 * Zero-length elements must be represented by {}.
 *
 * \param l            the list that receives the decoded elements.
 * \param _s           the encoded list string to split.
 * \param splitchars   the set of characters that separate elements; defaults
 *                     to WVTCL_SPLITCHARS.
 * \param do_unescape  NOTE(review): presumably selects whether each element
 *                     is tcl-unescaped before being stored in 'l' -- confirm
 *                     against the implementation.  Defaults to true.
 */
void wvtcl_decode(WvList<WvString> &l, WvStringParm _s,
		  const WvStringMask &splitchars = WVTCL_SPLITCHARS,
		  bool do_unescape = true);

#endif // __WVTCLSTRING_H