File: shell_helpers.cpp

package info (click to toggle)
duckdb 1.5.1-2
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 299,196 kB
  • sloc: cpp: 865,414; ansic: 57,292; python: 18,871; sql: 12,663; lisp: 11,751; yacc: 7,412; lex: 1,682; sh: 747; makefile: 558
file content (287 lines) | stat: -rw-r--r-- 9,782 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
#include "shell_state.hpp"
#include "duckdb/common/string_util.hpp"
#include <thread>

namespace duckdb_shell {

//! Blocks the calling thread for (at least) `ms` milliseconds.
void ShellState::Sleep(idx_t ms) {
	const std::chrono::milliseconds pause(ms);
	std::this_thread::sleep_for(pause);
}

// The below methods were present in the sqlite API wrappers - and have been ported to here
// Ideally they are eventually replaced with native code
//! Fills the first N bytes of pBuf with pseudo-random values drawn from rand().
//!
//! \param N     Number of bytes to write; values <= 0 write nothing.
//! \param pBuf  Destination buffer, must have room for at least N bytes.
//!
//! The generator is seeded from the wall clock on first use. rand() is not a
//! cryptographic generator, so the output must not be used for secrets.
void ShellState::GenerateRandomBytes(int N, void *pBuf) {
	static bool init = false;
	if (!init) {
		srand(time(NULL));
		init = true;
	}
	unsigned char *zBuf = (unsigned char *)pBuf;
	// "N-- > 0" (rather than "N--") so that a negative count is a no-op instead
	// of walking backwards past the start of the buffer.
	while (N-- > 0) {
		// Reduce modulo 256 so that every byte value 0x00..0xFF can be produced;
		// the previous "% 255" could never yield 0xFF.
		unsigned char nextByte = rand() % 256;
		zBuf[N] = nextByte;
	}
}

// Case-conversion helpers used by the pattern matcher below.
// The argument is narrowed to unsigned char before the call, so only its low
// byte is considered and toupper()/tolower() never receive a negative value
// (which would be undefined behaviour for the <cctype> functions).
#define sqlite3Toupper(x) toupper((unsigned char)(x))
#define sqlite3Tolower(x) tolower((unsigned char)(x))

/*
** Lookup table used to decode the first byte of a multi-byte UTF-8
** character.  It is indexed by (lead byte - 0xc0) and yields the payload
** bits carried by that lead byte.
*/
static const unsigned char sqlite3Utf8Trans1[] = {
    /* 0xc0 - 0xcf */
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    /* 0xd0 - 0xdf */
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
    /* 0xe0 - 0xef */
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    /* 0xf0 - 0xff */
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
};

/*
** Decode one character from the zero-terminated UTF-8 string at *pz and
** advance *pz past the bytes that were consumed.
**
** A first byte below 0xc0 is returned unchanged.  Otherwise the payload of
** the lead byte is combined with every continuation byte (10xxxxxx) that
** follows.  Results below 0x80, results in the range 0xD800-0xDFFF, and the
** values 0xFFFE and 0xFFFF are replaced by 0xFFFD.
*/
uint32_t ReadCodepoint(const unsigned char **pz /* Pointer to string from which to read char */
) {
	const unsigned char *cursor = *pz;
	unsigned int codepoint = *cursor++;

	if (codepoint < 0xc0) {
		/* Single byte (or stray continuation byte): hand it back as-is. */
		*pz = cursor;
		return codepoint;
	}

	codepoint = sqlite3Utf8Trans1[codepoint - 0xc0];
	for (; (*cursor & 0xc0) == 0x80; ++cursor) {
		codepoint = (codepoint << 6) + (0x3f & *cursor);
	}
	*pz = cursor;

	const bool below_multibyte_range = codepoint < 0x80;
	const bool in_surrogate_range = (codepoint & 0xFFFFF800) == 0xD800;
	const bool is_fffe_or_ffff = (codepoint & 0xFFFFFFFE) == 0xFFFE;
	if (below_multibyte_range || in_surrogate_range || is_fffe_or_ffff) {
		codepoint = 0xFFFD;
	}
	return codepoint;
}

/*
** A structure defining how to do GLOB-style comparisons.
**
** One instance describes a matching dialect (see globInfo and likeInfoNorm
** below).  The field order matters: the instances are brace-initialised
** positionally.
*/
struct compareInfo {
	uint8_t matchAll; /* Wildcard matching any sequence of characters: "*" or "%" */
	uint8_t matchOne; /* Wildcard matching exactly one character: "?" or "_" */
	uint8_t matchSet; /* "[" when character sets are supported, 0 otherwise */
	uint8_t noCase;   /* true to ignore case differences */
};

/*
** Utf8Read(A) reads the next character from the UTF-8 string A and
** advances A past it.  In the common case where the next byte is ASCII
** (< 0x80) the byte is read directly; otherwise the character is decoded
** by ReadCodepoint().
**
** A is evaluated more than once and is modified, so it must be a plain
** pointer variable (not an expression with side effects).
*/
#define Utf8Read(A) (A[0] < 0x80 ? *(A++) : ReadCodepoint(&A))

/* GLOB dialect: '*' and '?' wildcards, '[' character sets, case sensitive. */
static const struct compareInfo globInfo = {'*', '?', '[', 0};
/* LIKE dialect: '%' and '_' wildcards, no character sets.
** The correct SQL-92 behavior is for the LIKE operator to ignore
** case.  Thus  'a' LIKE 'A' would be true. */
static const struct compareInfo likeInfoNorm = {'%', '_', 0, 1};
/* If SQLITE_CASE_SENSITIVE_LIKE is defined, then the LIKE operator
** is case sensitive causing 'a' LIKE 'A' to be false.
** (Kept for reference only; the case-sensitive variant is not used here.) */
// static const struct compareInfo likeInfoAlt = { '%', '_',   0, 0 };

/*
** Possible returns from patternCompare().
**
** SQLITE_NOWILDCARDMATCH is a "no match" that was decided after a
** match-all wildcard had been consumed; recursive invocations of
** patternCompare() pass it straight up instead of trying further
** positions in the string.
*/
#define SQLITE_MATCH           0
#define SQLITE_NOMATCH         1
#define SQLITE_NOWILDCARDMATCH 2

/*
** Advance the pointer zIn past one UTF-8 character: step over the first
** byte and, if that byte was a multi-byte lead (>= 0xc0), over every
** continuation byte (10xxxxxx) that follows it.
**
** zIn is evaluated more than once and is modified, so it must be a plain
** pointer variable.  The macro expands to a braced block, not an expression.
*/
#define SQLITE_SKIP_UTF8(zIn)                                                                                          \
	{                                                                                                                  \
		if ((*(zIn++)) >= 0xc0) {                                                                                      \
			while ((*zIn & 0xc0) == 0x80) {                                                                            \
				zIn++;                                                                                                 \
			}                                                                                                          \
		}                                                                                                              \
	}

/*
** Compare two UTF-8 strings for equality where the first string is
** a GLOB or LIKE expression.  Return values:
**
**    SQLITE_MATCH:            Match
**    SQLITE_NOMATCH:          No match
**    SQLITE_NOWILDCARDMATCH:  No match in spite of having * or % wildcards.
**
** Parameters:
**
**    zPattern    Zero-terminated pattern.
**    zString     Zero-terminated string to test against the pattern.
**    pInfo       Wildcard characters and case sensitivity of the dialect.
**    matchOther  When pInfo->matchSet is 0 (LIKE) this is the escape
**                character; otherwise (GLOB) it is the character that
**                opens a [...] set.
**
** Globbing rules:
**
**      '*'       Matches any sequence of zero or more characters.
**
**      '?'       Matches exactly one character.
**
**     [...]      Matches one character from the enclosed list of
**                characters.
**
**     [^...]     Matches one character not in the enclosed list.
**
** With the [...] and [^...] matching, a ']' character can be included
** in the list by making it the first character after '[' or '^'.  A
** range of characters can be specified using '-'.  Example:
** "[a-z]" matches any single lower-case letter.  To match a '-', make
** it the last character in the list.
**
** Like matching rules:
**
**      '%'       Matches any sequence of zero or more characters
**
**      '_'       Matches any one character
**
**      Ec        Where E is the "esc" character and c is any other
**                character, including '%', '_', and esc, match exactly c.
**
** The comments within this routine usually assume glob matching.
**
** This routine is usually quick, but can be N**2 in the worst case.
*/
static int patternCompare(const uint8_t *zPattern,         /* The glob pattern */
                          const uint8_t *zString,          /* The string to compare against the glob */
                          const struct compareInfo *pInfo, /* Information about how to do the compare */
                          uint32_t matchOther              /* The escape char (LIKE) or '[' (GLOB) */
) {
	uint32_t c, c2;                      /* Next pattern and input string chars */
	uint32_t matchOne = pInfo->matchOne; /* "?" or "_" */
	uint32_t matchAll = pInfo->matchAll; /* "*" or "%" */
	uint8_t noCase = pInfo->noCase;      /* True if uppercase==lowercase */
	const uint8_t *zEscaped = 0;         /* Pattern position just past the last escaped char */

	while ((c = Utf8Read(zPattern)) != 0) {
		if (c == matchAll) { /* Match "*" */
			/* Skip over multiple "*" characters in the pattern.  If there
			** are also "?" characters, skip those as well, but consume a
			** single character of the input string for each "?" skipped */
			while ((c = Utf8Read(zPattern)) == matchAll || c == matchOne) {
				if (c == matchOne && Utf8Read(zString) == 0) {
					return SQLITE_NOWILDCARDMATCH;
				}
			}
			if (c == 0) {
				return SQLITE_MATCH; /* "*" at the end of the pattern matches */
			} else if (c == matchOther) {
				if (pInfo->matchSet == 0) {
					/* LIKE: matchOther is the escape character, so the character
					** that follows it is the literal to search for.  An escape
					** at the very end of the pattern cannot match anything. */
					c = Utf8Read(zPattern);
					if (c == 0)
						return SQLITE_NOWILDCARDMATCH;
				} else {
					/* "[...]" immediately follows the "*".  We have to do a slow
					** recursive search in this case, but it is an unusual case. */
					D_ASSERT(matchOther < 0x80); /* '[' is a single-byte character */
					while (*zString) {
						/* &zPattern[-1] backs up to the '[' that was just read so the
						** recursive call sees the whole set. */
						int bMatch = patternCompare(&zPattern[-1], zString, pInfo, matchOther);
						if (bMatch != SQLITE_NOMATCH)
							return bMatch;
						SQLITE_SKIP_UTF8(zString);
					}
					return SQLITE_NOWILDCARDMATCH;
				}
			}

			/* At this point variable c contains the first character of the
			** pattern string past the "*".  Search in the input string for the
			** first matching character and recursively continue the match from
			** that point.
			**
			** For a case-insensitive search, zStop holds both the upper-case
			** and the lower-case form of c, so the input string is searched
			** for either of them.
			*/
			if (c <= 0x80) {
				/* c fits in one byte: scan the input bytewise with strcspn(). */
				char zStop[3];
				int bMatch;
				if (noCase) {
					zStop[0] = sqlite3Toupper(c);
					zStop[1] = sqlite3Tolower(c);
					zStop[2] = 0;
				} else {
					zStop[0] = c;
					zStop[1] = 0;
				}
				while (1) {
					zString += strcspn((const char *)zString, zStop);
					if (zString[0] == 0)
						break;
					/* Step over the byte that matched c, then try to match the
					** rest of the pattern against the rest of the string. */
					zString++;
					bMatch = patternCompare(zPattern, zString, pInfo, matchOther);
					if (bMatch != SQLITE_NOMATCH)
						return bMatch;
				}
			} else {
				/* c is a multi-byte character: decode the input character by
				** character and compare exactly (no case folding here). */
				int bMatch;
				while ((c2 = Utf8Read(zString)) != 0) {
					if (c2 != c)
						continue;
					bMatch = patternCompare(zPattern, zString, pInfo, matchOther);
					if (bMatch != SQLITE_NOMATCH)
						return bMatch;
				}
			}
			return SQLITE_NOWILDCARDMATCH;
		}
		if (c == matchOther) {
			if (pInfo->matchSet == 0) {
				/* LIKE escape: take the next pattern character literally.
				** zEscaped remembers the position so that the matchOne test at
				** the bottom of the loop does not treat it as a wildcard. */
				c = Utf8Read(zPattern);
				if (c == 0)
					return SQLITE_NOMATCH;
				zEscaped = zPattern;
			} else {
				/* GLOB set "[...]": c is the input character being tested and
				** c2 walks over the members of the set. */
				uint32_t prior_c = 0; /* Previous set member, start of a possible range */
				int seen = 0;         /* True once c was found in the set */
				int invert = 0;       /* True for "[^...]" */
				c = Utf8Read(zString);
				if (c == 0)
					return SQLITE_NOMATCH;
				c2 = Utf8Read(zPattern);
				if (c2 == '^') {
					invert = 1;
					c2 = Utf8Read(zPattern);
				}
				if (c2 == ']') {
					/* A ']' directly after '[' or '^' is a literal member. */
					if (c == ']')
						seen = 1;
					c2 = Utf8Read(zPattern);
				}
				while (c2 && c2 != ']') {
					if (c2 == '-' && zPattern[0] != ']' && zPattern[0] != 0 && prior_c > 0) {
						/* Range "prior_c-c2"; a '-' that is first or last in the
						** set falls through to the literal case below. */
						c2 = Utf8Read(zPattern);
						if (c >= prior_c && c <= c2)
							seen = 1;
						prior_c = 0;
					} else {
						if (c == c2) {
							seen = 1;
						}
						prior_c = c2;
					}
					c2 = Utf8Read(zPattern);
				}
				/* c2 == 0 means the set was never closed by ']'. */
				if (c2 == 0 || (seen ^ invert) == 0) {
					return SQLITE_NOMATCH;
				}
				continue;
			}
		}
		/* Ordinary pattern character: compare it with the next input character. */
		c2 = Utf8Read(zString);
		if (c == c2)
			continue;
		/* Case folding is applied to ASCII characters only. */
		if (noCase && sqlite3Tolower(c) == sqlite3Tolower(c2) && c < 0x80 && c2 < 0x80) {
			continue;
		}
		/* "?" matches any one character, unless it was escaped or the input
		** string is exhausted. */
		if (c == matchOne && zPattern != zEscaped && c2 != 0)
			continue;
		return SQLITE_NOMATCH;
	}
	/* Pattern exhausted: it is a match only if the string is exhausted too. */
	return *zString == 0 ? SQLITE_MATCH : SQLITE_NOMATCH;
}

//! Returns true when zString matches the GLOB pattern zGlobPattern
//! ('*', '?' and '[...]' wildcards, case sensitive).
bool ShellState::StringGlob(const char *zGlobPattern, const char *zString) {
	const uint8_t *pattern_bytes = reinterpret_cast<const uint8_t *>(zGlobPattern);
	const uint8_t *subject_bytes = reinterpret_cast<const uint8_t *>(zString);
	const int outcome = patternCompare(pattern_bytes, subject_bytes, &globInfo, '[');
	return outcome == SQLITE_MATCH;
}

//! Returns true when zStr matches the LIKE pattern zPattern
//! ('%' and '_' wildcards, ASCII case-insensitive); esc is the escape character.
bool ShellState::StringLike(const char *zPattern, const char *zStr, unsigned int esc) {
	const uint8_t *pattern_bytes = reinterpret_cast<const uint8_t *>(zPattern);
	const uint8_t *subject_bytes = reinterpret_cast<const uint8_t *>(zStr);
	const int outcome = patternCompare(pattern_bytes, subject_bytes, &likeInfoNorm, esc);
	return outcome == SQLITE_MATCH;
}

} // namespace duckdb_shell