1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287
|
#include "shell_state.hpp"
#include "duckdb/common/string_util.hpp"
#include <thread>
namespace duckdb_shell {
// Block the calling thread for (at least) the requested number of milliseconds.
void ShellState::Sleep(idx_t ms) {
	const std::chrono::milliseconds pause_length(ms);
	std::this_thread::sleep_for(pause_length);
}
// The below methods were present in the sqlite API wrappers - and have been ported to here
// Ideally they are eventually replaced with native code
// Fill the first N bytes of pBuf with pseudo-random values.
//
//   N    - number of bytes to write; values <= 0 are treated as "nothing to do"
//   pBuf - destination buffer, must have room for at least N bytes
//
// The bytes come from rand(), which is seeded from the wall clock on the first call that
// actually generates data. rand() is NOT a cryptographically secure generator, so the output
// must not be used for secrets. The one-time seeding flag is a plain static without any
// locking.
void ShellState::GenerateRandomBytes(int N, void *pBuf) {
	// Guard against empty/invalid requests: a negative N would otherwise make the countdown
	// loop below run "forever" and write in front of the buffer
	if (N <= 0 || !pBuf) {
		return;
	}
	static bool init = false;
	if (!init) {
		srand(static_cast<unsigned int>(time(nullptr)));
		init = true;
	}
	unsigned char *zBuf = static_cast<unsigned char *>(pBuf);
	// The buffer is filled back-to-front: N doubles as the remaining count and the index
	while (N--) {
		// modulo 256 (not 255) so that every byte value, including 0xFF, can be produced
		zBuf[N] = static_cast<unsigned char>(rand() % 256);
	}
}
// Case-conversion helpers that keep their sqlite names. The argument is cast to
// unsigned char before it is handed to toupper()/tolower(), so only the low byte of the
// argument is considered.
#define sqlite3Toupper(x) toupper((unsigned char)(x))
#define sqlite3Tolower(x) tolower((unsigned char)(x))
/*
** Lookup table used to decode the first byte of a multi-byte UTF8 character.
** It is indexed with (lead byte - 0xc0) and yields the payload bits that the
** lead byte contributes to the code point.
*/
static const unsigned char sqlite3Utf8Trans1[] = {
    /* lead bytes 0xc0..0xcf */
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    /* lead bytes 0xd0..0xdf */
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
    /* lead bytes 0xe0..0xef */
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    /* lead bytes 0xf0..0xff */
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
};

/*
** Decode one character from a zero-terminated UTF8 string.
**
** pz points at the read cursor. On return the cursor has been advanced past
** the first byte and past every continuation byte (10xxxxxx) that follows a
** lead byte >= 0xc0.
**
** Returns the decoded value. A first byte below 0xc0 is returned unchanged.
** Multi-byte sequences that decode to a value below 0x80, to a surrogate
** (0xD800..0xDFFF) or to 0xFFFE/0xFFFF are replaced by 0xFFFD.
*/
uint32_t ReadCodepoint(const unsigned char **pz /* Pointer to string from which to read char */
) {
	const unsigned char *cursor = *pz;
	unsigned int codepoint = *cursor++;
	if (codepoint < 0xc0) {
		/* single byte (or stray continuation byte): hand it back as-is */
		*pz = cursor;
		return codepoint;
	}
	/* start from the payload bits of the lead byte, then append 6 bits per continuation byte */
	codepoint = sqlite3Utf8Trans1[codepoint - 0xc0];
	for (; (*cursor & 0xc0) == 0x80; ++cursor) {
		codepoint = (codepoint << 6) + (*cursor & 0x3f);
	}
	*pz = cursor;

	const bool too_small = codepoint < 0x80;
	const bool is_surrogate = (codepoint & 0xFFFFF800) == 0xD800;
	const bool is_nonchar = (codepoint & 0xFFFFFFFE) == 0xFFFE;
	return (too_small || is_surrogate || is_nonchar) ? 0xFFFD : codepoint;
}
/*
** Describes the special characters and case handling used by one flavour of
** pattern matching (GLOB or LIKE). The members are initialized positionally,
** so their order matters.
*/
struct compareInfo {
	/* wildcard for "any sequence of characters": "*" or "%" */
	uint8_t matchAll;
	/* wildcard for "exactly one character": "?" or "_" */
	uint8_t matchOne;
	/* character that opens a character set: "[" or 0 when sets are not supported */
	uint8_t matchSet;
	/* non-zero when case differences are to be ignored */
	uint8_t noCase;
};
/*
** Utf8Read(A) reads the next character from the byte pointer A and advances
** A past it.
**
** Fast path: when the next byte is below 0x80 it is returned directly and A
** is advanced by one. Otherwise the character is decoded by ReadCodepoint(),
** which advances A itself.
**
** A is expanded several times and its address is taken, so it must be a
** plain pointer variable without side effects.
*/
#define Utf8Read(A) (A[0] < 0x80 ? *(A++) : ReadCodepoint(&A))
/* GLOB matching: '*' and '?' wildcards, '[' opens a character set, case sensitive. */
static const struct compareInfo globInfo = {'*', '?', '[', 0};
/* The correct SQL-92 behavior is for the LIKE operator to ignore
** case. Thus 'a' LIKE 'A' would be true.
** LIKE matching: '%' and '_' wildcards, no character sets, case insensitive. */
static const struct compareInfo likeInfoNorm = {'%', '_', 0, 1};
/* If SQLITE_CASE_SENSITIVE_LIKE is defined, then the LIKE operator
** is case sensitive causing 'a' LIKE 'A' to be false.
** The case-sensitive variant is kept below for reference only; it is disabled. */
// static const struct compareInfo likeInfoAlt = { '%', '_', 0, 0 };
/*
** Possible return values of patternCompare():
**
** SQLITE_MATCH:           the string matches the pattern
** SQLITE_NOMATCH:         the string does not match the pattern
** SQLITE_NOWILDCARDMATCH: no match in spite of having * or % wildcards
*/
#define SQLITE_MATCH 0
#define SQLITE_NOMATCH 1
#define SQLITE_NOWILDCARDMATCH 2
/*
** Advance the byte pointer zIn past one UTF-8 character without decoding it:
** always step over the first byte and, if that byte was >= 0xc0, also step
** over every continuation byte (10xxxxxx) that follows.
** zIn is expanded several times, so it must be a plain pointer variable.
*/
#define SQLITE_SKIP_UTF8(zIn) \
{ \
if ((*(zIn++)) >= 0xc0) { \
while ((*zIn & 0xc0) == 0x80) { \
zIn++; \
} \
} \
}
/*
** Compare two UTF-8 strings for equality where the first string is
** a GLOB or LIKE expression. Return values:
**
**    SQLITE_MATCH:            Match
**    SQLITE_NOMATCH:          No match
**    SQLITE_NOWILDCARDMATCH:  No match in spite of having * or % wildcards.
**
** Both strings must be zero-terminated.
**
** Globbing rules:
**
**     '*'       Matches any sequence of zero or more characters.
**
**     '?'       Matches exactly one character.
**
**    [...]      Matches one character from the enclosed list of
**               characters.
**
**    [^...]     Matches one character not in the enclosed list.
**
** With the [...] and [^...] matching, a ']' character can be included
** in the list by making it the first character after '[' or '^'. A
** range of characters can be specified using '-'. Example:
** "[a-z]" matches any single lower-case letter. To match a '-', make
** it the last character in the list.
**
** Like matching rules:
**
**     '%'       Matches any sequence of zero or more characters
**
**     '_'       Matches any one character
**
**     Ec        Where E is the "esc" character and c is any other
**               character, including '%', '_', and esc, match exactly c.
**
** The comments within this routine usually assume glob matching.
**
** Case-insensitive comparison (pInfo->noCase) only folds characters below
** 0x80; all other characters are compared by their decoded value.
**
** This routine is usually quick, but can be N**2 in the worst case.
*/
static int patternCompare(const uint8_t *zPattern,         /* The glob pattern */
                          const uint8_t *zString,          /* The string to compare against the glob */
                          const struct compareInfo *pInfo, /* Information about how to do the compare */
                          uint32_t matchOther              /* The escape char (LIKE) or '[' (GLOB) */
) {
	uint32_t c, c2;                      /* Next pattern and input string chars */
	uint32_t matchOne = pInfo->matchOne; /* "?" or "_" */
	uint32_t matchAll = pInfo->matchAll; /* "*" or "%" */
	uint8_t noCase = pInfo->noCase;      /* True if uppercase==lowercase */
	const uint8_t *zEscaped = 0;         /* Pattern position just past the last escaped char */

	while ((c = Utf8Read(zPattern)) != 0) {
		if (c == matchAll) { /* Match "*" */
			/* Skip over multiple "*" characters in the pattern. If there
			** are also "?" characters, skip those as well, but consume a
			** single character of the input string for each "?" skipped */
			while ((c = Utf8Read(zPattern)) == matchAll || c == matchOne) {
				if (c == matchOne && Utf8Read(zString) == 0) {
					return SQLITE_NOWILDCARDMATCH;
				}
			}
			if (c == 0) {
				return SQLITE_MATCH; /* "*" at the end of the pattern matches */
			} else if (c == matchOther) {
				if (pInfo->matchSet == 0) {
					/* LIKE: matchOther is the escape character, so the character
					** that follows it is the literal to search for */
					c = Utf8Read(zPattern);
					if (c == 0)
						return SQLITE_NOWILDCARDMATCH;
				} else {
					/* "[...]" immediately follows the "*". We have to do a slow
					** recursive search in this case, but it is an unusual case. */
					D_ASSERT(matchOther < 0x80); /* '[' is a single-byte character */
					while (*zString) {
						/* &zPattern[-1] re-points at the '[' that was just consumed */
						int bMatch = patternCompare(&zPattern[-1], zString, pInfo, matchOther);
						if (bMatch != SQLITE_NOMATCH)
							return bMatch;
						SQLITE_SKIP_UTF8(zString);
					}
					return SQLITE_NOWILDCARDMATCH;
				}
			}

			/* At this point variable c contains the first character of the
			** pattern string past the "*". Search in the input string for the
			** first matching character and recursively continue the match from
			** that point.
			**
			** For a case-insensitive search, look for either the upper-case
			** or the lower-case form of c.
			*/
			if (c < 0x80) {
				/* Byte-wise fast path. It is only valid for single-byte (ASCII)
				** characters: a value of 0x80 or above is encoded as several
				** bytes, and searching for its low byte would also hit the
				** continuation bytes of unrelated characters. */
				char zStop[3];
				int bMatch;
				if (noCase) {
					zStop[0] = sqlite3Toupper(c);
					zStop[1] = sqlite3Tolower(c);
					zStop[2] = 0;
				} else {
					zStop[0] = c;
					zStop[1] = 0;
				}
				while (1) {
					zString += strcspn((const char *)zString, zStop);
					if (zString[0] == 0)
						break;
					zString++;
					bMatch = patternCompare(zPattern, zString, pInfo, matchOther);
					if (bMatch != SQLITE_NOMATCH)
						return bMatch;
				}
			} else {
				/* Multi-byte character: decode the input character by character */
				int bMatch;
				while ((c2 = Utf8Read(zString)) != 0) {
					if (c2 != c)
						continue;
					bMatch = patternCompare(zPattern, zString, pInfo, matchOther);
					if (bMatch != SQLITE_NOMATCH)
						return bMatch;
				}
			}
			return SQLITE_NOWILDCARDMATCH;
		}
		if (c == matchOther) {
			if (pInfo->matchSet == 0) {
				/* LIKE escape: the next pattern character is taken literally.
				** Remember where it ends so that an escaped "_" is not treated
				** as a wildcard further down. */
				c = Utf8Read(zPattern);
				if (c == 0)
					return SQLITE_NOMATCH;
				zEscaped = zPattern;
			} else {
				/* GLOB character set "[...]" or "[^...]" */
				uint32_t prior_c = 0; /* Previous set member, lower bound of a possible range */
				int seen = 0;         /* True once the input character was found in the set */
				int invert = 0;       /* True for "[^...]" */
				c = Utf8Read(zString);
				if (c == 0)
					return SQLITE_NOMATCH;
				c2 = Utf8Read(zPattern);
				if (c2 == '^') {
					invert = 1;
					c2 = Utf8Read(zPattern);
				}
				if (c2 == ']') {
					/* A ']' directly after '[' or '^' is a literal set member */
					if (c == ']')
						seen = 1;
					c2 = Utf8Read(zPattern);
				}
				while (c2 && c2 != ']') {
					if (c2 == '-' && zPattern[0] != ']' && zPattern[0] != 0 && prior_c > 0) {
						/* Range "prior_c-c2" */
						c2 = Utf8Read(zPattern);
						if (c >= prior_c && c <= c2)
							seen = 1;
						prior_c = 0;
					} else {
						if (c == c2) {
							seen = 1;
						}
						prior_c = c2;
					}
					c2 = Utf8Read(zPattern);
				}
				/* c2 == 0 means the set was never closed with ']' */
				if (c2 == 0 || (seen ^ invert) == 0) {
					return SQLITE_NOMATCH;
				}
				continue;
			}
		}
		c2 = Utf8Read(zString);
		if (c == c2)
			continue;
		if (noCase && c < 0x80 && c2 < 0x80 && sqlite3Tolower(c) == sqlite3Tolower(c2)) {
			continue;
		}
		/* "?" / "_" matches any single character, unless it was escaped */
		if (c == matchOne && zPattern != zEscaped && c2 != 0)
			continue;
		return SQLITE_NOMATCH;
	}
	/* Pattern exhausted: it is a match only if the input is exhausted as well */
	return *zString == 0 ? SQLITE_MATCH : SQLITE_NOMATCH;
}
// Returns true when zString matches the GLOB pattern zGlobPattern
// ('*', '?' and '[...]' wildcards, case sensitive). Both arguments are zero-terminated.
bool ShellState::StringGlob(const char *zGlobPattern, const char *zString) {
	const uint8_t *pattern_bytes = reinterpret_cast<const uint8_t *>(zGlobPattern);
	const uint8_t *subject_bytes = reinterpret_cast<const uint8_t *>(zString);
	const int verdict = patternCompare(pattern_bytes, subject_bytes, &globInfo, '[');
	return verdict == SQLITE_MATCH;
}
// Returns true when zStr matches the LIKE pattern zPattern
// ('%' and '_' wildcards, ASCII case-insensitive). esc is handed to the matcher as the
// escape character. Both string arguments are zero-terminated.
bool ShellState::StringLike(const char *zPattern, const char *zStr, unsigned int esc) {
	const uint8_t *pattern_bytes = reinterpret_cast<const uint8_t *>(zPattern);
	const uint8_t *subject_bytes = reinterpret_cast<const uint8_t *>(zStr);
	const int verdict = patternCompare(pattern_bytes, subject_bytes, &likeInfoNorm, esc);
	return verdict == SQLITE_MATCH;
}
} // namespace duckdb_shell
|