File: shell_helpers.cpp

package info (click to toggle)
duckdb 1.5.1-2
links: PTS, VCS
area: main
in suites:
size: 299,196 kB
sloc: cpp: 865,414; ansic: 57,292; python: 18,871; sql: 12,663; lisp: 11,751; yacc: 7,412; lex: 1,682; sh: 747; makefile: 558
file content (287 lines) | stat: -rw-r--r-- 9,782 bytes
parent folder | download | duplicates (3)
#include "shell_state.hpp"
#include "duckdb/common/string_util.hpp"
#include <thread>

namespace duckdb_shell {

void ShellState::Sleep(idx_t ms) {
	std::this_thread::sleep_for(std::chrono::milliseconds(ms));
}

// The below methods were present in the sqlite API wrappers - and have been ported to here
// Ideally they are eventually replaced with native code
void ShellState::GenerateRandomBytes(int N, void *pBuf) {
	static bool init = false;
	if (!init) {
		srand(time(NULL));
		init = true;
	}
	unsigned char *zBuf = (unsigned char *)pBuf;
	while (N--) {
		unsigned char nextByte = rand() % 255;
		zBuf[N] = nextByte;
	}
}

#define sqlite3Toupper(x) toupper((unsigned char)(x))
#define sqlite3Tolower(x) tolower((unsigned char)(x))

/*
** This lookup table is used to help decode the first byte of
** a multi-byte UTF8 character.
*/
static const unsigned char sqlite3Utf8Trans1[] = {
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
};

uint32_t ReadCodepoint(const unsigned char **pz /* Pointer to string from which to read char */
) {
	unsigned int c;

	/* Same as READ_UTF8() above but without the zTerm parameter.
	** For this routine, we assume the UTF8 string is always zero-terminated.
	*/
	c = *((*pz)++);
	if (c >= 0xc0) {
		c = sqlite3Utf8Trans1[c - 0xc0];
		while ((*(*pz) & 0xc0) == 0x80) {
			c = (c << 6) + (0x3f & *((*pz)++));
		}
		if (c < 0x80 || (c & 0xFFFFF800) == 0xD800 || (c & 0xFFFFFFFE) == 0xFFFE) {
			c = 0xFFFD;
		}
	}
	return c;
}

/*
** A structure defining how to do GLOB-style comparisons.
*/
struct compareInfo {
	uint8_t matchAll; /* "*" or "%" */
	uint8_t matchOne; /* "?" or "_" */
	uint8_t matchSet; /* "[" or 0 */
	uint8_t noCase;   /* true to ignore case differences */
};

/*
** For LIKE and GLOB matching on EBCDIC machines, assume that every
** character is exactly one byte in size.  Also, provde the Utf8Read()
** macro for fast reading of the next character in the common case where
** the next character is ASCII.
*/
#define Utf8Read(A) (A[0] < 0x80 ? *(A++) : ReadCodepoint(&A))

static const struct compareInfo globInfo = {'*', '?', '[', 0};
/* The correct SQL-92 behavior is for the LIKE operator to ignore
** case.  Thus  'a' LIKE 'A' would be true. */
static const struct compareInfo likeInfoNorm = {'%', '_', 0, 1};
/* If SQLITE_CASE_SENSITIVE_LIKE is defined, then the LIKE operator
** is case sensitive causing 'a' LIKE 'A' to be false */
// static const struct compareInfo likeInfoAlt = { '%', '_',   0, 0 };

/*
** Possible error returns from patternMatch()
*/
#define SQLITE_MATCH           0
#define SQLITE_NOMATCH         1
#define SQLITE_NOWILDCARDMATCH 2

#define SQLITE_SKIP_UTF8(zIn)                                                                                          \
	{                                                                                                                  \
		if ((*(zIn++)) >= 0xc0) {                                                                                      \
			while ((*zIn & 0xc0) == 0x80) {                                                                            \
				zIn++;                                                                                                 \
			}                                                                                                          \
		}                                                                                                              \
	}

/*
** Compare two UTF-8 strings for equality where the first string is
** a GLOB or LIKE expression.  Return values:
**
**    SQLITE_MATCH:            Match
**    SQLITE_NOMATCH:          No match
**    SQLITE_NOWILDCARDMATCH:  No match in spite of having * or % wildcards.
**
** Globbing rules:
**
**      '*'       Matches any sequence of zero or more characters.
**
**      '?'       Matches exactly one character.
**
**     [...]      Matches one character from the enclosed list of
**                characters.
**
**     [^...]     Matches one character not in the enclosed list.
**
** With the [...] and [^...] matching, a ']' character can be included
** in the list by making it the first character after '[' or '^'.  A
** range of characters can be specified using '-'.  Example:
** "[a-z]" matches any single lower-case letter.  To match a '-', make
** it the last character in the list.
**
** Like matching rules:
**
**      '%'       Matches any sequence of zero or more characters
**
***     '_'       Matches any one character
**
**      Ec        Where E is the "esc" character and c is any other
**                character, including '%', '_', and esc, match exactly c.
**
** The comments within this routine usually assume glob matching.
**
** This routine is usually quick, but can be N**2 in the worst case.
*/
static int patternCompare(const uint8_t *zPattern,         /* The glob pattern */
                          const uint8_t *zString,          /* The string to compare against the glob */
                          const struct compareInfo *pInfo, /* Information about how to do the compare */
                          uint32_t matchOther              /* The escape char (LIKE) or '[' (GLOB) */
) {
	uint32_t c, c2;                      /* Next pattern and input string chars */
	uint32_t matchOne = pInfo->matchOne; /* "?" or "_" */
	uint32_t matchAll = pInfo->matchAll; /* "*" or "%" */
	uint8_t noCase = pInfo->noCase;      /* True if uppercase==lowercase */
	const uint8_t *zEscaped = 0;         /* One past the last escaped input char */

	while ((c = Utf8Read(zPattern)) != 0) {
		if (c == matchAll) { /* Match "*" */
			/* Skip over multiple "*" characters in the pattern.  If there
			** are also "?" characters, skip those as well, but consume a
			** single character of the input string for each "?" skipped */
			while ((c = Utf8Read(zPattern)) == matchAll || c == matchOne) {
				if (c == matchOne && Utf8Read(zString) == 0) {
					return SQLITE_NOWILDCARDMATCH;
				}
			}
			if (c == 0) {
				return SQLITE_MATCH; /* "*" at the end of the pattern matches */
			} else if (c == matchOther) {
				if (pInfo->matchSet == 0) {
					c = Utf8Read(zPattern);
					if (c == 0)
						return SQLITE_NOWILDCARDMATCH;
				} else {
					/* "[...]" immediately follows the "*".  We have to do a slow
					** recursive search in this case, but it is an unusual case. */
					D_ASSERT(matchOther < 0x80); /* '[' is a single-byte character */
					while (*zString) {
						int bMatch = patternCompare(&zPattern[-1], zString, pInfo, matchOther);
						if (bMatch != SQLITE_NOMATCH)
							return bMatch;
						SQLITE_SKIP_UTF8(zString);
					}
					return SQLITE_NOWILDCARDMATCH;
				}
			}

			/* At this point variable c contains the first character of the
			** pattern string past the "*".  Search in the input string for the
			** first matching character and recursively continue the match from
			** that point.
			**
			** For a case-insensitive search, set variable cx to be the same as
			** c but in the other case and search the input string for either
			** c or cx.
			*/
			if (c <= 0x80) {
				char zStop[3];
				int bMatch;
				if (noCase) {
					zStop[0] = sqlite3Toupper(c);
					zStop[1] = sqlite3Tolower(c);
					zStop[2] = 0;
				} else {
					zStop[0] = c;
					zStop[1] = 0;
				}
				while (1) {
					zString += strcspn((const char *)zString, zStop);
					if (zString[0] == 0)
						break;
					zString++;
					bMatch = patternCompare(zPattern, zString, pInfo, matchOther);
					if (bMatch != SQLITE_NOMATCH)
						return bMatch;
				}
			} else {
				int bMatch;
				while ((c2 = Utf8Read(zString)) != 0) {
					if (c2 != c)
						continue;
					bMatch = patternCompare(zPattern, zString, pInfo, matchOther);
					if (bMatch != SQLITE_NOMATCH)
						return bMatch;
				}
			}
			return SQLITE_NOWILDCARDMATCH;
		}
		if (c == matchOther) {
			if (pInfo->matchSet == 0) {
				c = Utf8Read(zPattern);
				if (c == 0)
					return SQLITE_NOMATCH;
				zEscaped = zPattern;
			} else {
				uint32_t prior_c = 0;
				int seen = 0;
				int invert = 0;
				c = Utf8Read(zString);
				if (c == 0)
					return SQLITE_NOMATCH;
				c2 = Utf8Read(zPattern);
				if (c2 == '^') {
					invert = 1;
					c2 = Utf8Read(zPattern);
				}
				if (c2 == ']') {
					if (c == ']')
						seen = 1;
					c2 = Utf8Read(zPattern);
				}
				while (c2 && c2 != ']') {
					if (c2 == '-' && zPattern[0] != ']' && zPattern[0] != 0 && prior_c > 0) {
						c2 = Utf8Read(zPattern);
						if (c >= prior_c && c <= c2)
							seen = 1;
						prior_c = 0;
					} else {
						if (c == c2) {
							seen = 1;
						}
						prior_c = c2;
					}
					c2 = Utf8Read(zPattern);
				}
				if (c2 == 0 || (seen ^ invert) == 0) {
					return SQLITE_NOMATCH;
				}
				continue;
			}
		}
		c2 = Utf8Read(zString);
		if (c == c2)
			continue;
		if (noCase && sqlite3Tolower(c) == sqlite3Tolower(c2) && c < 0x80 && c2 < 0x80) {
			continue;
		}
		if (c == matchOne && zPattern != zEscaped && c2 != 0)
			continue;
		return SQLITE_NOMATCH;
	}
	return *zString == 0 ? SQLITE_MATCH : SQLITE_NOMATCH;
}

bool ShellState::StringGlob(const char *zGlobPattern, const char *zString) {
	return patternCompare((uint8_t *)zGlobPattern, (uint8_t *)zString, &globInfo, '[') == SQLITE_MATCH;
}

bool ShellState::StringLike(const char *zPattern, const char *zStr, unsigned int esc) {
	return patternCompare((uint8_t *)zPattern, (uint8_t *)zStr, &likeInfoNorm, esc) == SQLITE_MATCH;
}

} // namespace duckdb_shell