File: src_backend_parser_scansup.cpp

package info (click to toggle)
duckdb 1.5.1-2
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 299,196 kB
  • sloc: cpp: 865,414; ansic: 57,292; python: 18,871; sql: 12,663; lisp: 11,751; yacc: 7,412; lex: 1,682; sh: 747; makefile: 558
file content (133 lines) | stat: -rwxr-xr-x 3,972 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
/*--------------------------------------------------------------------
 * Symbols referenced in this file:
 * - truncate_identifier
 * - downcase_truncate_identifier
 * - downcase_identifier
 * - scanner_isspace
 *--------------------------------------------------------------------
 */

/*-------------------------------------------------------------------------
 *
 * scansup.c
 *	  support routines for the lex/flex scanner, used by both the normal
 * backend as well as the bootstrap backend
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development PGGroup
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/parser/scansup.c
 *
 *-------------------------------------------------------------------------
 */
#include "pg_functions.hpp"
#include <string.h>

#include <ctype.h>

#include "parser/scansup.hpp"
#include "mb/pg_wchar.hpp"

#ifdef __MVS__
#include <zos-tls.h>
#endif

namespace duckdb_libpgquery {

/* ----------------
 *		scanstr
 *
 * if the string passed in has escaped codes, map the escape codes to actual
 * chars
 *
 * the string returned is palloc'd and should eventually be pfree'd by the
 * caller!
 * ----------------
 */

/*
 * downcase_truncate_identifier() --- do appropriate downcasing and
 * truncation of an unquoted identifier.  Optionally warn of truncation.
 *
 * Returns a palloc'd string containing the adjusted identifier.
 *
 * Note: in some usages the passed string is not null-terminated.
 *
 * Note: the API of this function is designed to allow for downcasing
 * transformations that increase the string length, but we don't yet
 * support that.  If you want to implement it, you'll need to fix
 * SplitIdentifierString() in utils/adt/varlena.c.
 */
char *downcase_truncate_identifier(const char *ident, int len, bool warn) {
	return downcase_identifier(ident, len, warn, true);
}

#ifdef __MVS__
static __tlssim<bool> pg_preserve_identifier_case_impl(false);
#define pg_preserve_identifier_case (*pg_preserve_identifier_case_impl.access())
#else
static __thread bool pg_preserve_identifier_case = false;
#endif

void set_preserve_identifier_case(bool preserve) {
	pg_preserve_identifier_case = preserve;
}

bool get_preserve_identifier_case() {
	return pg_preserve_identifier_case;
}

/*
 * a workhorse for downcase_truncate_identifier
 */
char *downcase_identifier(const char *ident, int len, bool warn, bool truncate) {
	char *result;
	int i;
	bool enc_is_single_byte;

	result = (char *)palloc(len + 1);
	enc_is_single_byte = pg_database_encoding_max_length() == 1;

	/*
	 * SQL99 specifies Unicode-aware case normalization, which we don't yet
	 * have the infrastructure for.  Instead we use tolower() to provide a
	 * locale-aware translation.  However, there are some locales where this
	 * is not right either (eg, Turkish may do strange things with 'i' and
	 * 'I').  Our current compromise is to use tolower() for characters with
	 * the high bit set, as long as they aren't part of a multi-byte
	 * character, and use an ASCII-only downcasing for 7-bit characters.
	 */
	for (i = 0; i < len; i++) {
		unsigned char ch = (unsigned char)ident[i];

		if (!get_preserve_identifier_case()) {
			if (ch >= 'A' && ch <= 'Z')
				ch += 'a' - 'A';
			else if (enc_is_single_byte && IS_HIGHBIT_SET(ch) && isupper(ch))
				ch = tolower(ch);
		}
		result[i] = (char)ch;
	}
	result[i] = '\0';

	return result;
}

/*
 * scanner_isspace() --- return true if flex scanner considers char whitespace
 *
 * This should be used instead of the potentially locale-dependent isspace()
 * function when it's important to match the lexer's behavior.
 *
 * In principle we might need similar functions for isalnum etc, but for the
 * moment only isspace seems needed.
 */
bool scanner_isspace(char ch) {
	/* This must match scan.l's list of {space} characters */
	if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' || ch == '\f')
		return true;
	return false;
}
}