File: mysql-parser-common.cpp

package info (click to toggle)
mysql-workbench 6.2.3%2Bdfsg-7
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 102,612 kB
  • ctags: 84,593
  • sloc: ansic: 804,682; cpp: 438,759; yacc: 59,129; python: 54,293; xml: 48,851; sql: 5,512; objc: 1,414; makefile: 505; sh: 455; java: 237; ruby: 6; perl: 5; php: 1
file content (444 lines) | stat: -rw-r--r-- 12,408 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
/* 
 * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; version 2 of the
 * License.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301  USA
 */

#include <sstream>
#include <string>
#include <set>
#include <stack>
#include <vector>
#include <map>

#include <antlr3.h>

#include "MySQLLexer.h"  // The generated lexer.
#include "MySQLParser.h"  // The generated parser.

#include "base/log.h"
#include "base/string_utilities.h"

#include "mysql-parser-common.h"
#include "mysql-parser.h"
#include "mysql-scanner.h"

DEFAULT_LOG_DOMAIN("MySQL parsing")

extern "C" {
  
  /**
   * Lexer callback: checks whether the given underscore-prefixed identifier names a defined
   * charset, which directs the lexer to classify it appropriately.
   *
   * @param payload The MySQLRecognitionBase instance to query, passed as an opaque pointer.
   * @param text The raw token text as handed over by the lexer.
   * @return UNDERSCORE_CHARSET if the lower-cased text (without the leading underscore) is a known
   *         charset, IDENTIFIER otherwise (including for text too short to hold a charset name).
   */
  ANTLR3_UINT32 check_charset(void *payload, pANTLR3_STRING text)
  {
    // The token text starts with the underscore char and carries an additional char at the end
    // for some reason (maybe a lexer bug), so both are cut off below. Text with fewer than two
    // chars cannot be stripped that way: the length computation would wrap around
    // (assumes the ANTLR3 string length is unsigned - TODO confirm).
    if (text->len < 2)
      return IDENTIFIER;

    // Named "recognizer" rather than "base" to avoid confusion with the base:: namespace below.
    MySQLRecognitionBase *recognizer = (MySQLRecognitionBase*)payload;

    std::string token_text((const char*)text->chars + 1, text->len - 2);
    if (recognizer->is_charset(base::tolower(token_text)))
      return UNDERSCORE_CHARSET;

    return IDENTIFIER;
  }
  
  /**
   * Checks the given text if it is equal to "\N" (w/o quotes and in uppercase). We need this extra
   * check as our lexer is case insensitive.
   *
   * @param text The raw token text as handed over by the lexer. Its last char is ignored.
   * @return NULL2_SYMBOL if the text (minus its last char) is exactly "\N",
   *         ANTLR3_TOKEN_INVALID otherwise (including for empty text).
   */
  ANTLR3_UINT32 check_null(pANTLR3_STRING text)
  {
    // An empty token can never match and would make the length computation below wrap around
    // (assumes the ANTLR3 string length is unsigned - TODO confirm).
    if (text->len == 0)
      return ANTLR3_TOKEN_INVALID;

    // Drop the last char of the input before comparing.
    std::string token_text((const char*)text->chars, text->len - 1);
    if (token_text == "\\N")
      return NULL2_SYMBOL;
    return ANTLR3_TOKEN_INVALID;
  }

} // extern "C"

//----------------- MySQLRecognitionBase ---------------------------------------------------------------

/**
 * Internal state of a MySQLRecognitionBase instance, kept out of the public class declaration.
 */
class MySQLRecognitionBase::Private
{
public:
  // Start with no SQL mode flags set so the field has a defined value no matter how the
  // instance gets created.
  Private() : _sql_mode(0) {}

  std::set<std::string> _charsets; // The set of supported charsets, queried by is_charset().
  unsigned _sql_mode; // Bit mask of SQL_MODE_* flags, see set_sql_mode().
  std::vector<MySQLParserErrorInfo> _error_info; // Errors recorded via add_error() since the last reset().
};

/**
 * Creates the recognizer's private state with the given charsets and no SQL mode flags set.
 *
 * @param charsets The charset names is_charset() will accept.
 */
MySQLRecognitionBase::MySQLRecognitionBase(const std::set<std::string> &charsets)
{
  Private *state = new Private();
  state->_sql_mode = 0;
  state->_charsets = charsets;
  d = state;
}

//--------------------------------------------------------------------------------------------------

void MySQLRecognitionBase::add_error(const std::string &message, ANTLR3_UINT32 token,
  ANTLR3_MARKER token_start, ANTLR3_UINT32 line, ANTLR3_UINT32 offset_in_line, ANTLR3_MARKER length)
{
  MySQLParserErrorInfo info = { message, token, (size_t)(token_start - (ANTLR3_MARKER)text()),
    line, offset_in_line, (size_t)length};
  d->_error_info.push_back(info);
};

//--------------------------------------------------------------------------------------------------

/**
 * Gives read access to the errors recorded via add_error().
 */
const std::vector<MySQLParserErrorInfo>& MySQLRecognitionBase::error_info()
{
  const std::vector<MySQLParserErrorInfo> &collected = d->_error_info;
  return collected;
}

//--------------------------------------------------------------------------------------------------

/**
 * Returns true if at least one error is currently recorded.
 */
bool MySQLRecognitionBase::has_errors()
{
  return !d->_error_info.empty();
}

//--------------------------------------------------------------------------------------------------

/**
 * Returns the currently active SQL mode flags (a combination of SQL_MODE_* values).
 */
unsigned MySQLRecognitionBase::sql_mode()
{
  const unsigned active_flags = d->_sql_mode;
  return active_flags;
}

//--------------------------------------------------------------------------------------------------

/**
 * Parses a comma separated list of SQL mode names (case insensitive, surrounding whitespace is
 * trimmed) and replaces the current SQL mode flags with the result. Names not handled here
 * are ignored.
 *
 * @param sql_mode The mode list, e.g. "ANSI_QUOTES, IGNORE_SPACE".
 */
void MySQLRecognitionBase::set_sql_mode(const std::string &sql_mode)
{
  unsigned flags = 0;

  const std::string upper_modes = base::toupper(sql_mode);
  std::istringstream mode_list(upper_modes);
  std::string entry;
  while (std::getline(mode_list, entry, ','))
  {
    const std::string name = base::trim(entry);

    // Combination modes: each of them switches on the same three flags.
    if (name == "ANSI" || name == "DB2" || name == "MAXDB" || name == "MSSQL" || name == "ORACLE" ||
        name == "POSTGRESQL")
    {
      flags |= SQL_MODE_ANSI_QUOTES | SQL_MODE_PIPES_AS_CONCAT | SQL_MODE_IGNORE_SPACE;
      continue;
    }

    // Modes mapping to a single flag.
    if (name == "ANSI_QUOTES")
    {
      flags |= SQL_MODE_ANSI_QUOTES;
      continue;
    }
    if (name == "PIPES_AS_CONCAT")
    {
      flags |= SQL_MODE_PIPES_AS_CONCAT;
      continue;
    }
    if (name == "NO_BACKSLASH_ESCAPES")
    {
      flags |= SQL_MODE_NO_BACKSLASH_ESCAPES;
      continue;
    }
    if (name == "IGNORE_SPACE")
    {
      flags |= SQL_MODE_IGNORE_SPACE;
      continue;
    }

    // The legacy compatibility modes imply the high NOT precedence.
    if (name == "HIGH_NOT_PRECEDENCE" || name == "MYSQL323" || name == "MYSQL40")
      flags |= SQL_MODE_HIGH_NOT_PRECEDENCE;
  }

  d->_sql_mode = flags;
}

//--------------------------------------------------------------------------------------------------

/**
 * Tells whether the given string is contained in the set of supported charsets.
 * The comparison is exact, no case conversion is done here.
 */
bool MySQLRecognitionBase::is_charset(const std::string &s)
{
  return d->_charsets.count(s) > 0;
}

//--------------------------------------------------------------------------------------------------

/**
 * Returns true if the given token type can serve as an identifier.
 */
bool MySQLRecognitionBase::is_identifier(ANTLR3_UINT32 type)
{
  // Plain and back tick quoted identifiers always qualify.
  if (type == IDENTIFIER || type == BACK_TICK_QUOTED_ID)
    return true;

  // Symbols are sorted so that keywords allowed as identifiers are in a continuous range
  // making this check easy (and reduce the parser size by 300% compared to generated token ids).
  if (type >= ACTION_SYMBOL && type <= PARTITION_SYMBOL)
    return true;

  // Double quoted text represents identifiers only if the ANSI QUOTES sql mode is active.
  return (type == DOUBLE_QUOTED_TEXT) && ((d->_sql_mode & SQL_MODE_ANSI_QUOTES) != 0);
}

//--------------------------------------------------------------------------------------------------

static std::map<std::string, size_t> keywords; // One map for all recognizers, filled on first lookup.
extern "C" pANTLR3_UINT8 MySQLParserTokenNames[]; // Defined in MySQLParser.

/**
 * Returns the token value for a given keyword, which can be used to do search/replace operations.
 * The lookup is case insensitive. Token names are registered without their "_SYMBOL" or
 * "_OPERATOR" suffix, all other names are registered unchanged.
 *
 * @param keyword The keyword to look up.
 * @return The token value, or (size_t)-1 if the keyword cannot be found.
 */
size_t MySQLRecognitionBase::get_keyword_token(const std::string &keyword)
{
  if (keywords.empty())
  {
    // Build the lookup table once. Presumably 4 is the first token value the grammar defines
    // (lower values being reserved by ANTLR3) - TODO confirm.
    for (size_t i = 4; i <= ZEROFILL_SYMBOL; ++i)
    {
      std::string name((char*)MySQLParserTokenNames[i]);
      if (base::ends_with(name, "_SYMBOL"))
        keywords[name.substr(0, name.size() - 7)] = i; // 7 == length of "_SYMBOL"
      else if (base::ends_with(name, "_OPERATOR"))
        keywords[name.substr(0, name.size() - 9)] = i; // 9 == length of "_OPERATOR"
      else
        keywords[name] = i;
    }
  }

  // A single find() serves both the existence check and the value retrieval.
  std::map<std::string, size_t>::iterator entry = keywords.find(base::toupper(keyword));
  if (entry == keywords.end())
    return static_cast<size_t>(-1);
  return entry->second;
}

//--------------------------------------------------------------------------------------------------

/**
 * Returns true if the given token is a keyword.
 *
 * Works by exclusion: token types in the range of keywords that are allowed as identifiers are
 * keywords, the token types listed below (operators, punctuation, literals, comments, whitespace
 * and helper tokens) are not, and every other token type is reported as a keyword.
 */
bool MySQLRecognitionBase::is_keyword(ANTLR3_UINT32 type)
{
  // Keywords which may also be used as identifiers form a continuous range of token values.
  if (type >= ACTION_SYMBOL && type <= PARTITION_SYMBOL)
    return true;

  switch (type)
  {
  // ASSIGN_OPERATOR is listed to stay consistent with is_operator() and is_relation(), which
  // both classify it as an operator.
  case ASSIGN_OPERATOR:
  case AT_SIGN_SYMBOL:
  case AT_AT_SIGN_SYMBOL:
  case BACK_TICK:
  case BACK_TICK_QUOTED_ID:
  case BITNUMBER:
  case BITSTRING:
  case BITWISE_AND_OPERATOR:
  case BITWISE_NOT_OPERATOR:
  case BITWISE_OR_OPERATOR:
  case BITWISE_XOR_OPERATOR:
  case CLOSE_PAR_SYMBOL:
  case COLON_SYMBOL:
  case COMMA_SYMBOL:
  case DASHDASH_COMMENT:
  case DIGIT:
  case DIGITS:
  case DIV_OPERATOR:
  case DOT_SYMBOL:
  case DOUBLE_QUOTE:
  case DOUBLE_QUOTED_TEXT:
  case EQUAL_OPERATOR:
  case ESCAPE_OPERATOR:
  case EXPRESSION_TOKEN:
  case FIELD_NAME_TOKEN:
  case FLOAT:
  case FUNCTION_CALL_TOKEN:
  case GREATER_OR_EQUAL_OPERATOR:
  case GREATER_THAN_OPERATOR:
  case HEXDIGIT:
  case HEXNUMBER:
  case HEXSTRING:
  case IDENTIFIER:
  case INDEX_HINT_LIST_TOKEN:
  case INTEGER:
  case JOIN_EXPR_TOKEN:
  case LESS_OR_EQUAL_OPERATOR:
  case LESS_THAN_OPERATOR:
  case LETTER_WHEN_UNQUOTED:
  case LOGICAL_AND_OPERATOR:
  case LOGICAL_NOT_OPERATOR:
  case LOGICAL_OR_OPERATOR:
  case MINUS_OPERATOR:
  case ML_COMMENT_END:
  case ML_COMMENT_HEAD:
  case MOD_OPERATOR:
  case MULT_OPERATOR:
  case NCHAR_TEXT:
  case NOT_EQUAL2_OPERATOR:
  case NOT_EQUAL_OPERATOR:
  case NULL2_SYMBOL:
  case NULL_SAFE_EQUAL_OPERATOR:
  case OPEN_PAR_SYMBOL:
  case PARAM_MARKER:
  case PAR_EXPRESSION_TOKEN:
  case PLUS_OPERATOR:
  case POUND_COMMENT:
  case SEMICOLON_SYMBOL:
  case SHIFT_LEFT_OPERATOR:
  case SHIFT_RIGHT_OPERATOR:
  case SINGLE_QUOTE:
  case SINGLE_QUOTED_TEXT:
  case STRING_TOKEN:
  case SUBQUERY_TOKEN:
  case TABLE_NAME_TOKEN:
  case UNDERLINE_SYMBOL:
  case UNDERSCORE_CHARSET:
  case VERSION_COMMENT:
  case VERSION_COMMENT_END:
  case VERSION_COMMENT_INTRODUCER:
  case VERSION_COMMENT_START_TOKEN:
  case VERSION_COMMENT_TAIL:
  case WS:
  case XA_ID_TOKEN:
    return false;

  default:
    // Anything not explicitly excluded above counts as a keyword.
    return true;
  }
}

//--------------------------------------------------------------------------------------------------

/**
 * Returns true if the given token is a relation token. This covers all operator tokens
 * (comparison, assignment, arithmetic, shift, logical and bitwise) as well as the keywords
 * acting as operators.
 */
bool MySQLRecognitionBase::is_relation(ANTLR3_UINT32 type)
{
  switch (type)
  {
  // Comparison and assignment.
  case EQUAL_OPERATOR:
  case ASSIGN_OPERATOR:
  case NULL_SAFE_EQUAL_OPERATOR:
  case GREATER_OR_EQUAL_OPERATOR:
  case GREATER_THAN_OPERATOR:
  case LESS_OR_EQUAL_OPERATOR:
  case LESS_THAN_OPERATOR:
  case NOT_EQUAL_OPERATOR:
  case NOT_EQUAL2_OPERATOR:

  // Arithmetic.
  case PLUS_OPERATOR:
  case MINUS_OPERATOR:
  case MULT_OPERATOR:
  case DIV_OPERATOR:
  case MOD_OPERATOR:

  // Negation, shift, logical and bitwise.
  case LOGICAL_NOT_OPERATOR:
  case BITWISE_NOT_OPERATOR:
  case SHIFT_LEFT_OPERATOR:
  case SHIFT_RIGHT_OPERATOR:
  case LOGICAL_AND_OPERATOR:
  case BITWISE_AND_OPERATOR:
  case BITWISE_XOR_OPERATOR:
  case LOGICAL_OR_OPERATOR:
  case BITWISE_OR_OPERATOR:

  // Keywords acting as operators.
  case OR_SYMBOL:
  case XOR_SYMBOL:
  case AND_SYMBOL:
  case IS_SYMBOL:
  case BETWEEN_SYMBOL:
  case LIKE_SYMBOL:
  case REGEXP_SYMBOL:
  case IN_SYMBOL:
  case SOUNDS_SYMBOL:
  case NOT_SYMBOL:
    return true;
  }

  // Every other token type is not a relation.
  return false;
}

//--------------------------------------------------------------------------------------------------

/**
 * Returns true if the given token is a number type, that is an integer, a float or one of
 * the hex and bit literal forms.
 */
bool MySQLRecognitionBase::is_number(ANTLR3_UINT32 type)
{
  const bool plain_number = (type == INTEGER) || (type == FLOAT);
  const bool hex_literal = (type == HEXNUMBER) || (type == HEXSTRING);
  const bool bit_literal = (type == BITNUMBER) || (type == BITSTRING);

  return plain_number || hex_literal || bit_literal;
}

//--------------------------------------------------------------------------------------------------

/**
 * Returns true if the given token is an operator or punctuation character.
 */
bool MySQLRecognitionBase::is_operator(ANTLR3_UINT32 type)
{
  switch (type)
  {
  // Comparison and assignment.
  case EQUAL_OPERATOR:
  case ASSIGN_OPERATOR:
  case NULL_SAFE_EQUAL_OPERATOR:
  case GREATER_OR_EQUAL_OPERATOR:
  case GREATER_THAN_OPERATOR:
  case LESS_OR_EQUAL_OPERATOR:
  case LESS_THAN_OPERATOR:
  case NOT_EQUAL_OPERATOR:
  case NOT_EQUAL2_OPERATOR:

  // Arithmetic.
  case PLUS_OPERATOR:
  case MINUS_OPERATOR:
  case MULT_OPERATOR:
  case DIV_OPERATOR:
  case MOD_OPERATOR:

  // Negation, shift, logical and bitwise.
  case LOGICAL_NOT_OPERATOR:
  case BITWISE_NOT_OPERATOR:
  case SHIFT_LEFT_OPERATOR:
  case SHIFT_RIGHT_OPERATOR:
  case LOGICAL_AND_OPERATOR:
  case BITWISE_AND_OPERATOR:
  case BITWISE_XOR_OPERATOR:
  case LOGICAL_OR_OPERATOR:
  case BITWISE_OR_OPERATOR:

  // Punctuation and markers.
  case DOT_SYMBOL:
  case COMMA_SYMBOL:
  case SEMICOLON_SYMBOL:
  case COLON_SYMBOL:
  case OPEN_PAR_SYMBOL:
  case CLOSE_PAR_SYMBOL:
  case AT_SIGN_SYMBOL:
  case AT_AT_SIGN_SYMBOL:
  case PARAM_MARKER:
    return true;
  }

  // Every other token type is neither an operator nor punctuation.
  return false;
}

//--------------------------------------------------------------------------------------------------

/**
 * Returns true if the given tree node has at least one child node.
 */
bool MySQLRecognitionBase::is_subtree(struct ANTLR3_BASE_TREE_struct *tree)
{
  const bool has_children = tree->getChildCount(tree) > 0;
  return has_children;
}

//--------------------------------------------------------------------------------------------------

void MySQLRecognitionBase::reset()
{
  d->_error_info.clear();
}

//--------------------------------------------------------------------------------------------------