File: java.lex

package info (click to toggle)
libxtc-rats-java 1.15.0-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 18,580 kB
  • sloc: java: 288,962; ansic: 20,406; ml: 14,775; makefile: 1,358; cpp: 621; sh: 352; xml: 259; perl: 123
file content (363 lines) | stat: -rw-r--r-- 10,924 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
/* cc.lex            see license.txt for copyright and terms of use
 * flex description of scanner for Java souce
 */

/* ----------------------- C definitions ---------------------- */
%{

#include "lexer.h"       // Lexer class

// this works around a problem with cygwin & fileno
#define YY_NEVER_INTERACTIVE 1

%}


/* -------------------- flex options ------------------ */
/* no wrapping is needed; setting this means we don't have to link with libfl.a */
%option noyywrap

/* don't use the default-echo rules */
%option nodefault

/* I don't call unput */
%option nounput

/* generate a c++ lexer */
%option c++

/* use the "fast" algorithm with no table compression */
%option full

/* utilize character equivalence classes */
%option ecs

/* the scanner is never interactive */
%option never-interactive

/* and I will define the class (lexer.h) */
%option yyclass="Lexer"

/* output file name */
  /* dsw: Arg!  Don't do this, since it is not overrideable from the
     command line with -o, which I consider to be a flex bug. */
  /* sm: fair enough; agreed */
  /* %option outfile="lexer.yy.cc" */

/* ------------------- definitions -------------------- */
/* newline */
NL            [\n\r]

/* anything but newline */
NOTNL         .

/* any of 256 source characters */
ANY           ({NOTNL}|{NL})

/* backslash */
BACKSL        "\\"

/* end of line (would like EOF to qualify also, but flex doesn't allow it */
EOL           {NL}

/* letter or underscore */
LETTER        [A-Za-z_\$]

/* letter or underscore or digit */
ALNUM         [A-Za-z_\$0-9]

/* digit */
DIGIT         [0-9]
HEXDIGIT      [0-9A-Fa-f]

/* sequence of decimal digits */
DIGITS        ({DIGIT}+)

/* integer suffix */
INT_SUFFIX    [lL]

/* sign of a number */
SIGN          ("+"|"-")

/* floating-point exponent */
EXPONENT      ([eE]{SIGN}?{DIGITS})

/* floating-point suffix letter */
FLOAT_SUFFIX  [fdFD]

/* normal string character: any but quote, newline, or backslash */
STRCHAR       [^\"\n\r\\]

/* (start of) an escape sequence */
ESCAPE        ({BACKSL}{ANY})

/* double quote */
QUOTE         [\"]

/* normal character literal character: any but single-quote, newline, or backslash */
CCHAR         [^\'\n\r\\]

/* single quote */
TICK          [\']


/* ------------- token definition rules --------------- */
%%

  /* this comment is replaced, by an external script, with whatever
   * additional rules a language extension author desires */
  /* EXTENSION RULES GO HERE */


  /* operators, punctuators and keywords: tokens with one spelling */
"abstract"         return tok(TOK_ABSTRACT);
"boolean"          return tok(TOK_BOOLEAN);
"break"            return tok(TOK_BREAK);
"byte"             return tok(TOK_BYTE);
"case"             return tok(TOK_CASE);
"catch"            return tok(TOK_CATCH);
"char"             return tok(TOK_CHAR);
"class"            return tok(TOK_CLASS);
"const"            return tok(TOK_CONST);
"continue"         return tok(TOK_CONTINUE);
"default"          return tok(TOK_DEFAULT);
"do"               return tok(TOK_DO);
"double"           return tok(TOK_DOUBLE);
"else"             return tok(TOK_ELSE);
"extends"          return tok(TOK_EXTENDS);
"false"            return tok(TOK_FALSE);
"final"            return tok(TOK_FINAL);
"finally"          return tok(TOK_FINALLY);
"float"            return tok(TOK_FLOAT);
"for"              return tok(TOK_FOR);
"goto"             return tok(TOK_GOTO);
"if"               return tok(TOK_IF);
"implements"       return tok(TOK_IMPLEMENTS);
"import"           return tok(TOK_IMPORT);
"instanceof"       return tok(TOK_INSTANCEOF);
"int"              return tok(TOK_INT);
"interface"        return tok(TOK_INTERFACE);
"long"             return tok(TOK_LONG);
"native"           return tok(TOK_NATIVE);
"new"              return tok(TOK_NEW);
"null"             return tok(TOK_NULL);
"package"          return tok(TOK_PACKAGE);
"private"          return tok(TOK_PRIVATE);
"protected"        return tok(TOK_PROTECTED);
"public"           return tok(TOK_PUBLIC);
"return"           return tok(TOK_RETURN);
"short"            return tok(TOK_SHORT);
"static"           return tok(TOK_STATIC);
"strictfp"         return tok(TOK_STRICTFP);
"super"            return tok(TOK_SUPER);
"switch"           return tok(TOK_SWITCH);
"synchronized"     return tok(TOK_SYNCHRONIZED);
"this"             return tok(TOK_THIS);
"throw"            return tok(TOK_THROW);
"throws"           return tok(TOK_THROWS);
"transient"        return tok(TOK_TRANSIENT);
"true"             return tok(TOK_TRUE);
"try"              return tok(TOK_TRY);
"void"             return tok(TOK_VOID);
"volatile"         return tok(TOK_VOLATILE);
"while"            return tok(TOK_WHILE);

"("                return tok(TOK_LPAREN);
")"                return tok(TOK_RPAREN);
"["                return tok(TOK_LBRACKET);
"]"                return tok(TOK_RBRACKET);
"."                return tok(TOK_DOT);
"!"                return tok(TOK_BANG);
"~"                return tok(TOK_TILDE);
"+"                return tok(TOK_PLUS);
"-"                return tok(TOK_MINUS);
"++"               return tok(TOK_PLUSPLUS);
"--"               return tok(TOK_MINUSMINUS);
"&"                return tok(TOK_AND);
"*"                return tok(TOK_STAR);
"/"                return tok(TOK_SLASH);
"%"                return tok(TOK_PERCENT);
"<<"               return tok(TOK_LEFTSHIFT);
">>"               return tok(TOK_RIGHTSHIFT);
">>>"              return tok(TOK_URIGHTSHIFT);
"<"                return tok(TOK_LESSTHAN);
"<="               return tok(TOK_LESSEQ);
">"                return tok(TOK_GREATERTHAN);
">="               return tok(TOK_GREATEREQ);
"=="               return tok(TOK_EQUALEQUAL);
"!="               return tok(TOK_NOTEQUAL);
"^"                return tok(TOK_XOR);
"|"                return tok(TOK_OR);
"&&"               return tok(TOK_ANDAND);
"||"               return tok(TOK_OROR);
"?"                return tok(TOK_QUESTION);
":"                return tok(TOK_COLON);
"="                return tok(TOK_EQUAL);
"*="               return tok(TOK_STAREQUAL);
"/="               return tok(TOK_SLASHEQUAL);
"%="               return tok(TOK_PERCENTEQUAL);
"+="               return tok(TOK_PLUSEQUAL);
"-="               return tok(TOK_MINUSEQUAL);
"&="               return tok(TOK_ANDEQUAL);
"^="               return tok(TOK_XOREQUAL);
"|="               return tok(TOK_OREQUAL);
"<<="              return tok(TOK_LEFTSHIFTEQUAL);
">>="              return tok(TOK_RIGHTSHIFTEQUAL);
">>>="             return tok(TOK_URIGHTSHIFTEQUAL);
","                return tok(TOK_COMMA);
";"                return tok(TOK_SEMICOLON);
"{"                return tok(TOK_LBRACE);
"}"                return tok(TOK_RBRACE);

  /* identifier: e.g. foo */
{LETTER}{ALNUM}* {
  return svalTok(TOK_NAME);
}

  /* integer literal; dec, oct, or hex */
[1-9][0-9]*{INT_SUFFIX}?           |
[0][0-7]*{INT_SUFFIX}?             |
[0][xX]{HEXDIGIT}+{INT_SUFFIX}?    {
  return svalTok(TOK_INT_LITERAL);
}

  /* hex literal with nothing after the 'x' */
[0][xX] {
  err("hexadecimal literal with nothing after the 'x'");
  return svalTok(TOK_INT_LITERAL);
}

  /* floating literal */
{DIGITS}"."{DIGITS}?{EXPONENT}?{FLOAT_SUFFIX}?   |
"."{DIGITS}{EXPONENT}?{FLOAT_SUFFIX}?	         |
{DIGITS}{EXPONENT}?{FLOAT_SUFFIX}?               {
  return svalTok(TOK_FLOAT_LITERAL);
}

  /* floating literal with no digits after the 'e' */
{DIGITS}"."{DIGITS}?[eE]{SIGN}?   |
"."{DIGITS}[eE]{SIGN}?            |
{DIGITS}[eE]{SIGN}?               {
  err("floating literal with no digits after the 'e'");
  
  /* in recovery rules like this it's best to yield the best-guess
   * token type, instead of some TOK_ERROR, so the parser can still
   * try to make sense of the input; having reported the error is
   * sufficient to ensure that later stages won't try to interpret
   * the lexical text of this token as a floating literal */
  return svalTok(TOK_FLOAT_LITERAL);
}

  /* string literal */
{QUOTE}({STRCHAR}|{ESCAPE})*{QUOTE}    {
  return svalTok(TOK_STRING_LITERAL);
}

  /* string literal missing final quote */
{QUOTE}({STRCHAR}|{ESCAPE})*{EOL}   {
  err("string literal missing final `\"'");
  return svalTok(TOK_STRING_LITERAL);     // error recovery
}

  /* unterminated string literal; maximal munch causes us to prefer
   * either of the above two rules when possible; the trailing
   * optional backslash is needed so the scanner won't back up in that
   * case; NOTE: this can only happen if the file ends in the string
   * and there is no newline before the EOF */
{QUOTE}({STRCHAR}|{ESCAPE})*{BACKSL}? {
  err("unterminated string literal");
  yyterminate();
}

  /* character literal */
{TICK}({CCHAR}|{ESCAPE}){TICK}   {
  return svalTok(TOK_CHAR_LITERAL);
}

  /* incomplete character literal */
{TICK}({CCHAR}|{ESCAPE})?{ANY}?  {
  err("incomplete/unterminated character literal");  
  return svalTok(TOK_CHAR_LITERAL);     // error recovery
}

  /* whitespace */
  /* 10/20/02: added '\r' to accomodate files coming from Windows; this
   * could be seen as part of the mapping from physical source file
   * characters to the basic character set (cppstd 2.1 para 1 phase 1),
   * except that it doesn't happen for chars in string/char literals... */
[ \t\n\f\v\r]+  {
  whitespace();
}

  /* C++ comment */
  /* we don't match the \n because that way this works at EOF */
"//"{NOTNL}*    {
  whitespace();
}

  /* C comment */
"/""*"([^*]|"*"*[^*/])*"*"+"/"     {
  // the pattern is a little complicated because the naive one,
  //   "/""*"([^*]|"*"[^/])*"*/"
  // fails to match e.g. "/***/" 
  whitespace();
}

  /* unterminated C comment */
"/""*"([^*]|"*"*[^*/])*"*"*        {
  err("unterminated /""*...*""/ comment");
  yyterminate();
}

  /* illegal */
.  {
  updLoc();
  err(stringc << "illegal character: `" << yytext[0] << "'");
}

<<EOF>> {
  srcFile->doneAdding();
  yyterminate();
}


%%
/**************/
/* extra code */
/**************/



// another C comment system using a start state; I chose to (fix and)
// use the single-line rule instead, so that the entire thing would be
// matched at once within the FSM instead of dropping out for user
// actions
#if 0
/* exclusive start state for when inside a slash-star style comment */
%x IN_C_COMMENT

  /* C comment; dsw: one that actually works! */
"/""*" {
  yymore();
  BEGIN IN_C_COMMENT;
}
<IN_C_COMMENT>[^*]+ {
  yymore();
}
<IN_C_COMMENT>"*"+"/" {
  whitespace();
  BEGIN INITIAL;
}
<IN_C_COMMENT>"*"+ {
  /* NOTE: this rule must come after the above rule */
  yymore();
}
<IN_C_COMMENT><<EOF>> {
  /* TODO: I don't know why flex doesn't give me an error if I omit this rule, since -f is used */
  err("unterminated /**/ comment");
  whitespace();
  /* dsw: TODO: I think I just have to replicate this here or we could factor it out */
  srcFile->doneAdding();
  yyterminate();
}
#endif // 0