/*
** Unicode example
** written by Matthew Ford (c)2000 Forward Computing and Control Pty. Ltd.
** email matthew.ford@forward.com.au
**
** The UnicodeLexer is the interesting part
*/
{
import java.io.*;
import antlr.collections.AST;
import antlr.*;
import antlr.collections.*;
import antlr.debug.misc.*;
} // end of Antlr intro block
// Parser section of the grammar.
class UnicodeParser extends Parser;

options {
    // skip the tree building
    buildAST = false;
    // do not generate parser error handlers
    defaultErrorHandler = false;
}
// Start rule: zero or more statements, then end of input.
program
    :   ( statement )*
        EOF
    ;
// A single statement; one of:
//   IDENT ASSIGNS IDENT SEMI   - an assignment
//   TOTAL_TIME SEMI            - the TOTAL_TIME literal on its own
//   SEMI                       - an empty statement
// Each alternative prints what it matched to System.out.
protected
statement
    :   lhs:IDENT ASSIGNS rhs:IDENT SEMI!
        { System.out.println(" Found statement: " + lhs.getText()+ ":=" + rhs.getText() ); }
    |   tt:TOTAL_TIME SEMI!
        { System.out.println(" Found TOTAL_TIME statement: " + tt.getText()); }
    |   SEMI!
        { System.out.println(" Found empty statement"); }
    ;
// Lexer section of the grammar: case-insensitive matching over the
// character range \u0000..\uFFFE.
class UnicodeLexer extends Lexer;

options {
    // allow all possible Unicode chars except -1 == EOF
    charVocabulary = '\u0000'..'\uFFFE';
    // in general do not test literals
    testLiterals = false;
    caseSensitiveLiterals = false;
    caseSensitive = false;
    // pass errors back to the parser
    defaultErrorHandler = false;
    // Two-character lookahead. The original note gives the reason as
    // "//" versus "/*"; NOTE(review): no comment rules are visible in
    // this lexer, so confirm whether k = 2 is still required.
    k = 2;
}
tokens {
    // Literal written with Unicode escapes; glossed as "total_time".
    TOTAL_TIME = "\u5408\u8A08\u6642\u9593";
}
// An identifier.
// testLiterals is set to true for this rule: after the rule matches, the
// text is looked up in the literals table to decide whether it is a
// literal or really an identifier.
// NOTE: any char from \u0080 upward can start (or continue) an IDENT;
// this may need to be restricted further in some cases.
// \uFFFF is EOF, so it is not included here; the ranges stop at \uFFFE.
IDENT
options {
    testLiterals = true;
    paraphrase = "an identifier";
}
    :   (   'a'..'z'
        |   '_'
        |   '$'
        |   '\u0080'..'\uFFFE'
        )
        (   'a'..'z'
        |   '_'
        |   '0'..'9'
        |   '$'
        |   '\u0080'..'\uFFFE'
        )*
    ;
// The assignment operator.
ASSIGNS
options {
    paraphrase = ":=";
}
    :   ":="
    ;
// The statement terminator.
SEMI
options {
    paraphrase = ";";
}
    :   ';'
    ;
// White space is skipped: this rule matches it and sets the token type
// to Token.SKIP. Each line ending (CR, CR LF or LF) calls newline().
WS
    :   (   ' '
        |   '\t'
        |   (   '\r' ( '\n' )?
            |   '\n'
            )
            { newline(); }
        )
        { $setType(Token.SKIP); }   // way to set token type
    ;
// end of UnicodeLexer