File: ident.g

package info (click to toggle)
antlr 2.7.7%2Bdfsg-14
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 10,016 kB
  • sloc: java: 54,649; cs: 12,537; makefile: 8,854; cpp: 7,359; pascal: 5,273; sh: 4,333; python: 4,297; lisp: 1,969; xml: 220; lex: 192; ansic: 127
file content (133 lines) | stat: -rwxr-xr-x 2,999 bytes parent folder | download | duplicates (11)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
header {
    /* helper code shared by all generated files */
    def println(*args):
        // Print all arguments on one line, separated by blanks.  Every
        // argument is encoded with the interpreter default encoding, using
        // the "replace" error handler.  Without arguments nothing is printed.
        if not args:
            return
        import sys
        charset = sys.getdefaultencoding()
        leading = args[:-1]
        final = args[-1]
        for piece in leading:
            print piece.encode(charset,"replace"),
        print final.encode(charset,"replace")
}
header "__main__" {
    // the main header: contributes nothing but a no-op pass statement.
    // NOTE(review): presumably this is the __main__ section used for
    // generated classes that have no class-specific one (ident_l has its
    // own below) - confirm against the ANTLR Python code generator.
    pass
}
header "ident_l.__main__" {
    // Stand-alone driver for the lexer: wrap stdin in a Shift-JIS stream
    // reader, run the lexer over it and print every token it produces.
    import sys,codecs

    // Write msg to stderr with a "warning:" prefix and flush at once.
    def warn(msg):
        print >>sys.stderr,"warning:",msg
        sys.stderr.flush()

    // Write msg to stderr with an "error:" prefix and flush at once.
    def error(msg):
        print >>sys.stderr,"error:",msg
        sys.stderr.flush()

    
    try:
        // getreader() hands back the stream reader class of the codec and
        // raises LookupError when the codec cannot be found.
        sys.stdin = codecs.getreader("Shift-JIS")(sys.stdin)
    except LookupError:
        // Only a missing codec is reported here; any other exception
        // propagates instead of being mislabelled as a codec problem.
        // NOTE(review): the exit status stays 0 although nothing was
        // lexed - presumably deliberate, confirm with the test driver.
        warn("Japanese codecs required - please install.")
        sys.exit(0)
    L = Lexer()
    for token in L: 
        // I'm being conservative here ..
        // __str__() is called directly and its result is forced to ASCII
        // with the "replace" error handler before it is printed.
        print token.__str__().encode("ascii","replace")
}

header "__init__" {
    // init - for all classes
}

header "ident_p.__init__" {
    // init - for ident_p
}

header "ident_l.__init__" {
    // init - for ident_l
}

options {
    language=Python;  // target language of the generated code
}

/*
** Unicode example
** written by Matthew Ford (c)2000 Forward Computing and Control Pty. Ltd.
** email matthew.ford@forward.com.au
**
** The Unicode-aware lexer (class ident_l below) is the interesting part
*/



// Parser for a sequence of ';'-terminated statements.  Every statement
// that is recognised is reported through println().
class ident_p extends Parser;

options {
	buildAST = false;	// skip the tree building
	defaultErrorHandler = false;     // Don't generate parser error handlers
}


// Entry rule: any number of statements followed by end of input.
program
	: (statement)* // perhaps none
	   EOF 
;

// One statement, in one of three forms:
//   IDENT := IDENT ;    reports both identifier texts
//   TOTAL_TIME ;        reports the text of the literal token
//   ;                   reports an empty statement
protected
statement
	: lhs:IDENT ASSIGNS rhs:IDENT SEMI!
        { println(" Found statement:   ",lhs.getText(),":=",rhs.getText() ); }
	| tt:TOTAL_TIME SEMI!
        { println(" Found TOTAL_TIME statement: ",tt.getText()); }
	| SEMI! {println(" Found empty statement"); }
	;



// Lexer with rules for IDENT, ASSIGNS, SEMI and WS plus the literal token
// TOTAL_TIME.  WS tokens get the type Token.SKIP.
class ident_l extends Lexer;

options {
	charVocabulary = '\u0000'..'\uFFFE';  // allow all possible Unicode characters except -1 == EOF
	testLiterals = false;  // in general do not test literals 
	caseSensitiveLiterals=false;
	caseSensitive=false;  
	defaultErrorHandler = false;   // pass error back to parser
  k = 2; // two character lookahead; NOTE(review): originally justified by "// versus /*", but this grammar has no comment rules - confirm k = 2 is still needed	
}

tokens {
  TOTAL_TIME = "\u5408\u8A08\u6642\u9593"; // total_time
}


// an identifier.  Note that testLiterals is set to true!  This means
// that after we match the rule, we look in the literals table to see
// if it's a literal or really an identifier
// NOTE: any char >= \u0080 can start an Ident
// may need to restrict this more in some cases
// \uFFFF is EOF so do not include it here, stop at \uFFFE
// NOTE(review): only 'a'..'z' is listed; upper-case letters are presumably
// covered by caseSensitive=false above - confirm.
IDENT
	options {testLiterals=true;
	    paraphrase = "an identifier";}
	:	('a'..'z'|'_'|'$'|'\u0080'..'\uFFFE') ('a'..'z'|'_'|'0'..'9'|'$'|'\u0080'..'\uFFFE')*
	;


// the assignment operator
ASSIGNS options {paraphrase = ":=";}
	: ":="
	;
	
// the statement terminator
SEMI options {paraphrase = ";";}
	: ';';
	

// white space: blank, tab and line ends.  The rule sets the token type to
// Token.SKIP itself (so, presumably, these tokens never reach the parser);
// every line end ('\r', '\r\n' or '\n') also calls self.newline().
WS	:	(	' '			
		|	'\t'
		|	'\r'('\n')?	 {self.newline();}
		|	'\n' {self.newline();}		
		)
		{$setType(Token.SKIP);}		// way to set token type
	;