File: cpp.g

package info (click to toggle)
antlr 2.7.7%2Bdfsg-14
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 10,016 kB
  • sloc: java: 54,649; cs: 12,537; makefile: 8,854; cpp: 7,359; pascal: 5,273; sh: 4,333; python: 4,297; lisp: 1,969; xml: 220; lex: 192; ansic: 127
file content (284 lines) | stat: -rw-r--r-- 9,104 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
//
// A C PreProcessor
//
// Handles #define/#undef, #ifdef/#ifndef/#elsif/#else/#endif, and #include using only
// an ANTLR lexer (actually a stack of them).  This could be easily integrated
// with an existing lexer to do preprocessing and tokenizing all at once.
//
// Author: Eric Mahurin - eric_mahurin at yahoo dot com
// License: just give me credit
//
// BUG: missing some of the simpler directives
// BUG: doesn't follow the cpp spec perfectly - haven't made any effort at this
// BUG: not well tested
//
// Be aware that this is my first real attempt at both ANTLR and Java, so
// things may not be done the best way.  I welcome suggestions and fixes.
//

header {
	using System.Collections;
	using System.IO;
	using CommonAST					= antlr.CommonAST;
	using AST						= antlr.collections.AST;
	using TokenStreamSelector		= antlr.TokenStreamSelector;
	using TokenStreamRetryException = antlr.TokenStreamRetryException;
}

options {
    language="CSharp";
}

{
	// Command-line driver: runs the preprocessing lexer over standard input
	// and echoes the text of every token it yields to standard output.
	class cpp : cppLexerTokenTypes {
	    // Selector holding the currently active lexer; the lexer actions push
	    // further lexers onto it for included files and macro bodies.
	    public static TokenStreamSelector selector = new TokenStreamSelector();

	    public static void Main(string[] args) {
	        try {
	            // The root lexer reads stdin.  The lexer class is handed the
	            // shared selector before the first token is requested.
	            cppLexer rootLexer = new cppLexer(new CharBuffer(Console.In));
	            cppLexer.selector = selector;
	            selector.select(rootLexer);

	            // Copy token text through until the selector reports end of input.
	            IToken token = selector.nextToken();
	            while (token.Type != Token.EOF_TYPE) {
	                Console.Out.Write(token.getText());
	                token = selector.nextToken();
	            }
	        } catch(Exception e) {
	            Console.Error.WriteLine("exception: " + e);
	        }
	    }
	}
}

// Lexer that performs the preprocessing; the rules that follow belong to it.
class cppLexer extends Lexer;

options {
    // literal-table testing is off by default; IDENTIFIER turns it back on
    // for itself with a rule-level option
    testLiterals 	= false;
    // lookahead depth used by the generated lexer
    k 				= 4;
}

tokens {
    // Token type with no rule of its own.  DIRECTIVE assigns it to else,
    // elsif and endif directives to stop the loops that discard skipped text.
    ENDIF ;
}

{
    // Selector shared by every lexer instance; must be assigned externally
    // before the first token is requested.
    public static TokenStreamSelector selector;

    // State of the innermost conditional:
    //    1: true, text is passed through
    //    0: false, a later else or matching elsif may still switch to true
    //   -1: false, and no later else or elsif may switch to true
    protected static int ifState = 1;

    // Saved ifState values of the enclosing conditionals, innermost last.
    protected static IList ifStates = new ArrayList();

    // The defines: macro name mapped to its definition list.
    protected static IDictionary defines = new Hashtable();

    // Argument bindings for the macro call this lexer instance is expanding.
    protected IDictionary defineArgs = new Hashtable();

    // At the end of this lexer's input, hand control back to the lexer that
    // was active before it.
    public override void uponEOF() {
        try {
            // return to old lexer/stream
            selector.pop();
            // make the selector ask the restored lexer for the next token
            selector.retry();
        } catch (InvalidOperationException) {
            // nothing left on the stack: fall through so a real EOF is returned
        }
    }
}

// DIRECTIVE: '#' followed by one preprocessor keyword.
//
//   include          push a sub-lexer reading the named file
//   define           store the macro in defines as a list: element 0 is the
//                    body text, the remaining elements are argument names
//   undef            remove the macro from defines
//   ifdef / ifndef   save ifState and open a new conditional
//   else / elsif     switch branches of the innermost conditional
//   endif            restore the saved ifState
//
// Text in an inactive branch is thrown away by pulling tokens from the
// selector until a directive re-typed as ENDIF comes back.  Those loops also
// stop at end of input: without that check an unterminated conditional would
// keep receiving EOF tokens and never return.
//
// define, undef, a taken ifdef/ifndef and the endif of an active branch are
// typed Token.SKIP so they produce no output.
DIRECTIVE {
    IList args 		= new ArrayList();
    bool  condition = true;
} : '#'
    ( "include" (WS)? includeFile:STRING { if (ifState==1) {
        // found this in examples/java/includeFile
        // the file name is the string text without its surrounding quotes
        string name = includeFile.getText();
        name = name.Substring(1, name.Length-2);
        try {
            // defines is a static field, so the sublexer shares it and
            // definitions made by the included file stay persistent
            cppLexer sublexer = new cppLexer(new StreamReader(name));
            sublexer.setFilename(name);
            selector.push(sublexer);
            selector.retry();
        } catch (/*FileNotFound*/IOException /*fnf*/) {
            Console.Error.WriteLine("cannot find file "+name);
        }
    }}
    | "define" WS defineMacro:RAW_IDENTIFIER
    {
        args.Add(""); // first element will hold the macro text
    }
        (
            ( '(' // get arguments if you find them (no spaces before left paren)
                (WS)? defineArg0:RAW_IDENTIFIER (WS)? {args.Add(defineArg0.getText());}
                ( COMMA (WS)? defineArg1:RAW_IDENTIFIER (WS)? {args.Add(defineArg1.getText());} )*
              ')'
            | ' '|'\t'|'\f'
            )
            ( options{greedy=true;}: ' '|'\t'|'\f' )*
            // store the text verbatim - tokenize when called
            defineText:MACRO_TEXT { args[0] = defineText.getText(); }
        )? '\n' { newline(); }
    { if (ifState==1) {
        // only record the macro when the surrounding text is active
        defines[defineMacro.getText()] = args;
        $setType(Token.SKIP);
    }}
    | "undef" WS undefMacro:RAW_IDENTIFIER { if (ifState==1) {
        defines.Remove(undefMacro.getText());
        $setType(Token.SKIP);
    }}
    | ("ifdef"|"ifndef"{condition=false;})
        WS ifMacro:RAW_IDENTIFIER
    {
        // remember the enclosing state so that endif can restore it
        ifStates.Add(ifState);
        if (ifState==1) {
            // condition holds the wanted answer: true for ifdef, false for ifndef
            condition = (defines.Contains(ifMacro.getText())==condition);
            ifState = condition?1:0;
        } else {
            // enclosing text is inactive, so no branch in here may be taken
            ifState = -1;
        }
        if (ifState==1) {
            $setType(Token.SKIP);
        } else {
            // gobble up tokens until ENDIF (could be caused by else)
            for (;;) {
                try {
                    int skippedType = selector.nextToken().Type;
                    // end of input also ends the skipping, otherwise a missing
                    // endif would loop forever on EOF tokens
                    if (skippedType==ENDIF || skippedType==Token.EOF_TYPE) break;
                } catch (TokenStreamRetryException /*r*/) {
                    // just continue if someone tried retry
                }
            }
            // retry in case we switched lexers
            selector.retry();
        }
    }
    |
        ( "else" // treat like elsif (true)
        | "elsif" WS elsifMacro:RAW_IDENTIFIER {
            // elsif is true when the named macro is defined
            condition=defines.Contains(elsifMacro.getText());
        }
        )
    {
        if (ifState==1) {
            // previous if/elsif was taken - discard rest
            ifState = -1;
            for (;;) {
                try {
                    int skippedType = selector.nextToken().Type;
                    // end of input also ends the skipping, otherwise a missing
                    // endif would loop forever on EOF tokens
                    if (skippedType==ENDIF || skippedType==Token.EOF_TYPE) break;
                } catch (TokenStreamRetryException /*r*/) {
                    // just continue if someone tried retry
                }
            }
            // retry in case we switched lexers
            selector.retry();
        } else if (ifState==0 && condition) {
            // "elsif" (true) or "else"
            // typing this directive ENDIF stops the loop that was discarding
            // the previous, untaken branch
            $setType(ENDIF);
            ifState = 1;
        }
    }
    | "endif" {
        // remember whether the branch being closed was active
        condition = (ifState==1);
        try {
            // return to previous if state
            ifState = (int) ifStates[ifStates.Count - 1];
            ifStates.RemoveAt(ifStates.Count - 1);
            if (condition) {
                $setType(Token.SKIP);
            } else {
                // tell if/else/elsif to stop discarding tokens
                $setType(ENDIF);
            }
        } catch (ArgumentOutOfRangeException /*e*/) {
            // endif with no if
        }
    }
    );

// IDENTIFIER: a name, expanded on the spot when it is a macro argument or a
// defined macro.
//
// After the first action, define is the matching definition list (element 0
// is the replacement text, further elements are argument names) or null for
// an ordinary identifier.  A definition with argument names requires a
// parenthesised argument list holding exactly that many EXPR arguments.
IDENTIFIER options {testLiterals=true;} {
    IList define = new ArrayList();
    IList args = new ArrayList();
} :
    identifier:RAW_IDENTIFIER
    {
        // argument bindings of the macro call being expanded are tried first
        define = (IList)defineArgs[identifier.getText()];
        if (define==null && _begin==0) {
            // not an argument, and this rule was not entered with text
            // already collected: see if this is a macro call
            define = (IList)defines[identifier.getText()];
        }
    }
    (
        // take in arguments if macro call requires them
        { (define!=null) && (define.Count > 1) }?
        ( WS | COMMENT )?
        '('
            callArg0:EXPR { args.Add(callArg0.getText()); }
            ( COMMA callArg1:EXPR { args.Add(callArg1.getText()); } )*
            // better have right amount
            { args.Count==define.Count-1 }?
        ')'
    |
        // anything else takes no argument list
        { !((define!=null) && (define.Count>1)) }?
    )
    {
        if (define!=null) {
            string body = (string)define[0];
            if (_begin==0) {
                // create a new lexer to handle the macro text
                cppLexer macroLexer = new cppLexer(new StringReader(body));
                int argIndex = 0;
                while (argIndex < args.Count) {
                    // treat macro arguments similar to local defines: each
                    // argument name maps to a one-element list holding the
                    // text supplied by the call
                    IList binding = new ArrayList();
                    binding.Add((string)args[argIndex]);
                    macroLexer.defineArgs[define[argIndex+1]] = binding;
                    argIndex++;
                }
                selector.push(macroLexer);
                // retry in new lexer
                selector.retry();
            } else {
                // just substitute text if called from EXPR - no token created
                $setText(body);
            }
        }
    }
    ;

// STRING: a quoted literal including its quotes.  Inside the quotes a
// backslash takes the following character with it, so an escaped quote does
// not end the literal.
STRING
    : '"'                       // double quoted string
        ( '\\' .
        | ~('\\'|'"')
        )*
      '"'
    | '\''                      // single quoted string
        ( '\\' .
        | ~('\\'|'\'')
        )*
      '\''
    ;

// MACRO_TEXT: the body of a define, up to but not including the line end.
// A backslash directly before a line terminator continues the body on the
// next line; the backslash itself is dropped from the text by the ! suffix.
protected MACRO_TEXT
    : ( '\\'! NL { newline(); }     // escaped newline
      | ~('\n'|'\r')                // any other character on the line
      )*
    ;

// NL: one line terminator.  The rule is protected, so it is only matched as
// part of other rules; the rules that use it call newline() themselves.
//
// The two-character CR LF alternative is listed first.  A lone '\r' is a
// prefix of it, and ANTLR settles that overlap in favour of the earlier
// alternative, so with '\r' in front a CR LF pair was consumed as two
// separate terminators (counted as two lines, and an escaped CR LF inside a
// macro body ended the body early).  The overlap is intentional, hence the
// ambiguity warning is switched off for this subrule.
protected
NL :
    ( options { generateAmbigWarnings=false; }
    : '\r' '\n'     // CR LF
    | '\r'          // CR only
    | '\n'          // LF only
    );

// WS: exactly one whitespace character or line terminator per token.
// The token is passed on rather than skipped (the skip call is commented
// out); line terminators bump the line counter.
WS
    : ( ' '
      | '\t'
      | '\f'
      | NL { newline(); }
      )
      { /*$setType(Token.SKIP);*/ }
    ;

// COMMENT: a single line comment including its terminating line end, or a
// multi-line comment.  The token is passed on rather than skipped (the skip
// call is commented out).  Line terminators inside either form bump the line
// counter.
COMMENT
    : ( "//" ( ~('\n'|'\r') )* NL { newline(); }    // single line comment
      | "/*"                                        // multi-line comment
            ( options { greedy=false; }             // stop at the first "*/"
            : NL { newline(); }
            | ~('\n'|'\r')
            )*
        "*/"
      )
      { /*$setType(Token.SKIP);*/ }
    ;

// RAW_IDENTIFIER: a letter or underscore followed by letters, underscores
// and digits.  Protected: used by DIRECTIVE and IDENTIFIER for names that
// are taken as written.
protected RAW_IDENTIFIER
    : ( 'a'..'z' | 'A'..'Z' | '_' )
      ( 'a'..'z' | 'A'..'Z' | '_' | '0'..'9' )*
    ;

// NUMBER: a digit followed by digits, letters and underscores, which allows
// alpha suffixes on numbers (i.e. L:long).
NUMBER
    : ( '0'..'9' )
      ( '0'..'9' | 'a'..'z' | 'A'..'Z' | '_' )*
    ;

// Punctuation is grouped into categories so that EXPR can tell brackets and
// the argument separator apart from all other symbols.

// opening brackets
LEFT
    : '(' | '[' | '{'
    ;

// closing brackets
RIGHT
    : ')' | ']' | '}'
    ;

// separator between macro arguments
COMMA
    : ','
    ;

// every other single-character symbol
OPERATOR
    : '!' | '#' | '$' | '%' | '&' | '*' | '+' | '-'
    | '.' | '/' | ':' | ';' | '<' | '=' | '>' | '?'
    | '@' | '\\' | '^' | '`' | '|' | '~'
    ;

// EXPR: the text of one macro call argument - allow just about anything
// without being ambiguous.
//
// Shape: optional whitespace, an optional number or identifier, then
// optionally one bracketed group, string or operator followed by a further
// EXPR.  A COMMA is only consumed between a LEFT and its RIGHT, so a comma
// outside brackets is left for IDENTIFIER to treat as the argument separator.
protected EXPR
    : ( WS )?
      ( NUMBER | IDENTIFIER )?
      (
          ( LEFT EXPR ( COMMA EXPR )* RIGHT
          | STRING
          | OPERATOR    // quotes, COMMA, LEFT, and RIGHT not in here
          )
          EXPR
      )?
    ;