File: j_scan.y

package info (click to toggle)
viewcvs 0.9.2%2Bcvs.1.0.dev.2004.07.28-4.1etch1
  • links: PTS
  • area: main
  • in suites: etch
  • size: 1,452 kB
  • ctags: 1,355
  • sloc: python: 10,100; cpp: 840; ansic: 763; yacc: 526; sh: 163; makefile: 115
file content (135 lines) | stat: -rw-r--r-- 3,358 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
%start token
%scanner

%local {
/* C declarations shared by the scanner actions below and by the C code at
   the end of this file. */
#include "elx.h"

/* from elx-java.c */
void yyserror(const char *msg);
int yyslex(void);

/* for the TK_ symbols, generated from java.y */
#include "java.h"

/* for keyword recognition */
#include "j_keywords.h"

/* defined outside this file; called from the code in this file */
extern void issue_token(char which);
extern void mark_token_start(void);

/* Text of the identifier being scanned.  At most MAX_IDENT characters are
   stored; the array has one extra byte for the NUL that lookup() writes. */
#define MAX_IDENT 200
static int idlen;
static char identifier[MAX_IDENT+1];
/* begin a new identifier whose first character is c */
#define INIT_IDENT(c) (identifier[0] = (c), idlen = 1)
/* append c, or make the enclosing function return E_IDENT_TOO_LONG when
   MAX_IDENT characters are already stored.  Expands to an unbraced
   if/else with no trailing semicolon, so use it as a complete statement. */
#define ADD_IDENT(c) if (idlen == MAX_IDENT) return E_IDENT_TOO_LONG; \
                     else identifier[idlen++] = (c)

/* ### is there a better place? */
/* value returned through ADD_IDENT on overflow; it is defined after the
   macro that names it, which works because the macro body is only
   expanded where ADD_IDENT is used */
#define E_IDENT_TOO_LONG  (-100)

/* defined in the C section at the end of this file */
static int lookup(void);
}


%%

/* Entry rule (named by %start): skip any leading plain whitespace, call
   mark_token_start(), then recognize one token via slash_op. */
token : pure_ws* { mark_token_start(); } slash_op

/* Everything that can begin with a slash is decided here: the "/="
   operator, a comment, or a lone '/'.  After a comment the rule recurses
   into token, so the comment is followed by another whitespace skip and
   another token match.  The last alternative is empty and has no action.
   NOTE(review): presumably the empty alternative is what matches at end
   of input -- confirm against the elx generator. */
slash_op : "/=" { return TK_OPERATOR; }
	 | comment token
	 | '/' { return TK_OPERATOR; }
	 | one_token
	 |
	 ;

/* All tokens that do not begin with a slash.  Identifiers go through
   lookup(), which returns either a keyword code or TK_IDENTIFIER.
   t_chars returns yysprev_char as the token code -- apparently the
   character that was just matched (TODO confirm in elx.h).  t_bracket has
   no action here because each of its alternatives returns on its own. */
one_token : t_identifier { return lookup(); }
	  | t_literal { return TK_LITERAL; }
	  | t_operator { return TK_OPERATOR; }
	  | t_chars { return yysprev_char; }
	  | t_inc_dec { return TK_INC_DEC; }
	  | t_bracket
          ;

/* An identifier is one alpha character followed by any number of alphanum
   characters.  The text is copied into identifier[] as it is matched:
   INIT_IDENT stores the first character and ADD_IDENT appends the rest,
   returning E_IDENT_TOO_LONG from the scanner once MAX_IDENT characters
   are already stored. */
t_identifier : alpha { INIT_IDENT(yysprev_char); }
	       ( alphanum { ADD_IDENT(yysprev_char); } )*

/* Character classes.  Letters, underscore and dollar sign may start an
   identifier; digits may only continue one. */
alpha : 'a' - 'z' | 'A' - 'Z' | '_' | '$'
alphanum : alpha | digit

/* Digit classes used by the numeric literal rules. */
digit : '0' - '9'
hexdigit : digit | 'a' - 'f' | 'A' - 'F'
octal : '0' - '7'

/* A literal is a number, a double-quoted string or a single-quoted
   character constant; one_token reports all three as TK_LITERAL. */
t_literal : number | string | char_constant

/* Numeric literals, in three shapes:
     - a nonzero leading digit, further digits, then decimal_suffix
     - a leading '.', one or more digits, optional exponent and float suffix
     - a leading '0' followed by a hex part ('x' or 'X' plus hex digits),
       by octal digits, or by nothing, then decimal_suffix

   The octal digits after the leading '0' are optional (octal*, not
   octal+).  Requiring at least one of them would leave a lone zero, a
   zero with a long suffix, and fractions that start with "0." without
   any matching alternative. */
number : ('1' - '9') digit* decimal_suffix
       | '.' digit+ [exponent] [float_suffix]
       | '0' (('x' | 'X') hexdigit+ | octal*) decimal_suffix
       ;
/* What may follow the integer part of a number: a fractional part (a '.',
   optional digits, optional exponent, optional float suffix), a long
   suffix 'l' or 'L', or nothing at all. */
decimal_suffix : ('.' digit* [exponent] [float_suffix])
	       | 'l' | 'L'
	       | /* nothing */
	       ;
/* Exponent: 'e' or 'E', an optional sign, then one or more digits. */
exponent : ('e' | 'E') ['+' | '-'] digit+
/* Floating-point type suffix. */
float_suffix : 'f' | 'F' | 'd' | 'D'

/* String literal: double quotes around any number of string_char.  The
   action reports the token as ELX_STRING through issue_token(). */
string : '"' string_char* '"' { issue_token(ELX_STRING); }
/* One element of a string body: any byte from \1 to \377 other than the
   double quote and the backslash, or a backslash followed by any byte.
   NOTE(review): this reads '->', '<->' and '<-' as ranges that leave out
   the endpoint(s) the arrow points at -- confirm against the elx
   documentation. */
string_char : '\1' -> '"' | '"' <-> '\\' | '\\' <- '\377' | '\\' '\1' - '\377'

/* Character constant: single quotes around exactly one one_char.  Unlike
   string, this rule has no action of its own. */
char_constant : '\'' one_char '\''
/* Same shape as string_char, with the single quote excluded instead of
   the double quote.
   NOTE(review): an escape here is a backslash plus exactly one byte, so
   multi-character escapes (octal or unicode forms) would not fit between
   the quotes -- confirm whether that is intended. */
one_char : '\1' -> '\'' | '\'' <-> '\\' | '\\' <- '\377' | '\\' '\1' - '\377'

/* Comments.  A line comment is two slashes, any characters other than
   newline, and the terminating newline.  A block comment opens with
   slash-star and closes with a run of one or more stars followed by a
   slash.  Either kind is reported as ELX_COMMENT through issue_token().

   Inside a block comment a run of stars continues the comment only when
   the character after the whole run is neither a star nor a slash.
   Pairing a single star with any non-slash character is not enough: in
   text such as two stars followed by a slash, the second star would be
   consumed as the partner of the first and the slash would then be taken
   as ordinary comment text, so the comment would run past its real end.

   block_stars is a single explicit left-recursive rule so that both of
   its uses share one nonterminal; the choice between "comment goes on"
   and "comment ends" is then made on the character after the run. */
comment : ( "//" line_comment_char* '\n'
	  | "/*" (block_comment_char | block_stars block_non_term_char)* block_stars '/'
	  ) { issue_token(ELX_COMMENT); }
	;
/* any byte except newline */
line_comment_char : '\1' -> '\n' | '\n' <- '\377'
/* any byte except a star */
block_comment_char : '\1' -> '*' | '*' <- '\377'
/* any byte except a star or a slash */
block_non_term_char : '\1' -> '*' | '*' <-> '/' | '/' <- '\377'
/* one or more consecutive stars */
block_stars : '*' | block_stars '*'

/* Operators that one_token reports as TK_OPERATOR.  The "/=" operator and
   the lone '/' are not listed because slash_op handles them, and the
   characters listed in t_chars are returned as themselves instead. */
t_operator : "<<" | ">>" | ">>>"
           | ">=" | "<=" | "==" | "!=" | "&&" | "||"
	   | "*=" | "%=" | "+=" | "-=" | "<<=" | ">>="
	   | ">>>=" | "&=" | "^=" | "|="
	   | '<' | '>' | '%' | '^' | '&' | '|'
	   ;
/* Increment and decrement; one_token reports them as TK_INC_DEC. */
t_inc_dec : "++" | "--"

/* An opening bracket on its own is returned as '['.  An opening bracket
   followed by a closing one, either directly or with whitespace and/or
   comments in between, is returned as the single token TK_DIM. */
/* note: could not use ws* ; the '[' form would only reduce on $end
   rather than "any" character. that meant we could not recognize '['
   within the program text. separating out the cases Does The Right
   Thing */
t_bracket : '[' { return '['; }
	  | '[' ']' { return TK_DIM; }
	  | '[' ws+ ']' { return TK_DIM; }
	  ;

/* Single-character tokens.  one_token returns yysprev_char for these, so
   no action is attached here. */
t_chars : ',' | ';' | '.' | '{' | '}' | '=' | '(' | ')' | ':'
        | ']' | '!' | '~' | '+' | '-' | '*' | '?'
	;

/* Whitespace in the wider sense: plain whitespace or a comment.  Used
   between the brackets of t_bracket. */
ws : pure_ws | comment

/* Plain whitespace: space, tab, newline, form feed.
   NOTE(review): carriage return is not listed -- confirm whether input
   with CR/LF line endings can reach this scanner. */
pure_ws : ' ' | '\t' | '\n' | '\f'

%%

/*
 * Classify the text most recently collected in identifier[0 .. idlen).
 *
 * Returns the value produced by KR_find_keyword() when the text is a
 * keyword, after reporting ELX_KEYWORD through issue_token(); otherwise
 * returns TK_IDENTIFIER.
 *
 * The buffer is NUL-terminated on both paths.  Terminating it only for
 * plain identifiers would let get_identifier() hand out a keyword followed
 * by leftover bytes from an earlier, longer identifier.
 */
static int lookup(void)
{
    int kw;

    /* identifier[] holds MAX_IDENT+1 bytes and ADD_IDENT never lets idlen
       grow past MAX_IDENT, so this store is always in bounds.  The keyword
       search is given an explicit length, so the terminator does not
       affect it. */
    identifier[idlen] = '\0';

    kw = KR_find_keyword(identifier, idlen);
    if (kw == KR__not_found)
    {
        return TK_IDENTIFIER;
    }

    issue_token(ELX_KEYWORD);
    return kw;
}

/*
 * Hand back the scanner's identifier buffer.
 *
 * The pointer refers to static storage in this file, so the text is
 * replaced when the next identifier is scanned.  lookup() NUL-terminates
 * the buffer whenever it reports TK_IDENTIFIER.
 */
const char *get_identifier(void)
{
    const char *text = &identifier[0];

    return text;
}