1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
|
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Capacity, in bytes, of the input buffer embedded in each Scanner. */
#define BUFSIZE 4096

/*
 * State for the pull scanner: the Ragel machine variables, the input
 * stream with its read buffer, and the description of the most recently
 * returned token.
 */
struct _Scanner {
	/* Scanner state. */
	int cs;          /* machine state; scan() compares it to Scanner_error */
	int act;         /* reached through the Ragel access prefix; not touched by the hand-written code */
	int have;        /* bytes of an unfinished token kept at the front of buf */
	int curline;     /* set to 1 by scan_init(); not updated in the visible code */
	char *ts;        /* start of the token being matched, null when none is in progress */
	char *te;        /* token end pointer, rebased together with ts on a buffer shift */
	char *p;         /* current read position inside buf */
	char *pe;        /* one past the last valid byte in buf */
	char *eof;       /* set to null by scan_init(); not changed in the visible code */
	FILE *file;      /* stream the input is read from */
	int done;        /* nonzero once a short read has been seen on file */

	/* Token data */
	char *data;      /* first byte of the last returned token (points into buf) */
	int len;         /* length in bytes of the last returned token */
	int value;       /* not used by the visible code */

	char buf[BUFSIZE];  /* raw input buffer */
};

typedef struct _Scanner Scanner;
/*
 * Ragel section for the machine named Scanner: the write data directive
 * asks Ragel to emit the static tables for the machine at this point.
 * NOTE(review): the Scanner_error constant tested in scan() is presumably
 * produced here as well; confirm against the Ragel user guide.
 */
%%{
machine Scanner;
write data;
}%%
/*
 * Reset a scanner and attach it to an input stream.
 *
 *   s    - scanner to initialise; every field is overwritten.
 *   file - stream that scan() will later read from.
 *
 * The whole structure is zero-filled first, so the buffer is empty, no
 * token is in progress and the end-of-input flag is clear.
 */
void scan_init( Scanner *s, FILE *file )
{
	memset( s, 0, sizeof *s );

	s->file = file;
	s->curline = 1;
	s->eof = NULL;

	/* Ragel-generated initialisation of the machine variables. */
	%% write init;
}
/*
 * Token codes produced by scan().
 * NOTE(review): the named codes start at 128, presumably to stay clear of
 * the single-character tokens that scan() reports as the character value
 * itself; confirm how bytes >= 128 are expected to behave.
 */
/* Internal marker inside scan(): no token matched yet; scan() never returns it. */
#define TK_NO_TOKEN (-1)
/* Returned when the buffer fills up mid-token or the machine enters its error state. */
#define TK_ERR 128
/* Returned when the scanner matches a NUL byte (appended by scan() after the last read). */
#define TK_EOF 129
/* A letter or underscore followed by letters, digits or underscores. */
#define TK_Identifier 130
/* One or more decimal digits. */
#define TK_Number 131
/* A double-quoted string; a backslash escapes the following character. */
#define TK_String 132
/*
 * Record a matched token inside scan(): store the token code in the local
 * variable `token` and point s->data at the start of the matched text.
 *
 * Requires an int named `token` and a scanner pointer named `s` in scope.
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement (safe under an unbraced if/else), and the argument is
 * parenthesised so any expression can be passed.
 */
#define ret_tok( _tok ) \
	do { \
		token = (_tok); \
		s->data = s->ts; \
	} while (0)
/*
 * Pull the next token from the scanner.
 *
 * Refills s->buf from s->file whenever all buffered input has been
 * consumed, runs the Ragel machine over the buffered bytes and returns as
 * soon as a pattern yields a token.
 *
 * Returns one of TK_Identifier, TK_Number, TK_String, TK_EOF, TK_ERR, or,
 * for any other single character, the value of that character. On a
 * successful match s->data points at the token text inside s->buf and
 * s->len holds its length; a later call may move or overwrite that text.
 * Whitespace (space, tab, newline) is skipped without producing a token.
 *
 * Progress messages are printed to stdout with printf.
 */
int scan( Scanner *s )
{
/* Stays TK_NO_TOKEN until one of the scanner actions calls ret_tok(). */
int token = TK_NO_TOKEN;
int space, readlen;
while ( 1 ) {
/* Everything currently buffered has been consumed: fetch more input. */
if ( s->p == s->pe ) {
printf("scanner: need more data\n");
/* A null ts means no token is in progress, so nothing has to be kept. */
if ( s->ts == 0 )
s->have = 0;
else {
/* There is data that needs to be shifted over. */
printf("scanner: buffer broken mid token\n");
s->have = s->pe - s->ts;
memmove( s->buf, s->ts, s->have );
/* Rebase te by the same distance the token prefix was moved. */
s->te -= (s->ts-s->buf);
s->ts = s->buf;
}
/* New input is appended right after the preserved token prefix. */
s->p = s->buf + s->have;
space = BUFSIZE - s->have;
if ( space == 0 ) {
/* We filled up the buffer trying to scan a token. */
printf("scanner: out of buffer space\n");
return TK_ERR;
}
if ( s->done ) {
/*
 * Input is exhausted: feed a single NUL byte so the machine can match
 * its EOF pattern. NOTE(review): a NUL byte occurring in the real input
 * would match that same pattern.
 */
printf("scanner: end of file\n");
s->p[0] = 0;
readlen = 1;
}
else {
/*
 * A short read is taken as end of input; the NUL sentinel is added on
 * the next refill. NOTE(review): fread returns size_t and a read error
 * is not distinguished from end of file here.
 */
readlen = fread( s->p, 1, space, s->file );
if ( readlen < space )
s->done = 1;
}
s->pe = s->p + readlen;
}
/*
 * Ragel scanner definition followed by the directive that emits the
 * execute loop. The machine variables live in *s (see the access and
 * variable statements). Each token action records the token with
 * ret_tok() and then leaves the execute loop with fbreak.
 */
%%{
machine Scanner;
access s->;
variable p s->p;
variable pe s->pe;
variable eof s->eof;
main := |*
# Identifiers
( [a-zA-Z_] [a-zA-Z0-9_]* ) =>
{ ret_tok( TK_Identifier ); fbreak; };
# Whitespace
[ \t\n];
'"' ( [^\\"] | '\\' any ) * '"' =>
{ ret_tok( TK_String ); fbreak; };
# Number
digit+ =>
{ ret_tok( TK_Number ); fbreak; };
# EOF
0 =>
{ ret_tok( TK_EOF ); fbreak; };
# Anything else
any =>
{ ret_tok( *s->p ); fbreak; };
*|;
write exec;
}%%
/* The machine reached its error state. */
if ( s->cs == Scanner_error )
return TK_ERR;
if ( token != TK_NO_TOKEN ) {
/*
 * NOTE(review): relies on fbreak leaving p just past the last matched
 * character, so p - data is the token length; confirm in the Ragel guide.
 */
s->len = s->p - s->data;
return token;
}
/* No token produced yet: loop again, refilling the buffer if it ran dry. */
}
}
/*
 * Driver: tokenise standard input and print one line per token.
 *
 * Each ordinary token is reported as its numeric code followed by the
 * matched text in double quotes. The loop ends on TK_EOF or TK_ERR, after
 * printing a corresponding message; the exit status is 0 in both cases.
 */
int main (int argc, char** argv)
{
	Scanner scanner;

	scan_init( &scanner, stdin );

	for ( ;; ) {
		int kind = scan( &scanner );

		switch ( kind ) {
		case TK_EOF:
			printf( "parser: EOF\n" );
			return 0;

		case TK_ERR:
			printf( "parser: ERR\n" );
			return 0;

		default:
			/* Token text lives in the scanner buffer; it is not NUL-terminated. */
			printf( "parser: %d \"", kind );
			fwrite( scanner.data, 1, scanner.len, stdout );
			printf( "\"\n" );
			break;
		}
	}
}
|