1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
|
/*
* A mini C-like language scanner.
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
%%{
# Scanner specification for a small C-like language. The main scanner
# recognises symbols, identifiers, character and string literals, whitespace,
# comments, integers, floats and hex numbers, printing each reported token
# together with the current line number (curline).
machine clang;
# Every '\n' that is consumed increments the curline counter.
newline = '\n' @{curline += 1;};
# Any character at all; newlines are still counted through the newline action.
any_count_line = any | newline;
# Consume the body of a C comment up to and including the closing '*/',
# then jump back to the main scanner. Entered from main by the '/*' pattern.
c_comment := any_count_line* :>> '*/' @{fgoto main;};
main := |*
# Alphanumeric characters or underscore.
alnum_u = alnum | '_';
# Alphabetic characters or underscore.
alpha_u = alpha | '_';
# Symbols: a single punctuation character other than underscore, single
# quote or double quote. Prints the line number and the character.
( punct - [_'"] ) {
printf( "symbol(%i): %c\n", curline, ts[0] );
};
# Identifier: a letter or underscore followed by any number of letters,
# digits or underscores. Prints the line number and the matched text.
alpha_u alnum_u* {
printf( "ident(%i): ", curline );
fwrite( ts, 1, te-ts, stdout );
printf("\n");
};
# Single-quoted literal. Its body is made of characters other than a single
# quote or backslash, newlines (which are counted), or a backslash followed
# by any character. Prints the literal including its quotes.
sliteralChar = [^'\\] | newline | ( '\\' . any_count_line );
'\'' . sliteralChar* . '\'' {
printf( "single_lit(%i): ", curline );
fwrite( ts, 1, te-ts, stdout );
printf("\n");
};
# Double-quoted literal. Same structure as the single-quoted literal, with
# the double quote as the delimiter. Prints the literal including its quotes.
dliteralChar = [^"\\] | newline | ( '\\' any_count_line );
'"' . dliteralChar* . '"' {
printf( "double_lit(%i): ", curline );
fwrite( ts, 1, te-ts, stdout );
printf("\n");
};
# Whitespace: any single character outside the printable range 0x21..0x7e
# (spaces, newlines, control codes, and any other character above 0x7e).
# There is no action, so these characters are skipped without output.
any_count_line - 0x21..0x7e;
# Comments. A C++ style comment runs from '//' to the terminating newline
# and produces no output. A '/*' hands control to the c_comment machine
# above, which consumes the rest of the comment and returns to main.
# NOTE(review): the '//' pattern requires a closing newline, so a '//'
# comment on an unterminated last line does not match it - confirm that
# this is acceptable.
'//' [^\n]* newline;
'/*' { fgoto c_comment; };
# Match a decimal integer and print it. The float pattern below starts with
# the same digits, so the two patterns overlap.
digit+ {
printf( "int(%i): ", curline );
fwrite( ts, 1, te-ts, stdout );
printf("\n");
};
# Match a float: digits, a dot, then more digits. Prints the line number
# and the matched text.
digit+ '.' digit+ {
printf( "float(%i): ", curline );
fwrite( ts, 1, te-ts, stdout );
printf("\n");
};
# Match a hex number: the prefix 0x followed by one or more hex digits.
# Prints the line number and the matched text, prefix included.
'0x' xdigit+ {
printf( "hex(%i): ", curline );
fwrite( ts, 1, te-ts, stdout );
printf("\n");
};
*|;
}%%
/* Emit the state machine's static data tables here. The nofinal option is
 * given; nothing in this file refers to a final-state constant. */
%% write data nofinal;
/* Size in bytes of the scanner's input buffer. A token still in progress
 * must fit inside it: scanner() aborts with "OUT OF BUFFER SPACE" when a
 * preserved token prefix fills the whole buffer. */
#define BUFSIZE 128
/*
 * Tokenize standard input with the clang state machine.
 *
 * Input is read in chunks into a fixed buffer of BUFSIZE bytes and fed to
 * the generated machine, whose actions print the tokens to stdout. When a
 * chunk ends in the middle of a token (ts is non-null after the machine
 * runs), the bytes from ts to the end of the chunk are moved to the start
 * of the buffer so the next read appends to them.
 *
 * A read that returns fewer bytes than requested is treated as the end of
 * input. The function prints "PARSE ERROR" to stderr and stops when the
 * machine enters its error state, and exits with status 1 when a preserved
 * token prefix fills the whole buffer.
 *
 * The names cs, act, ts, te, p, pe, eof and curline must not be renamed:
 * the generated code and the machine's actions refer to them.
 */
void scanner()
{
    static char buf[BUFSIZE];
    int cs, act;
    int curline = 1;
    int kept = 0;          /* bytes of an unfinished token held at buf[0..] */
    int finished = 0;      /* set once the final chunk has been read */
    char *ts, *te = 0;

    %% write init;

    while ( !finished ) {
        char *p = buf + kept;
        char *pe;
        char *eof = 0;
        int room = BUFSIZE - kept;
        int nread;

        if ( room == 0 ) {
            /* The preserved token prefix occupies the entire buffer, so
             * there is nowhere to read more input into. */
            fprintf(stderr, "OUT OF BUFFER SPACE\n" );
            exit(1);
        }

        nread = fread( p, 1, room, stdin );
        pe = p + nread;

        /* A short read marks the last chunk: tell the machine where the
         * input ends and make this the final pass. */
        if ( nread < room ) {
            eof = pe;
            finished = 1;
        }

        %% write exec;

        if ( cs == clang_error ) {
            fprintf(stderr, "PARSE ERROR\n" );
            break;
        }

        /* No token in progress: the next chunk starts with an empty buffer. */
        if ( ts == 0 ) {
            kept = 0;
            continue;
        }

        /* A token is in progress: slide its prefix to the front of the
         * buffer and rebase the token pointers onto the new location. */
        kept = pe - ts;
        memmove( buf, ts, kept );
        te = buf + (te-ts);
        ts = buf;
    }
}
/* Program entry point: scan standard input, then report success. */
int main()
{
    scanner();

    return EXIT_SUCCESS;
}
|