1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221
|
(***********************************************************************)
(* *)
(* Objective Caml *)
(* *)
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
(* *)
(* Copyright 1996 Institut National de Recherche en Informatique et *)
(* en Automatique. All rights reserved. This file is distributed *)
(* under the terms of the Q Public License version 1.0. *)
(* *)
(***********************************************************************)
(* $Id: lex_lexer.mll,v 1.3 2004/10/12 12:29:19 filliatr Exp $ *)
(* The lexical analyzer for lexer definitions. Bootstrapped! *)
{
open Lex_syntax
open Lex_parser
(* Auxiliaries for the lexical analyzer *)
let brace_depth = ref 0
and comment_depth = ref 0
exception Lexical_error of string * int * int
let initial_string_buffer = String.create 256
let string_buff = ref initial_string_buffer
let string_index = ref 0
let reset_string_buffer () =
string_buff := initial_string_buffer;
string_index := 0
let store_string_char c =
if !string_index >= String.length !string_buff then begin
let new_buff = String.create (String.length !string_buff * 2) in
String.blit !string_buff 0 new_buff 0 (String.length !string_buff);
string_buff := new_buff
end;
!string_buff.[!string_index] <- c;
incr string_index
let get_stored_string () =
String.sub !string_buff 0 !string_index
let char_for_backslash = function
'n' -> '\n'
| 't' -> '\t'
| 'b' -> '\b'
| 'r' -> '\r'
| c -> c
let char_for_decimal_code lexbuf i =
Char.chr(100 * (Char.code(Lexing.lexeme_char lexbuf i) - 48) +
10 * (Char.code(Lexing.lexeme_char lexbuf (i+1)) - 48) +
(Char.code(Lexing.lexeme_char lexbuf (i+2)) - 48))
let line_num = ref 1
let line_start_pos = ref 0
let handle_lexical_error fn lexbuf =
let line = !line_num
and column = Lexing.lexeme_start lexbuf - !line_start_pos in
try
fn lexbuf
with Lexical_error(msg, _, _) ->
raise(Lexical_error(msg, line, column))
let cur_loc lexbuf =
{ start_pos = Lexing.lexeme_start_p lexbuf;
end_pos = Lexing.lexeme_end_p lexbuf;
start_line = !line_num;
start_col = Lexing.lexeme_start lexbuf - !line_start_pos }
}
rule main = parse
[' ' '\013' '\009' '\012' ] +
{ main lexbuf }
| '\010'
{ line_start_pos := Lexing.lexeme_end lexbuf;
incr line_num;
main lexbuf }
| "(*"
{ comment_depth := 1;
handle_lexical_error comment lexbuf;
main lexbuf }
| ['A'-'Z' 'a'-'z'] ['A'-'Z' 'a'-'z' '\'' '_' '0'-'9'] *
{ match Lexing.lexeme lexbuf with
"rule" -> Trule
| "parse" -> Tparse
| "and" -> Tand
| "eof" -> Teof
| "let" -> Tlet
| s ->
let l = cur_loc lexbuf in
(*i
Printf.eprintf "ident '%s' occurs at (%d,%d)\n"
s l.start_pos l.end_pos;
i*)
Tident (s,l) }
| '"'
{ reset_string_buffer();
handle_lexical_error string lexbuf;
Tstring(get_stored_string()) }
| "'" [^ '\\'] "'"
{ Tchar(Char.code(Lexing.lexeme_char lexbuf 1)) }
| "'" '\\' ['\\' '\'' 'n' 't' 'b' 'r'] "'"
{ Tchar(Char.code(char_for_backslash (Lexing.lexeme_char lexbuf 2))) }
| "'" '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
{ Tchar(Char.code(char_for_decimal_code lexbuf 2)) }
| '{'
{ let n1 = Lexing.lexeme_end_p lexbuf
and l1 = !line_num
and s1 = !line_start_pos in
brace_depth := 1;
let n2 = handle_lexical_error action lexbuf in
Taction({start_pos = n1; end_pos = n2;
start_line = l1; start_col = n1.Lexing.pos_cnum - s1}) }
| '=' { Tequal }
| '|' { Tor }
| '_' { Tunderscore }
| '[' { Tlbracket }
| ']' { Trbracket }
| '*' { Tstar }
| '?' { Tmaybe }
| '+' { Tplus }
| '(' { Tlparen }
| ')' { Trparen }
| '^' { Tcaret }
| '-' { Tdash }
| eof { Tend }
| _
{ raise(Lexical_error
("illegal character " ^ String.escaped(Lexing.lexeme lexbuf),
!line_num, Lexing.lexeme_start lexbuf - !line_start_pos)) }
and action = parse
'{'
{ incr brace_depth;
action lexbuf }
| '}'
{ decr brace_depth;
if !brace_depth = 0 then Lexing.lexeme_start_p lexbuf else action lexbuf }
| '"'
{ reset_string_buffer();
string lexbuf;
reset_string_buffer();
action lexbuf }
| "'" [^ '\\'] "'"
{ action lexbuf }
| "'" '\\' ['\\' '\'' 'n' 't' 'b' 'r'] "'"
{ action lexbuf }
| "'" '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
{ action lexbuf }
| "(*"
{ comment_depth := 1;
comment lexbuf;
action lexbuf }
| eof
{ raise (Lexical_error("unterminated action", 0, 0)) }
| '\010'
{ line_start_pos := Lexing.lexeme_end lexbuf;
incr line_num;
action lexbuf }
| _
{ action lexbuf }
and string = parse
'"'
{ () }
| '\\' [' ' '\013' '\009' '\012'] * '\010' [' ' '\013' '\009' '\012'] *
{ line_start_pos := Lexing.lexeme_end lexbuf;
incr line_num;
string lexbuf }
| '\\' ['\\' '"' 'n' 't' 'b' 'r']
{ store_string_char(char_for_backslash(Lexing.lexeme_char lexbuf 1));
string lexbuf }
| '\\' ['0'-'9'] ['0'-'9'] ['0'-'9']
{ store_string_char(char_for_decimal_code lexbuf 1);
string lexbuf }
| eof
{ raise(Lexical_error("unterminated string", 0, 0)) }
| '\010'
{ store_string_char '\010';
line_start_pos := Lexing.lexeme_end lexbuf;
incr line_num;
string lexbuf }
| _
{ store_string_char(Lexing.lexeme_char lexbuf 0);
string lexbuf }
and comment = parse
"(*"
{ incr comment_depth; comment lexbuf }
| "*)"
{ decr comment_depth;
if !comment_depth = 0 then () else comment lexbuf }
| '"'
{ reset_string_buffer();
string lexbuf;
reset_string_buffer();
comment lexbuf }
| "''"
{ comment lexbuf }
| "'" [^ '\\' '\''] "'"
{ comment lexbuf }
| "'\\" ['\\' '\'' 'n' 't' 'b' 'r'] "'"
{ comment lexbuf }
| "'\\" ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
{ comment lexbuf }
| eof
{ raise(Lexical_error("unterminated comment", 0, 0)) }
| '\010'
{ line_start_pos := Lexing.lexeme_end lexbuf;
incr line_num;
comment lexbuf }
| _
{ comment lexbuf }
|