File: plexing.ml

package info (click to toggle)
camlp5 8.04.00-1
  • links: PTS, VCS
  • area: main
  • in suites: forky
  • size: 11,972 kB
  • sloc: ml: 137,918; makefile: 2,055; perl: 1,729; sh: 1,653; python: 38
file content (256 lines) | stat: -rw-r--r-- 7,059 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
(* camlp5r *)
(* plexing.ml,v *)
(* Copyright (c) INRIA 2007-2017 *)

open Versdep;

type pattern = (string * string);

exception Error of string;

type location = Ploc.t;
type location_function = int -> location;

value make_loc = Ploc.make_unlined;
value dummy_loc = Ploc.dummy;

module Locations = struct
  type t = { locations : ref (array (option Ploc.t)) ; overflow : ref bool } ;
  value locerr () = failwith "Lexer: location function" ;
  value create () = { locations = ref (array_create 1024 None) ; overflow = ref False } ;

value lookup t i =
  let (loct, ov) = (t.locations, t.overflow) in
  match
    if i < 0 || i >= Array.length loct.val then
      if ov.val then Some dummy_loc else None
    else Array.unsafe_get loct.val i
  with
  [ Some loc -> loc
  | None -> locerr () ]
;
value add t i loc =
  let (loct, ov) = (t.locations, t.overflow) in
  if i >= Array.length loct.val then
    let new_tmax = Array.length loct.val * 2 in
    if new_tmax < Sys.max_array_length then do {
      let new_loct = array_create new_tmax None in
      Array.blit loct.val 0 new_loct 0 (Array.length loct.val);
      loct.val := new_loct;
      loct.val.(i) := Some loc
    }
    else ov.val := True
  else loct.val.(i) := Some loc
;

end ;

type lexer_func 'te = Stream.t char -> (Stream.t 'te * Locations.t);

type lexer 'te =
  { tok_func : lexer_func 'te;
    tok_using : pattern -> unit;
    tok_removing : pattern -> unit;
    tok_match : mutable pattern -> 'te -> string;
    tok_text : pattern -> string;
    tok_comm : mutable option (list location);
    kwds : Hashtbl.t string string
  }
;

value lexer_text (con, prm) =
  if con = "" then "'" ^ prm ^ "'"
  else if prm = "" then con
  else con ^ " '" ^ prm ^ "'"
;

value make_stream_and_location next_token_loc =
  let loct = Locations.create () in
  let ts =
    Stream.from
      (fun i -> do {
         let (tok, loc) = next_token_loc () in
         Locations.add loct i loc;
         Some tok
       })
  in
  (ts, loct)
;

value lexer_func_of_parser next_token_loc cs =
  let line_nb = ref 1 in
  let bolpos = ref 0 in
  make_stream_and_location (fun () -> next_token_loc (cs, line_nb, bolpos))
;

value lexer_func_of_ocamllex_located lexfun cs =
  let lb =
    Lexing.from_function
      (fun s n ->
         try do { string_set s 0 (Stream.next cs); 1 } with
         [ Stream.Failure -> 0 ])
  in
  let next_token_func () = lexfun lb in
  make_stream_and_location next_token_func
;

value lexer_func_of_ocamllex lexfun cs =
  let lexfun_located lb =
    let tok = lexfun lb in
    let loc = make_loc (Lexing.lexeme_start lb, Lexing.lexeme_end lb) in
    (tok, loc)
  in
  lexer_func_of_ocamllex_located lexfun_located cs
;

(* Char and string tokens to real chars and string *)

value buff = ref (string_create 80);
value store len x = do {
  if len >= string_length buff.val then
    buff.val := string_cat buff.val (string_create (string_length buff.val))
  else ();
  string_set buff.val len x;
  succ len
};
value get_buff len = string_sub buff.val 0 len;

value valch x = Char.code x - Char.code '0';
value valch_a x = Char.code x - Char.code 'a' + 10;
value valch_A x = Char.code x - Char.code 'A' + 10;

value rec backslash s i =
  if i = String.length s then raise Not_found
  else
    match s.[i] with
    [ 'n' -> ('\n', i + 1)
    | 'r' -> ('\r', i + 1)
    | 't' -> ('\t', i + 1)
    | 'b' -> ('\b', i + 1)
    | '\\' -> ('\\', i + 1)
    | '"' -> ('"', i + 1)
    | ''' -> (''', i + 1)
    | '0'..'9' as c -> backslash1 (valch c) s (i + 1)
    | 'x' -> backslash1h s (i + 1)
    | _ -> raise Not_found ]
and backslash1 cod s i =
  if i = String.length s then ('\\', i - 1)
  else
    match s.[i] with
    [ '0'..'9' as c -> backslash2 (10 * cod + valch c) s (i + 1)
    | _ -> ('\\', i - 1) ]
and backslash2 cod s i =
  if i = String.length s then ('\\', i - 2)
  else
    match s.[i] with
    [ '0'..'9' as c -> (Char.chr (10 * cod + valch c), i + 1)
    | _ -> ('\\', i - 2) ]
and backslash1h s i =
  if i = String.length s then ('\\', i - 1)
  else
    match s.[i] with
    [ '0'..'9' as c -> backslash2h (valch c) s (i + 1)
    | 'a'..'f' as c -> backslash2h (valch_a c) s (i + 1)
    | 'A'..'F' as c -> backslash2h (valch_A c) s (i + 1)
    | _ -> ('\\', i - 1) ]
and backslash2h cod s i =
  if i = String.length s then ('\\', i - 2)
  else
    match s.[i] with
    [ '0'..'9' as c -> (Char.chr (16 * cod + valch c), i + 1)
    | 'a'..'f' as c -> (Char.chr (16 * cod + valch_a c), i + 1)
    | 'A'..'F' as c -> (Char.chr (16 * cod + valch_A c), i + 1)
    | _ -> ('\\', i - 2) ]
;

value rec skip_indent s i =
  if i = String.length s then i
  else
    match s.[i] with
    [ ' ' | '\t' -> skip_indent s (i + 1)
    | _ -> i ]
;

value skip_opt_linefeed s i =
  if i = String.length s then i else if s.[i] = '\010' then i + 1 else i
;

value eval_char s =
  if String.length s = 1 then s.[0]
  else if String.length s = 0 then failwith "invalid char token"
  else if s.[0] = '\\' then
    if String.length s = 2 && s.[1] = ''' then '''
    else
      try
        let (c, i) = backslash s 1 in
        if i = String.length s then c else raise Not_found
      with
      [ Not_found -> failwith "invalid char token" ]
  else failwith "invalid char token"
;

value eval_string loc s =
  bytes_to_string (loop 0 0) where rec loop len i =
    if i = String.length s then get_buff len
    else
      let (len, i) =
        if s.[i] = '\\' then
          let i = i + 1 in
          if i = String.length s then failwith "invalid string token"
          else if s.[i] = '"' then (store len '"', i + 1)
          else
            match s.[i] with
            [ '\010' -> (len, skip_indent s (i + 1))
            | '\013' -> (len, skip_indent s (skip_opt_linefeed s (i + 1)))
            | c ->
                try
                  let (c, i) = backslash s i in
                  (store len c, i)
                with
                [ Not_found -> (store (store len '\\') c, i + 1) ] ]
        else (store len s.[i], i + 1)
      in
      loop len i
;

value default_match =
  fun
  [ ("ANY", "") -> fun (con, prm) -> prm
  | ("ANY", v) ->
      fun (con, prm) -> if v = prm then v else raise Stream.Failure
  | (p_con, "") ->
      fun (con, prm) -> if con = p_con then prm else raise Stream.Failure
  | (p_con, p_prm) ->
      fun (con, prm) ->
        if con = p_con && prm = p_prm then prm else raise Stream.Failure ]
;

value input_file = ref "";
value line_nb = ref (ref 0);
value bol_pos = ref (ref 0);
value restore_lexing_info = ref None;

(* The lexing buffer used by pa_lexer.cmo *)

value rev_implode l =
  let s = string_create (List.length l) in
  bytes_to_string (loop (string_length s - 1) l) where rec loop i =
    fun
    [ [c :: l] -> do { string_unsafe_set s i c; loop (i - 1) l }
    | [] -> s ]
;

module Lexbuf :
  sig
    type t = 'abstract;
    value empty : t;
    value add : char -> t -> t;
    value get : t -> string;
  end =
  struct
    type t = list char;
    value empty = [];
    value add c l = [c :: l];
    value get = rev_implode;
  end
;