File: tokenizer.ml

package info (click to toggle)
ocaml-sedlex 3.7-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 792 kB
  • sloc: ml: 7,866; makefile: 24; sh: 9
file content (23 lines) | stat: -rw-r--r-- 800 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
(* A single ASCII decimal digit. *)
let digit = [%sedlex.regexp? '0' .. '9']

(* One or more decimal digits: a first digit followed by any number more. *)
let number = [%sedlex.regexp? digit, Star digit]

(** [token buf] scans [buf] and prints one line on stdout per recognised
    token ([Number], [Ident] or [Op], followed by the matched text), skipping
    runs of [xml_blank]. Scanning stops after printing ["EOF"] at end of
    input, or after printing ["Non ASCII"] when a code point in 128..255 is
    matched.

    @raise Failure with message ["Unexpected character"] when no rule
    matches the input. *)
let rec token buf =
  (* Print the text just matched using [fmt], then carry on scanning. *)
  let report (fmt : (string -> unit, out_channel, unit) format) =
    Printf.printf fmt (Sedlexing.Latin1.lexeme buf);
    token buf
  in
  let letter = [%sedlex.regexp? 'a' .. 'z' | 'A' .. 'Z'] in
  match%sedlex buf with
  | number -> report "Number %s\n"
  (* An identifier starts with a letter and continues with letters/digits. *)
  | letter, Star (letter | digit) -> report "Ident %s\n"
  (* Blanks produce no output. *)
  | Plus xml_blank -> token buf
  | Plus (Chars "+*-/") -> report "Op %s\n"
  (* No recursive call here: this arm ends the scan. *)
  | 128 .. 255 -> print_endline "Non ASCII"
  | eof -> print_endline "EOF"
  | _ -> failwith "Unexpected character"

(* Entry point: tokenize a fixed sample string and print what is found. *)
let () =
  "foobar A123Bfoo  ++123Xbar/foo"
  |> Sedlexing.Latin1.from_string
  |> token