1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82
|
open Let_syntax.Result
(* Infix shorthands for the bitwise primitives of [Int].

   OCaml derives an infix operator's precedence from its spelling: [%] binds
   like multiplication, [<<] and [>>] bind like comparisons, and [&] binds
   like [&&].  Hence [a % b << c & d] is read as [((a % b) << c) & d]. *)

(** [a % b] is the bitwise OR of [a] and [b]. *)
let ( % ) a b = Int.logor a b

(** [n << bits] is [n] shifted left by [bits] bits. *)
let ( << ) n bits = Int.shift_left n bits

(** [n >> bits] is [n] shifted right by [bits] bits (arithmetic shift, the
    sign bit is replicated). *)
let ( >> ) n bits = Int.shift_right n bits

(** [a & b] is the bitwise AND of [a] and [b]. *)
let ( & ) a b = Int.logand a b
(** [utf_8_string_of_unicode i] encodes the code point [i] as a UTF-8 string.

    Returns [Ok s], where [s] is the 1- to 4-byte UTF-8 encoding of [i], when
    [i] is a Unicode scalar value (0x0000..0xD7FF or 0xE000..0x10FFFF).

    Returns [Error msg] for every other integer: negative values, surrogate
    code points (0xD800..0xDFFF, which have no valid UTF-8 encoding) and
    values above 0x10FFFF.  This function never raises. *)
let utf_8_string_of_unicode i =
  (* [Uchar.is_valid] is exactly the "Unicode scalar value" predicate, so the
     conversion with [Uchar.of_int] below cannot raise. *)
  if Uchar.is_valid i then begin
    (* A UTF-8 sequence is at most 4 bytes long. *)
    let buf = Buffer.create 4 in
    Buffer.add_utf_8_uchar buf (Uchar.of_int i);
    Ok (Buffer.contents buf)
  end
  else Error (Format.sprintf "invalid code point %X" i)
(** [unescape str] interprets [str] as a single escape sequence and returns
    the text it denotes.

    Only [str.[1]] selects the kind of escape; the first character of [str]
    (presumably the backslash introducer) is not inspected.

    - [u] followed by 4 hex digits, or [x] followed by 2 hex digits: the
      digits are read as a code point and encoded with
      [utf_8_string_of_unicode].  Characters after the digits are ignored.
    - quote, apostrophe, [b], [f], [n], [r], [t], [v]: [str] is returned
      unchanged.
    - a backslash: a single backslash is returned.
    - [0] alone (length 2): the NUL byte.  [0] followed by exactly 2 octal
      digits (length 4): the digits are read as a code point and encoded.

    Returns [Error msg] when [str] is shorter than 2 characters, when a [u] or
    [x] escape is too short to hold its digits, when the digits do not parse,
    when a [0] escape has an unexpected length, or when the escape character
    is not recognised. *)
let unescape str =
  (* Reads the [len] digits that follow the two-character escape head, parses
     them in the radix selected by [prefix] ("0x" or "0o"), and encodes the
     resulting code point.  The length check keeps [String.sub] from raising
     [Invalid_argument] on a truncated escape. *)
  let decode_digits ~prefix ~len =
    if String.length str < 2 + len then
      Error (Format.sprintf "truncated escape sequence %s" str)
    else
      let digits = String.sub str 2 len in
      let* code_point =
        match int_of_string_opt (prefix ^ digits) with
        | Some n -> Ok n
        | None -> Error (Format.sprintf "bad escape sequence %s" digits)
      in
      utf_8_string_of_unicode code_point
  in
  if String.length str < 2 then
    Error (Format.sprintf "too small escape sequence %s" str)
  else
    match str.[1] with
    | 'u' -> decode_digits ~prefix:"0x" ~len:4
    | 'x' -> decode_digits ~prefix:"0x" ~len:2
    | '"' | '\'' | 'b' | 'f' | 'n' | 'r' | 't' | 'v' -> Ok str
    | '\\' -> Ok {|\|}
    | '0' -> (
        match String.length str with
        | 2 -> Ok "\x00"
        | 4 -> decode_digits ~prefix:"0o" ~len:2
        | _ -> Error (Format.sprintf "invalid octal sequence %s" str))
    | other -> Error (Format.sprintf "invalid escape sequence %c" other)
|