File: stdcompat__uchar.ml.in

package info (click to toggle)
ocaml-stdcompat 19-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 6,516 kB
  • sloc: ml: 27,806; sh: 875; makefile: 246
file content (106 lines) | stat: -rw-r--r-- 1,902 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
@BEGIN_FROM_4_03_0@
include Uchar
@END_FROM_4_03_0@
@BEGIN_BEFORE_4_03_0@
@BEGIN_WITH_UCHAR_PKG@
include Uchar
@END_WITH_UCHAR_PKG@
@BEGIN_WITHOUT_UCHAR_PKG@
(* Fallback representation: a character is stored as its integer code
   point. *)
type t = int

(* Smallest and largest code points accepted by [is_valid].
   Note that these two bindings shadow the standard [min] and [max]
   functions for the remainder of the module. *)
let min = 0x0000

let max = 0x10_FFFF

(* Edges of the gap rejected by [is_valid]: every integer strictly between
   [lo_bound] and [hi_bound], i.e. 0xD800-0xDFFF (the Unicode surrogate
   range), is not a valid character. *)
let lo_bound = 0xD800 - 1

let hi_bound = 0xDFFF + 1

(* [succ u] is the valid code point that follows [u]. From [lo_bound] it
   jumps directly to [hi_bound], skipping the excluded gap.
   Raises [Invalid_argument "Uchar.succ"] when [u] is [max]. *)
let succ u =
  match u = lo_bound, u = max with
  | true, _ -> hi_bound
  | false, true -> invalid_arg "Uchar.succ"
  | false, false -> u + 1

(* [pred u] is the valid code point that precedes [u]. From [hi_bound] it
   jumps directly back to [lo_bound], skipping the excluded gap.
   Raises [Invalid_argument "Uchar.pred"] when [u] is [min]. *)
let pred u =
  match u = hi_bound, u = min with
  | true, _ -> lo_bound
  | false, true -> invalid_arg "Uchar.pred"
  | false, false -> u - 1

(* [is_valid i] is [true] when [i] lies in [min..lo_bound] or in
   [hi_bound..max], and [false] otherwise. *)
let is_valid i =
  let below_gap = min <= i && i <= lo_bound in
  let above_gap = hi_bound <= i && i <= max in
  below_gap || above_gap

(* [of_int i] returns [i] as a character after checking it with [is_valid].
   Raises [Invalid_argument "Uchar.of_int"] when the check fails. *)
let of_int i =
  if not (is_valid i) then invalid_arg "Uchar.of_int"
  else i

(* [unsafe_of_int i] returns [i] unchanged; unlike [of_int], no validity
   check is performed. *)
let unsafe_of_int i = i

(* [to_int u] returns the integer code point of [u]; with the integer
   representation used here this is the identity. *)
let to_int u = u

(* [is_char u] is [true] when [u] is at most 0xFF, the largest code that
   fits in a [char]. *)
let is_char u = u <= 0xFF

(* [of_char c] is the character whose code point is the code of [c]. *)
let of_char = Char.code

(* [to_char u] converts [u] to a [char] when [is_char u] holds.
   Raises [Invalid_argument "Uchar.to_char"] otherwise. *)
let to_char u =
  if not (is_char u) then invalid_arg "Uchar.to_char"
  else Char.unsafe_chr u

(* [unsafe_to_char u] converts [u] with [Char.unsafe_chr], without first
   checking [is_char u]. *)
let unsafe_to_char = Char.unsafe_chr

(* [equal a b] tests whether [a] and [b] are the same character. The type
   annotations pin the generic equality to [t]. *)
let equal (a : t) (b : t) : bool = a = b

(* [compare a b] orders characters with the standard [compare], pinned to
   [t] by the annotations. The definition is not recursive: the [compare]
   on the right-hand side is the one already in scope. *)
let compare (a : t) (b : t) : int = compare a b

(* [hash u] is [to_int u]: the code point serves as its own hash. *)
let hash u = to_int u
@END_WITHOUT_UCHAR_PKG@
@END_BEFORE_4_03_0@

@BEGIN_BEFORE_4_14_0@
(* U+FFFD as a plain integer; stored in the low bits of invalid decodes. *)
let rep' = 0xFFFD

(* A decode result packed into a single integer:
   - bits 0-23 hold the code point;
   - bits 24-26 hold the length [n] given to [utf_decode] or
     [utf_decode_invalid];
   - bit 27 is set by [utf_decode] only, marking the decode as valid. *)
type utf_decode = int

let valid_bit = 27
let decode_bits = 24

(* [utf_decode_is_valid d] is [true] when everything from [valid_bit]
   upwards in [d] equals 1. *)
let utf_decode_is_valid d =
  let flag = d lsr valid_bit in
  flag = 1

(* [utf_decode_length d] extracts the three-bit length field of [d]. *)
let utf_decode_length d =
  let shifted = d lsr decode_bits in
  shifted land 0b111

(* [utf_decode_uchar d] extracts the low 24 bits of [d] as a character,
   without validity check. *)
let utf_decode_uchar d =
  let payload_mask = (1 lsl decode_bits) - 1 in
  unsafe_of_int (d land payload_mask)

(* [utf_decode n u] packs a valid decode of [u] with length [n].
   [n] is not range-checked: a value wider than three bits would spill
   into the validity bit. *)
let utf_decode n u =
  let tag = 0b1000 lor n in
  (tag lsl decode_bits) lor to_int u

(* [utf_decode_invalid n] packs an invalid decode of length [n]; its
   character field is [rep'] and its validity bit is left clear. *)
let utf_decode_invalid n = rep' lor (n lsl decode_bits)

(* [utf_8_byte_length u] maps the code point of [u] to a byte count:
   1 up to 0x7F, 2 up to 0x7FF, 3 up to 0xFFFF and 4 up to 0x10FFFF.
   A code point outside 0..0x10FFFF trips [assert false]. *)
let utf_8_byte_length u =
  let cp = to_int u in
  if cp < 0 then assert false
  else if cp <= 0x007F then 1
  else if cp <= 0x07FF then 2
  else if cp <= 0xFFFF then 3
  else if cp <= 0x10FFFF then 4
  else assert false

(* [utf_16_byte_length u] maps the code point of [u] to a byte count:
   2 up to 0xFFFF and 4 up to 0x10FFFF.
   A code point outside 0..0x10FFFF trips [assert false]. *)
let utf_16_byte_length u =
  let cp = to_int u in
  if cp < 0 then assert false
  else if cp <= 0xFFFF then 2
  else if cp <= 0x10FFFF then 4
  else assert false
@END_BEFORE_4_14_0@

@BEGIN_BEFORE_4_06_0@
(* U+FEFF, the byte order mark, built with [unsafe_of_int] and therefore
   without any validity check. *)
let bom = unsafe_of_int 0xFEFF

(* U+FFFD, the replacement character, wrapped from the integer [rep']. *)
let rep = unsafe_of_int rep'
@END_BEFORE_4_06_0@