1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148
|
(** [simple_uchar_to_string c] is the UTF-8 encoding of [c], built one bit at
    a time so that it can serve as an easy-to-audit reference implementation.

    The number of bytes is chosen from the code point's magnitude (1 byte up
    to [0x7F], 2 up to [0x7FF], 3 up to [0xFFFF], 4 up to [0x10FFFF]); each
    byte is a fixed marker prefix followed by a slice of the code point's
    bits, most significant first.

    @raise Failure if the integer value of [c] exceeds [0x10FFFF]. *)
let simple_uchar_to_string (c : Uchar.t) : string =
  let code = Uchar.to_int c in
  (* [bit k] tells whether the [k]-th least significant bit of [code] is set. *)
  let bit k = ((code asr k) land 0x1) <> 0 in
  (* [payload hi lo] lists bits [hi] down to [lo] of [code], highest first. *)
  let payload hi lo = List.init (hi - lo + 1) (fun offset -> bit (hi - offset)) in
  (* [pack flags] folds exactly eight bits (most significant first) into a
     single character. *)
  let pack flags =
    assert (List.length flags = 8);
    flags
    |> List.fold_left
         (fun acc flag -> (acc lsl 1) lor (if flag then 1 else 0))
         0
    |> Char.chr
  in
  (* A continuation byte is [10xxxxxx], carrying six bits ending at [hi - 5]. *)
  let continuation hi = pack (true :: false :: payload hi (hi - 5)) in
  let encoded =
    if code <= 0x7F then
      (* 0xxxxxxx *)
      [ pack (false :: payload 6 0) ]
    else if code <= 0x7FF then
      (* 110xxxxx 10xxxxxx *)
      [ pack ([ true; true; false ] @ payload 10 6); continuation 5 ]
    else if code <= 0xFFFF then
      (* 1110xxxx 10xxxxxx 10xxxxxx *)
      [
        pack ([ true; true; true; false ] @ payload 15 12);
        continuation 11;
        continuation 5;
      ]
    else if code <= 0x10FFFF then
      (* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx *)
      [
        pack ([ true; true; true; true; false ] @ payload 20 18);
        continuation 17;
        continuation 11;
        continuation 5;
      ]
    else failwith "Unexpected case"
  in
  String.of_seq (List.to_seq encoded)
(* Differential fuzz test: for an integer drawn from
   [Crowbar.range (succ 0x10FFFF)] that passes [Uchar.is_valid], the bytes
   emitted by [CCUtf8_string.uchar_to_bytes] must equal the string produced
   by the reference encoder [simple_uchar_to_string]. *)
let () =
  (* Collect, in emission order, every byte the library hands to its
     callback. *)
  let encode_with_library uchar =
    let out = Buffer.create 4 in
    CCUtf8_string.uchar_to_bytes uchar (Buffer.add_char out);
    Buffer.contents out
  in
  Crowbar.add_test
    ~name:"ccutf8_string_uchar_to_bytes_is_same_as_simple_version"
    [ Crowbar.range (succ 0x10FFFF) ]
    (fun code ->
      (* The guard runs before [Uchar.of_int], which rejects invalid
         integers. *)
      Crowbar.guard (Uchar.is_valid code);
      let uchar = Uchar.of_int code in
      let expected = simple_uchar_to_string uchar in
      let actual = encode_with_library uchar in
      Crowbar.check_eq expected actual)
|