File: gen.ml

package info (click to toggle)
ocaml-uunf 17.0.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 536 kB
  • sloc: ml: 6,638; makefile: 6
file content (95 lines) | stat: -rw-r--r-- 2,907 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
(*---------------------------------------------------------------------------
   Copyright 2012 The uunf programmers. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(* Extracts normalization data from the Unicode Character Database *)

let log fmt = Printf.eprintf (fmt ^^ "%!")
let pp = Format.fprintf
let str = Printf.sprintf
let str_of_size s =
  let b = s * (Sys.word_size / 8) in
  if b < 1_048_576 then str "%.1f Ko" (float b /. 1024.) else
  if b < 1_073_741_824 then str "%.1f Mo" (float b /. 1024. ** 2.) else
  str "%.1f Go" (float b /. 1024. ** 3.)

(* Characters *)

let is_hangul_syllabe u = 0xAC00 <= u && u <= 0xD7A3

let iter_uchar_ints f =
  let rec loop u =
    let i = Uchar.to_int u in
    if Uchar.equal u Uchar.max then f i else
    (f i; loop (Uchar.succ u))
  in
  loop Uchar.min

(* Compact maps from characters to booleans. *)

let bool_prop_maps prop =
  let tm = Uunf_tmapbool.create true in
  let fm = Uunf_tmapbool.create false in
  let add_uchar u =
    let b = prop u in
    Uunf_tmapbool.set tm u b;
    Uunf_tmapbool.set fm u b;
  in
  iter_uchar_ints add_uchar; tm, fm

let assert_bool_prop_maps prop tm fm =
  let assert_uchar u =
    let fail () = failwith (str "bool prop map failure for U+%04X" u) in
    let b = prop u in
    if b <> Uunf_tmapbool.get tm u then fail ();
    if b <> Uunf_tmapbool.get fm u then fail ();
  in
  iter_uchar_ints assert_uchar

(* Compact maps from characters to bytes. *)

let byte_prop_map ~default prop =
  let m = Uunf_tmapbyte.create default in
  let add_uchar u = Uunf_tmapbyte.set m u (prop u) in
  iter_uchar_ints add_uchar; m

let assert_byte_prop_map prop m =
  let assert_uchar u =
    if (prop u) = Uunf_tmapbyte.get m u then () else
    failwith (str "byte prop map failure for U+%04X" u)
  in
  iter_uchar_ints assert_uchar

(* Compact maps from characters to arbitrary values. *)

let prop_map ~default prop =
  let m = Uunf_tmap.create default in
  let add_uchar u = Uunf_tmap.set m u (prop u) in
  iter_uchar_ints add_uchar; m

let assert_prop_map prop m =
  let assert_uchar u =
    if (prop u) = Uunf_tmap.get m u then () else
    failwith (str "prop map failure for U+%04X" u)
  in
  iter_uchar_ints assert_uchar

let ucd_get ucd u p pstr = match Uucd.cp_prop ucd u p with
| None -> invalid_arg (str "no %s property for U+%04X" pstr u)
| Some v -> v

(* Generate a module *)

let year = (Unix.gmtime (Unix.gettimeofday ())).Unix.tm_year + 1900

let pp_mod pp_mod ppf m =
  pp ppf
"\
(*---------------------------------------------------------------------------
   Copyright (c) %d The uunf programmers. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(* WARNING do not edit. This file was automatically generated. *)
@\n@[%a@]@\n" year pp_mod m