File: parse_scripts.ml

package info (click to toggle)
camomile 0.8.4-2
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 22,252 kB
  • sloc: ml: 14,032; makefile: 368; xml: 224; sh: 6
file content (87 lines) | stat: -rw-r--r-- 3,585 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
(** Parse the script data of Unicode *)
(* Copyright 2002, 2003, 2011 Yamagata Yoriyuki. *)

(* This library is free software; you can redistribute it and/or *)
(* modify it under the terms of the GNU Lesser General Public License *)
(* as published by the Free Software Foundation; either version 2 of *)
(* the License, or (at your option) any later version. *)

(* As a special exception to the GNU Library General Public License, you *)
(* may link, statically or dynamically, a "work that uses this library" *)
(* with a publicly distributed version of this library to produce an *)
(* executable file containing portions of this library, and distribute *)
(* that executable file under terms of your choice, without any of the *)
(* additional requirements listed in clause 6 of the GNU Library General *)
(* Public License. By "a publicly distributed version of this library", *)
(* we mean either the unmodified Library as distributed by the authors, *)
(* or a modified version of this library that is distributed under the *)
(* conditions defined in clause 3 of the GNU Library General Public *)
(* License. This exception does not however invalidate any other reasons *)
(* why the executable file might be covered by the GNU Library General *)
(* Public License . *)

(* This library is distributed in the hope that it will be useful, *)
(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)
(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU *)
(* Lesser General Public License for more details. *)

(* You should have received a copy of the GNU Lesser General Public *)
(* License along with this library; if not, write to the Free Software *)
(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)
(* USA *)

(* You can contact the author by sending email to *)
(* yori@users.sourceforge.net *)

module Unidata = Unidata.Make(Camomileconfig)
open Unidata

(* Mutable map from code points to script numbers, filled in by
   [read_data].  It starts out with the whole range from code point 0 to
   0x7fffffff bound to the number of the `Common script, so every code
   point that the input never mentions keeps that value. *)
let tbl_rw =
  let common = num_of_script `Common in
  let first = UChar.chr_of_uint 0 in
  let last = UChar.chr_of_uint 0x7fffffff in
  ref (UMap.add_range first last common UMap.empty)

(* Line patterns for the script data read by [read_data].  Both are
   applied with [Str.string_match] at offset 0, so they are anchored at the
   start of the line.

   Note on escapes: [Str] gives backslash no special meaning inside a
   bracket expression, so the tab has to be a real tab character, i.e. \t
   with a single backslash in the OCaml literal.  Writing it with a doubled
   backslash puts a literal backslash and the letter t into the set, which
   cuts every script name containing a lowercase t short and fails to
   recognise tab separators. *)

(* A range of code points.
   Group 1: first code point (hexadecimal), group 2: last code point
   (hexadecimal), group 3: script name (up to the next space or tab). *)
let range_pat =
  Str.regexp
    "\\([0-9A-Fa-f]+\\)\\.\\.\\([0-9A-Fa-f]+\\)[ \t]*;[ \t]*\\([^ \t]+\\)"

(* A single code point.
   Group 1: code point, group 2: script name (up to the next space or tab).
   The first group accepts any alphanumeric token; a token that is not
   valid hexadecimal is rejected by [int_of_string] in [read_data] instead
   of being skipped silently.  The group is matched once: repeating an
   already repeated group added nothing to what is captured and only
   invited needless backtracking. *)
let num_pat =
  Str.regexp "\\([0-9A-Za-z]+\\)[ \t]*;[ \t]*\\([^ \t]+\\)"

(* Disabled helper, kept for reference.  It refers to [line_pat], which is
   not defined in this file.
let get_line () =
  let s = read_line () in
  if Str.string_match line_pat s 0 then Str.matched_group 1 s else s *)

(* Integer cell initialised to 0.  Nothing in the visible part of this file
   reads or updates it.
   NOTE(review): looks like a leftover that could be removed -- confirm. *)
let prev_entry = ref 0

(* Read standard input line by line until end of file and record the
   script number of every code point mentioned in [tbl_rw].

   A line matching [range_pat] binds the whole code point range; otherwise
   a line matching [num_pat] binds a single code point.  Lines matching
   neither pattern are ignored.  Exceptions raised while converting a code
   point or looking up a script name are not caught here. *)
let read_data () =
  (* Matched group [i] of [line], read as a hexadecimal integer. *)
  let hex_group i line = int_of_string ("0x" ^ Str.matched_group i line) in
  (* Script name -> script number. *)
  let script_num name = num_of_script (script_of_name name) in
  let handle line =
    if Str.string_match range_pat line 0 then begin
      let first = UChar.chr_of_uint (hex_group 1 line) in
      let last = UChar.chr_of_uint (hex_group 2 line) in
      let num = script_num (Str.matched_group 3 line) in
      tbl_rw := UMap.add_range first last num !tbl_rw
    end else if Str.string_match num_pat line 0 then begin
      let code = hex_group 1 line in
      let num = script_num (Str.matched_group 2 line) in
      tbl_rw := UMap.add (UChar.chr_of_uint code) num !tbl_rw
    end
  in
  try
    while true do
      handle (read_line ())
    done
  with End_of_file -> ()

(* Program body: consume the script data from standard input, then hand
   the command line to [Arg.parse].  No options are declared; each
   anonymous argument is taken as a directory, and for each one two
   entries are written through [Database.write]: "scripts_map" (the map
   with script numbers converted back by [script_of_num]) and "scripts"
   (a [UCharTbl.Bits] table built from the same map with the number of
   `Common as its default). *)
let main () =
  let write_tables dir =
    (* Kept as a full function definition so that it stays polymorphic in
       the type of [value]; it is used at two different types below. *)
    let write name value = Database.write dir "mar" output_value name value in
    write "scripts_map" (UMap.map script_of_num !tbl_rw);
    write "scripts" (UCharTbl.Bits.of_map (num_of_script `Common) !tbl_rw)
  in
  read_data ();
  Arg.parse [] write_tables "Parse Scripts.txt"
    
(* Entry point: run [main] when the program is loaded. *)
let () = main ()