File: ExtractTables.rb

package info (click to toggle)
jcodings 1.0.11-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 2,024 kB
  • ctags: 1,473
  • sloc: java: 10,088; ruby: 264; xml: 140; makefile: 5
file content (131 lines) | stat: -rw-r--r-- 4,070 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
require 'open-uri'
repo_path = ARGV.first # path to ruby repo

enc_path = "#{repo_path}/enc"
folds_src = open("#{enc_path}/unicode.c").read
unicode_src = open("#{enc_path}/unicode/name2ctype.src").read

dst_dir = "../src/org/jcodings"
dst_bin_dir = "../resources/tables"
enc_dir = "#{dst_dir}/unicode"
INDENT = " " * 4

def assert_eq a, b, msg = ""
    raise "unmet condition: #{a.inspect} == #{b.inspect}, info #{msg}" unless a == b
end

def assert_not_eq a, b, msg = ""
    raise "unmet condition: #{a.inspect} != #{b.inspect}, info: #{msg}" unless a != b
end

def extract_to to, file
    to = to.map do |t|
        case t
        when /^0x[0-9a-f]+$/
            t.to_i(16)
        else
            t = t.split(',').map{|t|t.strip}
            assert_eq(t.all?{|e|e =~ /^0x[0-9a-f]+$/}, true)
            t.map{|t| t.to_i(16)}
        end
    end
    
    open(file, "wb") do |f|
        f << [to.size].pack("N")
        to.each do |t| case t
            when Fixnum
                f << [1].pack("N")
                f << [t].pack("N")
            when Array
                f << [t.size].pack("N")
                t.each{|tx| f << [tx].pack("N")}
            else
                raise "foo"
            end
        end
    end
end

folds = folds_src.scan(/static\s+const\s+(\w+)\s+(\w+)\[\]\s+=\s+\{(.*?)\}\;/m).map do |(type, name, tab)|
    case type
        when /Case(\w+)_11_Type/
            from, to = tab.scan(/(\w+).+\{\w+?.+?\{(.+?)\s?\}/).transpose
            
            assert_eq(to.size, from.size)

            assert_eq(from.all?{|e| e =~ /^0x[0-9a-f]+$/}, true)
            from = from.map{|e| e.to_i(16)}
            open("#{dst_bin_dir}/#{name}_From.bin", "wb") do |f|
                f << [from.size].pack("N")
                from.each{|fr| f << [fr].pack("N")}
            end

            extract_to to, "#{dst_bin_dir}/#{name}_To.bin"

        when "CaseUnfold_12_Type", "CaseUnfold_13_Type"
            fld =  tab.scan(/\{\s?\{(.+?)\}.+\{\w+?.+?\{(.+?)\s?\}/).flatten

            extract_to fld, "#{dst_bin_dir}/#{name}.bin"
        else
            raise "error"
    end
end

unicode_src.scan(/static\s+const\s+(\w+)\s+(\w+)\[\]\s+=\s+\{(.*?)\}\;/m).each do |(type, name, tab)|
    tab = tab.split(",").map{|e|e.strip}
    assert_eq(tab.last, "")
    tab.pop
    
    size = tab.size
    head = tab.shift
    
    assert_eq(head =~ /^\d+$/, 0)
    head = head.to_i

    assert_eq(tab[1..-1].all?{|e| e =~ /^0x[0-9a-f]+$/}, true)

    tab = tab.map{|e| e.to_i(16)}
    assert_eq(tab.all?{|e|e >= 0 && e <= 4294967295}, true)

    open("#{dst_bin_dir}/#{name}.bin", "wb") do |f|
        f << [size].pack("N")
        f << [head].pack("N")
        tab.each{|e| f << [e].pack("N") }
    end
end


cr_map =  unicode_src.scan(/#define (CR_.*?) (.*)/).inject(Hash.new{|h, k|k}){|h, (k,v)| h[k] = v;h}

aliases = unicode_src[/%%(.*?)%%/m, 1].scan(/(.*?),\s+(\d+)/).inject(Hash.new{|h, k| h[k] = []}){|h, (name, num)| h[num.to_i] << name; h}.inject({}) do |h, (k, v)|
    full, *abbr = v.map{|e|e.strip}
    h[full] = abbr
    h
end

unicode_src.scan(/CodeRanges\[\]\s+=\s+\{(.*?)\}\;/m) do |e|
    names = e.first.scan(/CR_\w+/)

    cnames = names.map do |c|
        n = c[/CR_(.*)/, 1]
        px = case n
            when /Age_(\d)_(\d)/
                "age=#{$1}.#{$2}"
            else
                n.tr('_', '').downcase
        end

       ([px] + aliases[px].to_a).map{|n| "#{INDENT * 4}new CodeRangeEntry(\"#{n}\", \"#{cr_map[c]}\")"}.join(",\n")
    end

    open("#{enc_dir}/UnicodeProperties.java", "wb") do |f|
        f << open("UnicodePropertiesTemplate.java", "rb").read.
        sub(/%\{stdcrs\}/, cnames[0..14].join(",\n")).
        sub(/%\{extcrs\}/, cnames.join(",\n"))
    end
end

enc_db = open("#{repo_path}/encdb.h").read.tr('()', '').scan(/ENC_([A-Z_]+)(.*?);/m).reject{|a, b| a =~ /DEFINE/}

open("#{dst_dir}/EncodingList.java", "wb"){|f| f << open("EncodingListTemplate.java", "rb").read.
    sub(/%\{body\}/, enc_db.map{|cmd, from, to| "#{INDENT*2}{\"#{cmd[0,1]}\", #{from}#{to.nil? ? "" : to}}"}.join(",\n"))}