File: ExractTranscoders.rb

package info (click to toggle)
jcodings 1.0.11-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 2,024 kB
  • ctags: 1,473
  • sloc: java: 10,088; ruby: 264; xml: 140; makefile: 5
file content (209 lines) | stat: -rw-r--r-- 6,764 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
require 'open-uri'
# Root of the Ruby source checkout, taken from the first command-line argument.
# NOTE(review): nothing checks that an argument was given; without one,
# repo_path is nil and trans_path below becomes "/enc/trans".
repo_path = ARGV.first # path to ruby repo

# Directory whose *.c files are scanned for transcoder tables.
trans_path = "#{repo_path}/enc/trans"

# Output locations (relative paths, so they resolve against the current
# working directory):
#   dst_bin_dir   - receives the Transcoder_*_ByteArray.bin / _WordArray.bin files
#   trans_dir     - receives the generated TranscoderList.java
#   trans_dst_dir - where hand-written "<Name>_Transcoder.java" classes are looked up
dst_dir = "../src/org/jcodings"
dst_bin_dir = "../resources/tables"
trans_dir = "#{dst_dir}/transcode"
trans_dst_dir = "#{trans_dir}/specific"
# One indentation level used when formatting the generated Java list entries.
INDENT = " " * 4

# Action tags of transcoding-table words. The helpers o1, o2, o3, o4, g4 and
# makeSTR1 OR one of these tags into the low bits of a word whose higher bits
# carry the payload. INVALID, UNDEF, NOMAP, FUNso and FUNsi are also looked up
# by name (Object.const_get) when word arrays are parsed.
# NOTE(review): the values presumably mirror the macros in the Ruby source
# tree's transcode_data.h -- confirm against the Ruby version being extracted.
NOMAP =           0x01 # direct map
ONEbt =           0x02 # one byte payload
TWObt =           0x03 # two bytes payload
THREEbt =         0x05 # three bytes payload
FOURbt =          0x06 # four bytes payload, UTF-8 only, macros start at getBT0
INVALID =         0x07 # invalid byte sequence
UNDEF =           0x09 # legal but undefined
ZERObt =          0x0A # zero bytes of payload, i.e. remove
FUNii =           0x0B # function from info to info
FUNsi =           0x0D # function from start to info
FUNio =           0x0E # function from info to output
FUNso =           0x0F # function from start to output
STR1 =            0x11 # string 4 <= len <= 259 bytes: 1byte length + content
GB4bt =           0x12 # GB18030 four bytes payload
FUNsio =          0x13 # function from start and info to output

# Number of bits a word-array index is shifted left to form an info value.
WORDINDEX_SHIFT_BITS = 2

# Turns a word-array index into its info value.
#
# widx - Integer index into the word array.
#
# Returns widx shifted left by WORDINDEX_SHIFT_BITS bits.
def WORDINDEX2INFO(widx)
    info = widx << WORDINDEX_SHIFT_BITS
    info
end

# Encodes the length of an STR1 string for storage.
# The STR1 tag is documented as covering 4..259 byte strings with a one-byte
# length, so the stored value is the real length minus 4.
#
# len - Integer string length.
#
# Returns len - 4.
def makeSTR1LEN(len)
    stored_len = len - 4
    stored_len
end

# Builds an STR1 word: the STR1 tag in the low bits with bi placed from bit 6
# upwards.
#
# bi - Integer value to embed.
#
# Returns the combined Integer word.
def makeSTR1(bi)
    STR1 | (bi << 6)
end

# Builds a one-byte-payload word: the ONEbt tag in the low bits and b1 placed
# from bit 8 upwards.
#
# b1 - Integer payload byte.
#
# Returns the combined Integer word.
def o1(b1)
    ONEbt | (b1 << 8)
end

# Builds a two-byte-payload word: the TWObt tag in the low bits, b1 placed
# from bit 8 and b2 from bit 16.
#
# b1, b2 - Integer payload bytes.
#
# Returns the combined Integer word.
def o2(b1, b2)
    payload = (b2 << 16) | (b1 << 8)
    payload | TWObt
end

# Builds a three-byte-payload word: the THREEbt tag in the low bits, b1 placed
# from bit 8, b2 from bit 16 and b3 from bit 24. The result is truncated to
# 32 bits.
#
# b1, b2, b3 - Integer payload bytes.
#
# Returns the combined Integer word (0..0xffffffff).
def o3(b1, b2, b3)
    word = THREEbt
    word |= b1 << 8
    word |= b2 << 16
    word |= b3 << 24
    word & 0xffffffff
end

# Builds a four-byte-payload word. Only the low three bits of b0 are kept and
# stored in bits 5-7; b1, b2 and b3 are placed from bits 8, 16 and 24. The
# FOURbt tag occupies the low bits and the result is truncated to 32 bits.
#
# b0, b1, b2, b3 - Integer payload bytes.
#
# Returns the combined Integer word (0..0xffffffff).
def o4(b0, b1, b2, b3)
    lead_bits = (b0 & 0x07) << 5
    trailing = (b1 << 8) | (b2 << 16) | (b3 << 24)
    (trailing | lead_bits | FOURbt) & 0xffffffff
end

# Builds a GB18030 four-byte-payload word. b0 is placed from bit 8 and b2 from
# bit 16; only the low nibbles of b1 and b3 are kept, stored in bits 24-27 and
# 28-31 respectively. The GB4bt tag occupies the low bits and the result is
# truncated to 32 bits.
#
# b0, b1, b2, b3 - Integer payload bytes.
#
# Returns the combined Integer word (0..0xffffffff).
def g4(b0, b1, b2, b3)
    full_bytes = (b0 << 8) | (b2 << 16)
    nibbles = ((b1 & 0xf) << 24) | ((b3 & 0x0f) << 28)
    (full_bytes | nibbles | GB4bt) & 0xffffffff
end

# Builds a FUNsio word: the FUNsio tag in the low bits with diff placed from
# bit 8 upwards, truncated to 32 bits like the other word builders.
#
# The tag has to be OR-ed in; the previous `&` masked the shifted value with
# the tag and therefore always produced 0, losing both the tag and diff.
#
# diff - Integer offset to embed.
#
# Returns the combined Integer word (0..0xffffffff).
def funsio(diff)
    ((diff << 8) | FUNsio) & 0xffffffff
end

# Fails unless a == b.
#
# a, b - values to compare.
# msg  - extra context appended to the failure message (default "").
#
# Returns nil when the values are equal.
# Raises RuntimeError describing both values otherwise.
def assert_eq(a, b, msg = "")
    return if a == b

    raise "unmet condition: #{a.inspect} == #{b.inspect}, info #{msg}"
end

# Fails unless a != b.
#
# a, b - values to compare.
# msg  - extra context appended to the failure message (default "").
#
# Returns nil when the values differ.
# Raises RuntimeError describing both values otherwise.
def assert_not_eq(a, b, msg = "")
    return if a != b

    raise "unmet condition: #{a.inspect} != #{b.inspect}, info: #{msg}"
end


# Fails unless the given block returns a truthy value.
#
# Returns nil when the block's result is truthy.
# Raises RuntimeError ("unmet condition") otherwise.
def assert
    return if yield

    raise "unmet condition"
end

# Start time; the elapsed time is printed at the end of the script.
t = Time.now

# Java source template for "specific" transcoder classes. File.read is used
# instead of open(...).read so the file handle is closed immediately rather
# than left for the garbage collector.
template = File.read("TranscoderTemplate.java")

# Rows for the %{list} placeholder: [source, destination, class name, specific?].
transcoder_list = []
# Rows for the %{generic} placeholder: GenericTranscoderEntry constructor arguments.
generic_list = []

# Processes every transcoder C source in trans_path (all *.c except transdb):
#   1. collects the file's #define macros into `defs` and strips them from the text,
#   2. extracts the *_byte_array and *_word_array tables, validates them and
#      writes each to a length-prefixed, big-endian .bin file in dst_bin_dir,
#   3. records every rb_transcoder struct in transcoder_list / generic_list.
#
# NOTE: macro invocations found in the C text (o1(...), makeSTR1(...), ...) are
# run through `eval` against the helper methods of this script, so the script
# must only be pointed at a trusted Ruby source tree.
Dir["#{trans_path}/*.c"].reject{|path| path =~ /transdb/}.each do |path|
    # File.read closes the handle right away (open(path).read leaked it).
    src = File.read(path)
    defs = Hash[src.scan(/#define\s+(.*?)\s+(.*)/)]
    src = src.gsub(/#define\s+(.*?)\s+(.*)/, "")

    # CamelCase name from the file name, e.g. "utf8_mac.c" -> "Utf8Mac".
    # The regex is applied to the base name only, so a dot inside a directory
    # name (".../foo.checkout/...") can no longer be picked up by mistake.
    name = File.basename(path)[/(\w+)\.c/, 1].split('_').map{|e| e.capitalize}.join("")

    # ---- byte array -------------------------------------------------------
    src =~ /\w+?_byte_array\[(\d+?)\]\s+=\s+\{(.*?)\}\;/m
    byte_array_size = $1.to_i
    byte_array = $2

    # Expand macro calls by evaluating them as Ruby, then turn hex literals
    # into decimal text.
    byte_array = byte_array.gsub(/\w+?\(.+?\)/){|e| eval e}

    byte_array = byte_array.gsub(/0x(\w+)/){|e| e.to_i(16).to_s}
    byte_array = byte_array.split(",").map{|e|e.strip}
    assert_eq(byte_array.last, "") # trailing comma

    byte_array.pop
    assert_eq(byte_array.size, byte_array_size)

    # NOTE(review): /\d+/ is unanchored, so an entry only needs to contain a
    # digit to pass; kept as-is to avoid rejecting inputs that passed before.
    assert_eq(byte_array.all?{|b| b =~ /\d+/}, true)
    byte_array = byte_array.map(&:to_i)
    assert_eq(byte_array.all?{|b| b >= 0 && b <= 255}, true)

    # File layout: 32-bit big-endian element count, then one byte per element.
    File.open("#{dst_bin_dir}/Transcoder_#{name}_ByteArray.bin", "wb") do |out|
        out << [byte_array_size].pack("N")
        out << byte_array.pack("C*")
    end

    # ---- word array -------------------------------------------------------
    src =~ /\w+?_word_array\[(\d+?)\]\s+=\s+\{(.*?)\}\;/m
    word_array_size = $1.to_i
    word_array = $2

    # These tags can appear as bare names in the word array; resolve them to
    # the constants defined in this script.
    ["INVALID", "UNDEF", "NOMAP", "FUNso", "FUNsi"].each{|c|defs[c] = Object.const_get(c)}

    word_array = word_array.gsub(/\w+?\(.+?\)/){|e| eval e}

    word_array = word_array.split(',').map{|e|e.strip}
    assert_eq(word_array.last, "") # trailing comma

    word_array.pop
    assert_eq(word_array.size, word_array_size)

    word_array = word_array.map do |b|
        if b =~ /^\d+$/
            b.to_i
        else
            # A symbolic entry: look it up among the #defines and reduce it to
            # an integer, caching the result back into defs.
            v = defs[b]
            assert_not_eq(v, nil, b)

            # Integer replaces Fixnum, which was removed in Ruby 3.2; Integer
            # also matches on the older Rubies that still had Fixnum.
            v = case v
                when Integer
                    v
                when /(\w+?\(.+?\))/
                    v = eval(v)
                    assert_eq(v.is_a?(Integer), true, b)
                    v
                when String
                    assert_eq(v =~ /^\d+$/, 0)
                    v.to_i
                else
                    raise "unknown type"
            end
            defs[b] = v
            v
        end
    end

    assert_eq(word_array.all?{|e|e >= 0 && e <= 4294967295}, true)

    # File layout: 32-bit big-endian element count, then 32-bit big-endian words.
    File.open("#{dst_bin_dir}/Transcoder_#{name}_WordArray.bin", "wb") do |out|
        out << [word_array_size].pack("N")
        out << word_array.pack("N*")
    end

    # ---- rb_transcoder structs --------------------------------------------
    src.scan(/static\s+const\s+rb_transcoder.*?(\w+)\s+=\s+\{(.+?)\};/m) do |t_name, body|
        # Drop the first '_'-separated part of the struct name, capitalize the
        # second and keep any remaining parts joined with '_'.
        n = t_name.split('_')
        t_name = n[1].capitalize
        t_name += '_' + n[2..-1].join('_') unless n[2..-1].empty?

        # Struct initializer fields, comments removed. Distinct local names are
        # used so the enclosing `src` (the file text) is not overwritten.
        body =  body.gsub(/(\/\*.*?\*\/)/, "").split(',').map{|e|e.strip}
        src_enc, dst_enc, tree_start, table_info, iul, max_in, max_out, conv, state_size, _state_init, _state_fini, *funcs = body

        # The table-info macro must describe the arrays extracted above.
        info = defs[table_info].split(',').map{|e|e.strip}[0..-2]
        _b_arr, b_arr_length, _w_arr, w_arr_length = info
        assert_eq(b_arr_length.to_i, byte_array_size)
        assert_eq(w_arr_length.to_i, word_array_size)

        # A transcoder is "specific" when any of its function slots is non-NULL.
        specific = !funcs.all?{|fn| fn == "NULL"}

        state_size = "0" if state_size == "sizeof(struct from_utf8_mac_status)"

        # super_name = specific ? "Base_#{t_name}_Transcoder" : "Transcoder"
        puts "specific transcoder #{t_name} doesnt exist" if specific && !File.exist?("#{trans_dst_dir}/#{t_name}_Transcoder.java")

        ts = defs[tree_start]
        ts = case ts
            when Integer
                ts
            when /^\d+$/
                ts.to_i
            when /(\w+?\(.+?\))/
                eval(ts)
            else
                raise "error #{defs[tree_start]}"
        end

        generic_args = [src_enc, dst_enc, ts, "\"#{name}\"", iul, max_in, max_out, "AsciiCompatibility.#{conv.split('_').last.upcase}", state_size]

        if specific and false # initial generation
            java = template.
                gsub(/%\{name\}/, "#{t_name}_Transcoder").
                sub(/%\{super\}/, "Transcoder").
                sub(/%\{super_ctor\}/, generic_args.join(', '))
            File.open("#{trans_dst_dir}/#{t_name}_Transcoder.java", "wb") {|out| out << java}
        end

        generic_list << generic_args
        transcoder_list << [src_enc, dst_enc, t_name, specific]
    end
end
# Render TranscoderList.java from TranscoderListTemplate.java.
#
# The template is read and fully expanded BEFORE the destination is opened, so
# a missing template no longer truncates an existing TranscoderList.java, and
# the block form of File.open guarantees the output is flushed and closed.
# The block form of sub inserts the generated text literally (a replacement
# string argument would interpret backslash sequences).
list_rows = transcoder_list.map do |src_enc, dst_enc, cls, specific|
    "#{INDENT*2}{#{src_enc}, #{dst_enc}, #{specific ? '"' + cls + '"' : 'null /*' + cls + '*/'}}"
end
generic_rows = generic_list.map{|g| "#{INDENT*2}new GenericTranscoderEntry(#{g.join(', ')})"}

list_java = File.read("TranscoderListTemplate.java").
    sub(/%\{list\}/) { list_rows.join(",\n") }.
    sub(/%\{generic\}/) { generic_rows.join(",\n") }

File.open("#{trans_dir}/TranscoderList.java", "wb") {|out| out << list_java}


# Print the elapsed wall-clock time in seconds.
p Time.now - t