1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209
|
require 'open-uri'
# Input and output locations. The only command-line argument is the path to a
# Ruby source checkout; the output directories are relative to the current
# working directory.
repo_path = ARGV.first # path to ruby repo
# Fail fast: without an argument the source directory would silently become
# "/enc/trans", the glob over it would normally match nothing, and
# TranscoderList.java would be overwritten with empty lists.
abort "usage: ruby #{$0} <path to ruby repo>" if repo_path.nil? || repo_path.empty?
trans_path = "#{repo_path}/enc/trans"
abort "transcoder source directory not found: #{trans_path}" unless File.directory?(trans_path)
dst_dir = "../src/org/jcodings"
dst_bin_dir = "../resources/tables"
trans_dir = "#{dst_dir}/transcode"
trans_dst_dir = "#{trans_dir}/specific"
# Indentation unit (four spaces) used when emitting generated Java source.
INDENT = " " * 4
# Action tags that the packing helpers below OR into the low bits of a table
# word. The trailing descriptions are the original C-style comments.
# NOTE(review): presumably these values must stay in sync with the transcoder
# macros in the Ruby repo (transcode_data.h) — confirm before changing any.
NOMAP = 0x01 # /* direct map */
ONEbt = 0x02 # /* one byte payload */
TWObt = 0x03 # /* two bytes payload */
THREEbt = 0x05 # /* three bytes payload */
FOURbt = 0x06 # /* four bytes payload, UTF-8 only, macros start at getBT0 */
INVALID = 0x07 # /* invalid byte sequence */
UNDEF = 0x09 # /* legal but undefined */
ZERObt = 0x0A # /* zero bytes of payload, i.e. remove */
FUNii = 0x0B # /* function from info to info */
FUNsi = 0x0D # /* function from start to info */
FUNio = 0x0E # /* function from info to output */
FUNso = 0x0F # /* function from start to output */
STR1 = 0x11 # /* string 4 <= len <= 259 bytes: 1byte length + content */
GB4bt = 0x12 # /* GB18030 four bytes payload */
FUNsio = 0x13 # /* function from start and info to output */
# Number of bits WORDINDEX2INFO shifts a word index to the left.
WORDINDEX_SHIFT_BITS = 2
# Converts a word-array index into an info value by shifting it left by
# WORDINDEX_SHIFT_BITS. Called (via eval) with the same spelling as the C macro.
def WORDINDEX2INFO(widx)
  info = widx << WORDINDEX_SHIFT_BITS
  info
end
# Encodes a STR1 string length as the stored length value: the length minus 4.
def makeSTR1LEN(len)
  bias = 4
  len - bias
end
# Builds a STR1 word: +bi+ shifted left by 6 bits, OR-ed with the STR1 tag.
def makeSTR1(bi)
  shifted = bi << 6
  STR1 | shifted
end
# Builds a one-byte-payload word: +b1+ in bits 8 and up, ONEbt tag in the low bits.
def o1(b1)
  payload = b1 << 8
  ONEbt | payload
end
# Builds a two-byte-payload word: +b1+ shifted left 8, +b2+ shifted left 16,
# TWObt tag in the low bits.
def o2(b1, b2)
  first = b1 << 8
  second = b2 << 16
  TWObt | first | second
end
# Builds a three-byte-payload word: +b1+, +b2+, +b3+ shifted left by 8, 16 and
# 24 bits, THREEbt tag in the low bits, result masked to 32 bits.
def o3(b1, b2, b3)
  word = THREEbt | (b1 << 8) | (b2 << 16) | (b3 << 24)
  word & 0xffff_ffff
end
# Builds a four-byte-payload word: the low three bits of +b0+ go to bits 5-7,
# +b1+, +b2+, +b3+ are shifted left by 8, 16 and 24 bits, the FOURbt tag sits
# in the low bits, and the result is masked to 32 bits.
def o4(b0, b1, b2, b3)
  lead_bits = (b0 & 0x07) << 5
  word = FOURbt | lead_bits | (b1 << 8) | (b2 << 16) | (b3 << 24)
  word & 0xffff_ffff
end
# Builds a GB4bt word: +b0+ shifted left 8, +b2+ shifted left 16, the low
# nibble of +b1+ in bits 24-27, the low nibble of +b3+ in bits 28-31, GB4bt tag
# in the low bits, result masked to 32 bits.
def g4(b0, b1, b2, b3)
  nibble1 = b1 & 0xf
  nibble3 = b3 & 0x0f
  word = GB4bt | (b0 << 8) | (b2 << 16) | (nibble1 << 24) | (nibble3 << 28)
  word & 0xffff_ffff
end
# Builds a FUNsio word: +diff+ shifted left by 8 bits with the FUNsio tag in
# the low bits.
#
# The parts must be combined with OR. The previous AND always produced 0 for
# integer input, because the shifted value has no low bits set and FUNsio only
# occupies low bits, so both the tag and +diff+ were lost.
def funsio(diff)
  (diff << 8) | FUNsio
end
# Raises a RuntimeError describing both values (and the optional +msg+) unless
# +a+ == +b+. Returns nil on success.
def assert_eq(a, b, msg = "")
  return if a == b

  raise "unmet condition: #{a.inspect} == #{b.inspect}, info #{msg}"
end
# Raises a RuntimeError describing both values (and the optional +msg+) unless
# +a+ != +b+. Returns nil on success.
def assert_not_eq(a, b, msg = "")
  return if a != b

  raise "unmet condition: #{a.inspect} != #{b.inspect}, info: #{msg}"
end
# Yields to the given block and raises "unmet condition" when the block
# returns a falsy value. Returns nil on success.
def assert
  return if yield

  raise "unmet condition"
end
# Generation driver.
#
# For every transcoder C source under trans_path (except files whose name
# contains "transdb") this:
#   1. extracts the *_byte_array and *_word_array tables and writes them as
#      big-endian binary files into dst_bin_dir,
#   2. collects every rb_transcoder struct into transcoder_list/generic_list.
# Finally it renders TranscoderList.java from TranscoderListTemplate.java and
# prints the elapsed time.
#
# SECURITY NOTE: macro-call text taken from the C sources is passed to eval
# (so that o1/o2/.../funsio defined in this file compute the values). Only run
# this script against a trusted Ruby checkout.
t = Time.now
# Read up front so a missing template fails before any output is written.
template = File.read("TranscoderTemplate.java")
transcoder_list = []
generic_list = []
# Flip to true only for the one-off initial generation of specific transcoder stubs.
generate_initial_stubs = false
define_re = /#define\s+(.*?)\s+(.*)/
macro_call_re = /\w+?\(.+?\)/

# Filter and name by basename so that directory names in the repo path cannot
# influence the result; sort so the generated list order is deterministic.
c_files = Dir["#{trans_path}/*.c"].reject { |path| File.basename(path) =~ /transdb/ }.sort
c_files.each do |path|
  c_source = File.read(path)
  # Collect "#define NAME VALUE" pairs, then strip them from the text because
  # they are interleaved with the array initialisers parsed below.
  defs = Hash[c_source.scan(define_re)]
  c_source = c_source.gsub(define_re, "")
  name = File.basename(path, ".c").split('_').map { |part| part.capitalize }.join("")

  # ---- byte array ----
  byte_match = c_source.match(/\w+?_byte_array\[(\d+?)\]\s+=\s+\{(.*?)\}\;/m)
  raise "no byte array found in #{path}" unless byte_match
  byte_array_size = byte_match[1].to_i
  byte_array = byte_match[2].gsub(macro_call_re) { |call| eval call }
  byte_array = byte_array.gsub(/0x(\w+)/) { |hex| hex.to_i(16).to_s }
  byte_array = byte_array.split(",").map { |e| e.strip }
  assert_eq(byte_array.last, "") # trailing comma
  byte_array.pop
  assert_eq(byte_array.size, byte_array_size)
  # Anchored so that tokens such as "12abc" are rejected instead of being
  # silently truncated by to_i.
  assert_eq(byte_array.all? { |b| b =~ /\A\d+\z/ }, true)
  byte_array = byte_array.map(&:to_i)
  assert_eq(byte_array.all? { |b| b >= 0 && b <= 255 }, true)
  File.open("#{dst_bin_dir}/Transcoder_#{name}_ByteArray.bin", "wb") do |out|
    out << [byte_array_size].pack("N")
    out << byte_array.pack("C*")
  end

  # ---- word array ----
  word_match = c_source.match(/\w+?_word_array\[(\d+?)\]\s+=\s+\{(.*?)\}\;/m)
  raise "no word array found in #{path}" unless word_match
  word_array_size = word_match[1].to_i
  # Bare tag names may appear as elements; resolve them through defs as well.
  ["INVALID", "UNDEF", "NOMAP", "FUNso", "FUNsi"].each { |c| defs[c] = Object.const_get(c) }
  word_array = word_match[2].gsub(macro_call_re) { |call| eval call }
  word_array = word_array.split(',').map { |e| e.strip }
  assert_eq(word_array.last, "") # trailing comma
  word_array.pop
  assert_eq(word_array.size, word_array_size)
  word_array = word_array.map do |token|
    next token.to_i if token =~ /\A\d+\z/

    value = defs[token]
    assert_not_eq(value, nil, token)
    value = case value
            when Integer # was Fixnum, which no longer exists on current Rubies
              value
            when macro_call_re
              evaluated = eval(value)
              assert_eq(evaluated.is_a?(Integer), true, value)
              evaluated
            when String
              assert_eq(value =~ /\A\d+\z/, 0)
              value.to_i
            else
              raise "unknown type"
            end
    defs[token] = value # memoise the resolved number
    value
  end
  assert_eq(word_array.all? { |e| e >= 0 && e <= 0xffffffff }, true)
  File.open("#{dst_bin_dir}/Transcoder_#{name}_WordArray.bin", "wb") do |out|
    out << [word_array_size].pack("N")
    out << word_array.pack("N*")
  end

  # ---- rb_transcoder definitions ----
  c_source.scan(/static\s+const\s+rb_transcoder.*?(\w+)\s+=\s+\{(.+?)\};/m) do |struct_name, body|
    parts = struct_name.split('_')
    t_name = parts[1].capitalize
    rest = parts[2..-1]
    t_name += '_' + rest.join('_') unless rest.empty?
    fields = body.gsub(/(\/\*.*?\*\/)/, "").split(',').map { |e| e.strip }
    src_enc, dst_enc, tree_start, table_info, iul, max_in, max_out, conv, state_size, _state_init, _state_fini, *funcs = fields
    table_def = defs[table_info]
    assert_not_eq(table_def, nil, table_info)
    # The last element of the table-info define is dropped, as before.
    info = table_def.split(',').map { |e| e.strip }[0..-2]
    _b_arr, b_arr_length, _w_arr, w_arr_length = info
    assert_eq(b_arr_length.to_i, byte_array_size)
    assert_eq(w_arr_length.to_i, word_array_size)
    # A transcoder is "specific" when any of its function slots is not NULL.
    specific = funcs.any? { |fn| fn != "NULL" }
    state_size = "0" if state_size == "sizeof(struct from_utf8_mac_status)"
    # super_name = specific ? "Base_#{t_name}_Transcoder" : "Transcoder"
    puts "specific transcoder #{t_name} doesnt exist" if specific && !File.exist?("#{trans_dst_dir}/#{t_name}_Transcoder.java")
    raw_start = defs[tree_start]
    ts = case raw_start
         when Integer
           raw_start
         when /\A\d+\z/
           raw_start.to_i
         when macro_call_re
           eval(raw_start)
         else
           raise "error #{raw_start}"
         end
    ctor_args = [src_enc, dst_enc, ts, "\"#{name}\"", iul, max_in, max_out,
                 "AsciiCompatibility.#{conv.split('_').last.upcase}", state_size]
    if specific && generate_initial_stubs # initial generation
      stub = template.
        gsub(/%\{name\}/, "#{t_name}_Transcoder").
        sub(/%\{super\}/, "Transcoder").
        sub(/%\{super_ctor\}/, ctor_args.join(', '))
      File.open("#{trans_dst_dir}/#{t_name}_Transcoder.java", "wb") { |out| out << stub }
    end
    generic_list << ctor_args
    transcoder_list << [src_enc, dst_enc, t_name, specific]
  end
end

list_entries = transcoder_list.map do |src_enc, dst_enc, cls, specific|
  cls_ref = specific ? '"' + cls + '"' : 'null /*' + cls + '*/'
  "#{INDENT*2}{#{src_enc}, #{dst_enc}, #{cls_ref}}"
end
generic_entries = generic_list.map { |g| "#{INDENT*2}new GenericTranscoderEntry(#{g.join(', ')})" }
# Render the template before opening the destination so a missing template
# cannot leave a truncated TranscoderList.java behind.
list_source = File.read("TranscoderListTemplate.java").
  sub(/%\{list\}/, list_entries.join(",\n")).
  sub(/%\{generic\}/, generic_entries.join(",\n"))
File.open("#{trans_dir}/TranscoderList.java", "wb") { |out| out << list_source }
p Time.now - t
|