1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
|
require 'open-uri'
repo_path = ARGV.first # path to ruby repo
enc_path = "#{repo_path}/enc"
folds_src = open("#{enc_path}/unicode.c").read
unicode_src = open("#{enc_path}/unicode/name2ctype.src").read
dst_dir = "../src/org/jcodings"
dst_bin_dir = "../resources/tables"
enc_dir = "#{dst_dir}/unicode"
INDENT = " " * 4
def assert_eq a, b, msg = ""
raise "unmet condition: #{a.inspect} == #{b.inspect}, info #{msg}" unless a == b
end
def assert_not_eq a, b, msg = ""
raise "unmet condition: #{a.inspect} != #{b.inspect}, info: #{msg}" unless a != b
end
def extract_to to, file
to = to.map do |t|
case t
when /^0x[0-9a-f]+$/
t.to_i(16)
else
t = t.split(',').map{|t|t.strip}
assert_eq(t.all?{|e|e =~ /^0x[0-9a-f]+$/}, true)
t.map{|t| t.to_i(16)}
end
end
open(file, "wb") do |f|
f << [to.size].pack("N")
to.each do |t| case t
when Fixnum
f << [1].pack("N")
f << [t].pack("N")
when Array
f << [t.size].pack("N")
t.each{|tx| f << [tx].pack("N")}
else
raise "foo"
end
end
end
end
folds = folds_src.scan(/static\s+const\s+(\w+)\s+(\w+)\[\]\s+=\s+\{(.*?)\}\;/m).map do |(type, name, tab)|
case type
when /Case(\w+)_11_Type/
from, to = tab.scan(/(\w+).+\{\w+?.+?\{(.+?)\s?\}/).transpose
assert_eq(to.size, from.size)
assert_eq(from.all?{|e| e =~ /^0x[0-9a-f]+$/}, true)
from = from.map{|e| e.to_i(16)}
open("#{dst_bin_dir}/#{name}_From.bin", "wb") do |f|
f << [from.size].pack("N")
from.each{|fr| f << [fr].pack("N")}
end
extract_to to, "#{dst_bin_dir}/#{name}_To.bin"
when "CaseUnfold_12_Type", "CaseUnfold_13_Type"
fld = tab.scan(/\{\s?\{(.+?)\}.+\{\w+?.+?\{(.+?)\s?\}/).flatten
extract_to fld, "#{dst_bin_dir}/#{name}.bin"
else
raise "error"
end
end
unicode_src.scan(/static\s+const\s+(\w+)\s+(\w+)\[\]\s+=\s+\{(.*?)\}\;/m).each do |(type, name, tab)|
tab = tab.split(",").map{|e|e.strip}
assert_eq(tab.last, "")
tab.pop
size = tab.size
head = tab.shift
assert_eq(head =~ /^\d+$/, 0)
head = head.to_i
assert_eq(tab[1..-1].all?{|e| e =~ /^0x[0-9a-f]+$/}, true)
tab = tab.map{|e| e.to_i(16)}
assert_eq(tab.all?{|e|e >= 0 && e <= 4294967295}, true)
open("#{dst_bin_dir}/#{name}.bin", "wb") do |f|
f << [size].pack("N")
f << [head].pack("N")
tab.each{|e| f << [e].pack("N") }
end
end
cr_map = unicode_src.scan(/#define (CR_.*?) (.*)/).inject(Hash.new{|h, k|k}){|h, (k,v)| h[k] = v;h}
aliases = unicode_src[/%%(.*?)%%/m, 1].scan(/(.*?),\s+(\d+)/).inject(Hash.new{|h, k| h[k] = []}){|h, (name, num)| h[num.to_i] << name; h}.inject({}) do |h, (k, v)|
full, *abbr = v.map{|e|e.strip}
h[full] = abbr
h
end
unicode_src.scan(/CodeRanges\[\]\s+=\s+\{(.*?)\}\;/m) do |e|
names = e.first.scan(/CR_\w+/)
cnames = names.map do |c|
n = c[/CR_(.*)/, 1]
px = case n
when /Age_(\d)_(\d)/
"age=#{$1}.#{$2}"
else
n.tr('_', '').downcase
end
([px] + aliases[px].to_a).map{|n| "#{INDENT * 4}new CodeRangeEntry(\"#{n}\", \"#{cr_map[c]}\")"}.join(",\n")
end
open("#{enc_dir}/UnicodeProperties.java", "wb") do |f|
f << open("UnicodePropertiesTemplate.java", "rb").read.
sub(/%\{stdcrs\}/, cnames[0..14].join(",\n")).
sub(/%\{extcrs\}/, cnames.join(",\n"))
end
end
enc_db = open("#{repo_path}/encdb.h").read.tr('()', '').scan(/ENC_([A-Z_]+)(.*?);/m).reject{|a, b| a =~ /DEFINE/}
open("#{dst_dir}/EncodingList.java", "wb"){|f| f << open("EncodingListTemplate.java", "rb").read.
sub(/%\{body\}/, enc_db.map{|cmd, from, to| "#{INDENT*2}{\"#{cmd[0,1]}\", #{from}#{to.nil? ? "" : to}}"}.join(",\n"))}
|