1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
|
# Regenerates ext/character_set/unicode_casefold_table.h from the Unicode
# CaseFolding.txt data file.
#
# Steps: download CaseFolding.txt into the current directory with wget,
# collect every mapping of type 'C', fill the entry count and the table rows
# into the "<header>.tmpl" template, write the header, and finally delete the
# downloaded file again (also when one of the earlier steps fails).
#
# Raises RuntimeError if wget cannot be run or exits with a non-zero status.
desc 'Download unicode casefold data and write new C header file'
task :sync_casefold_data do
  src_path = './CaseFolding.txt'
  dst_path = "#{__dir__}/../ext/character_set/unicode_casefold_table.h"
  url      = 'http://www.unicode.org/Public/UNIDATA/CaseFolding.txt'

  begin
    # -O pins the output file name; without it wget saves to
    # "CaseFolding.txt.1" when a stale copy exists, and the stale copy
    # would be parsed instead of the fresh download.
    raise "could not download #{url}" unless system('wget', '-O', src_path, url)

    mapping = File.foreach(src_path).each_with_object({}) do |line, hash|
      from, type, to = line.split(/\s*;\s*/).first(3)
      # type 'C' stands for 'common', excludes mappings to multiple chars
      hash[from] = to if type == 'C'
    end

    # Order by numeric code point. A plain string sort would place 5-digit
    # codes such as 10400 before 4-digit codes such as 1E00.
    mapping = mapping.sort_by { |from, _to| from.hex }

    entries = mapping.map { |from, to| "{0x#{from},0x#{to}}," }
    content = File.read("#{dst_path}.tmpl")
                  .sub(/(CASEFOLD_COUNT )0/, "\\1#{mapping.count}")
                  .sub('{}', ['{', *entries, '}'].join("\n"))
    File.write(dst_path, content)
  ensure
    # Never leave the downloaded data file behind in the working directory.
    File.unlink(src_path) if File.exist?(src_path)
  end
end
|