File: sync_casefold_data.rake

package info (click to toggle)
ruby-character-set 1.8.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 416 kB
  • sloc: ansic: 2,597; ruby: 1,290; makefile: 7; sh: 4
file content (20 lines) | stat: -rw-r--r-- 745 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# Regenerates ext/character_set/unicode_casefold_table.h from the Unicode
# CaseFolding.txt data file.
#
# Steps:
#   1. Download CaseFolding.txt into the current working directory.
#   2. Collect every mapping whose status field is 'C' as a [from, to] pair
#      of hex code point strings.
#   3. Fill the `<header>.tmpl` template: the `0` after `CASEFOLD_COUNT ` is
#      replaced with the number of mappings and the first `{}` with the
#      table body.
#   4. Write the header and remove the downloaded file (even on failure).
#
# Requires `wget` on the PATH and network access. Raises if the download
# fails or if no 'C' mappings could be parsed from the downloaded file.
desc 'Download unicode casefold data and write new C header file'
task :sync_casefold_data do
  src_url  = 'https://www.unicode.org/Public/UNIDATA/CaseFolding.txt'
  src_path = './CaseFolding.txt'
  dst_path = "#{__dir__}/../ext/character_set/unicode_casefold_table.h"

  begin
    # `-O` forces wget to (over)write src_path; without it an already
    # existing file makes wget save to "CaseFolding.txt.1" and the stale
    # file would be parsed instead. `sh` aborts the task on non-zero exit.
    sh 'wget', '-O', src_path, src_url

    mapping = File.foreach(src_path).each_with_object({}) do |line, hash|
      from, type, to = line.split(/\s*;\s*/).first(3)
      # type 'C' stands for 'common', excludes mappings to multiple chars
      hash[from] = to if type == 'C'
    end.sort
    # NOTE(review): the sort above compares the hex *strings*, so e.g.
    # "10400" is ordered before "1E00". Kept as-is to leave the generated
    # table unchanged; confirm the C side does not rely on numeric order.

    # An empty result means the download was not the expected data file
    # (e.g. an HTML error page); refuse to overwrite the header with it.
    raise "no common ('C') casefold mappings found in #{src_url}" if mapping.empty?

    entries = mapping.map { |from, to| "{0x#{from},0x#{to}}," }
    table = ['{', *entries, '}'].join("\n")

    content = File.read("#{dst_path}.tmpl")
      .sub(/(CASEFOLD_COUNT )0/, "\\1#{mapping.count}")
      .sub('{}') { table } # block form: no backslash interpretation

    File.write(dst_path, content)
  ensure
    # Always clean up the download, including after a failed or partial run.
    File.delete(src_path) if File.exist?(src_path)
  end
end