File: ucd-symbol-list.rb

package info (click to toggle)
groonga 15.0.4%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 163,080 kB
  • sloc: ansic: 770,564; cpp: 48,925; ruby: 40,447; javascript: 10,250; yacc: 7,045; sh: 5,602; python: 2,821; makefile: 1,672
file content (77 lines) | stat: -rwxr-xr-x 2,027 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#!/usr/bin/env ruby

# Directory containing the UCD data files, taken from the first command
# line argument.
base_dir = ARGV.first

# Maps a character code (Integer) to the description it was registered with.
@targets = {}

# Remembers +description+ for +character_code+, replacing any description
# registered earlier for the same code.
def register(character_code, description)
  @targets.store(character_code, description)
end

# Abbreviated property value => long property value name, collected from
# every PropertyValueAliases.txt line that starts with an ASCII letter.
# Entries whose abbreviation is "n/a" are skipped.
property_aliases = {}
File.foreach("#{base_dir}/PropertyValueAliases.txt") do |entry|
  next unless entry.match?(/\A[a-z]/i)

  # Fields are separated by ";"; the first one (the property) is not used.
  _property, short_name, long_name = entry.chomp.split(/\s*;\s*/)
  next if short_name == "n/a"

  property_aliases[short_name] = long_name
end

# Property value names whose characters are registered as symbols.
symbol_property_values = [
  "Dash_Punctuation",
  "Open_Punctuation",
  "Close_Punctuation",
  "Connector_Punctuation",
  "Other_Punctuation",
  "Math_Symbol",
  "Currency_Symbol",
  "Modifier_Symbol",
  "Other_Symbol",
]

# Matches a PropList.txt data line: a hexadecimal code point, optionally
# followed by "..<last code point>", then " ; <property> # " and a
# two-character abbreviation. The "..<last>" part must be optional,
# otherwise lines that describe a single code point are silently skipped.
prop_list_pattern =
  /\A([\da-f]{4,5})(?:\.\.([\da-f]{4,5}))? +; .+? \# (.{2})/i

# Registers every code point in PropList.txt whose two-character
# abbreviation resolves to one of symbol_property_values.
File.foreach("#{base_dir}/PropList.txt") do |line|
  matched = prop_list_pattern.match(line.chomp)
  next unless matched

  start, last, property_value_alias = matched.captures
  # Fall back to the raw abbreviation when no alias is known for it.
  property_value = property_aliases[property_value_alias] || property_value_alias
  next unless symbol_property_values.include?(property_value)

  first_code = start.to_i(16)
  # A single code point is handled as a range of length one.
  last_code = last ? last.to_i(16) : first_code
  (first_code..last_code).each do |character_code|
    register(character_code, property_value)
  end
end

# Names of the Blocks.txt blocks whose whole code point range is registered.
cjk_symbol_blocks = [
  "CJK Symbols and Punctuation",
  "Enclosed CJK Letters and Months",
  "CJK Compatibility",
  "CJK Compatibility Forms",
]

# Matches "<first>..<last>; <block name>" lines of Blocks.txt.
block_pattern = /\A([\da-f]{4,5})\.\.([\da-f]{4,5}); (.+)\z/i

# Registers every code point of the selected blocks, using the block name
# as the description.
File.foreach("#{base_dir}/Blocks.txt") do |entry|
  matched = block_pattern.match(entry.chomp)
  next unless matched

  first_code, last_code, block_name = matched.captures
  next unless cjk_symbol_blocks.include?(block_name)

  first_code.to_i(16).upto(last_code.to_i(16)) do |character_code|
    register(character_code, block_name)
  end
end

# Prints one "<0x-prefixed hex code>: <character>: <description>" line per
# registered character code, in ascending order of character code.
@targets.sort_by { |code, _label| code }.each do |code, label|
  # "U" packs the integer as a UTF-8 encoded character.
  glyph = [code].pack("U")
  puts("%#x: %s: %s" % [code, glyph, label])
end