File: hostname_character_classes

package info (click to toggle)
ruby-json-schemer 2.4.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 544 kB
  • sloc: ruby: 7,428; makefile: 4; sh: 4
file content (42 lines) | stat: -rwxr-xr-x 1,524 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/usr/bin/env ruby

require 'open-uri'
require 'csv'

# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.1
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.2

# Both UCD files are parsed as ';'-separated records; blank lines and lines
# that start with '#' are dropped before parsing.
ucd_parse_options = { col_sep: ';', skip_blanks: true, skip_lines: /\A#/ }

unicode_data_uri = URI('https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt')
joining_type_uri = URI('https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedJoiningType.txt')

# https://www.unicode.org/reports/tr44/#Canonical_Combining_Class_Values
virama_ccc = '9'

# Keep the code point (first field) of every UnicodeData.txt record whose
# fourth field, the canonical combining class, equals the virama value.
unicode_data_rows = CSV.parse(unicode_data_uri.read, **ucd_parse_options)
virama_rows = unicode_data_rows.select { |row| row[3] == virama_ccc }
virama_codes = virama_rows.map { |row| row[0] }

# https://www.unicode.org/reports/tr44/#Default_Values
# https://www.unicode.org/reports/tr44/#Derived_Extracted
# Bucket the code column (a single code point or a "START..END" range) by
# joining type. The second field carries a trailing "# ..." comment, which is
# removed before the value is used as the hash key.
joining_type_rows = CSV.parse(joining_type_uri.read, **ucd_parse_options)
codes_by_joining_type = joining_type_rows.each_with_object({}) do |(code, joining_type), buckets|
  bucket_key = joining_type.gsub(/#.+/, '').strip
  (buckets[bucket_key] ||= []) << code.strip
end

# Builds the source text of a regexp character class from code point entries.
#
# Every run of hex digits in an entry is wrapped as a literal \u{...} escape
# (backslash included), and each ".." is replaced by "-", so an entry such as
# "0626..0629" becomes \u{0626}-\u{0629}.
#
# @param codes [Array<String>] hex code points and/or "START..END" ranges
# @return [String] the entries concatenated and enclosed in square brackets
def codes_to_character_class(codes)
  members = codes.map do |code|
    escaped = code.gsub(/\h+/) { |hex| "\\u{#{hex}}" }
    escaped.gsub('..', '-')
  end
  ['[', *members, ']'].join
end

# Print one Ruby constant assignment per character class to stdout.
virama_class = codes_to_character_class(virama_codes)
puts "VIRAMA_CHARACTER_CLASS = '#{virama_class}'"

# Only these joining types are printed, in this fixed order; a type with no
# parsed entries is skipped.
%w[L D T R].each do |joining_type|
  next unless codes_by_joining_type.key?(joining_type)

  joining_class = codes_to_character_class(codes_by_joining_type[joining_type])
  puts "JOINING_TYPE_#{joining_type}_CHARACTER_CLASS = '#{joining_class}'"
end