1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235
|
# This file was automatically generated by running:
#
# scripts/generate_unicode_data.cr
#
# DO NOT EDIT
module Unicode
# Ranges used for upcase conversion. Most case conversions map a range of
# codepoints to another range.
# Here we store: {from, to, delta}
#
# The array is computed lazily on first access (block form of `class_getter`)
# and is created with capacity for every entry up front.
private class_getter upcase_ranges : Array({Int32, Int32, Int32}) do
data = Array({Int32, Int32, Int32}).new(<%= upcase_ranges.size %>)
<%- upcase_ranges.each do |range| -%>
put(data, <%= range.low %>, <%= range.high %>, <%= range.delta %>)
<%- end -%>
data
end
# Ranges used for downcase conversion. Most case conversions map a range of
# codepoints to another range.
# Here we store: {from, to, delta}
#
# The array is computed lazily on first access (block form of `class_getter`)
# and is created with capacity for every entry up front.
private class_getter downcase_ranges : Array({Int32, Int32, Int32}) do
data = Array({Int32, Int32, Int32}).new(<%= downcase_ranges.size %>)
<%- downcase_ranges.each do |range| -%>
put(data, <%= range.low %>, <%= range.high %>, <%= range.delta %>)
<%- end -%>
data
end
# Other case conversions run in an alternating range
# of uppercase/lowercase transformations.
# Here we store: {from, to}
#
# The array is computed lazily on first access (block form of `class_getter`)
# and is created with capacity for every entry up front.
private class_getter alternate_ranges : Array({Int32, Int32}) do
data = Array({Int32, Int32}).new(<%= alternate_ranges.size %>)
<%- alternate_ranges.each do |range| -%>
put(data, <%= range.low %>, <%= range.high %>)
<%- end -%>
data
end
# We store categories as consecutive strides {from, to, stride}: the
# codepoints `from`, `from + stride`, ... up to `to` belong to the category.
#
# For example, in this case:
#
# {1, 10, 1}
# {11, 15, 2}
#
# The values are: 1..10, 11, 13, 15
#
# One lazily computed getter named `category_<name>` is generated per
# category.
<%- all_strides.each do |category, strides| -%>
private class_getter category_<%= category %> : Array({Int32, Int32, Int32}) do
data = Array({Int32, Int32, Int32}).new(<%= strides.size %>)
<%- strides.each do |stride| -%>
put(data, <%= stride.low %>, <%= stride.high %>, <%= stride.stride %>)
<%- end -%>
data
end
<%- end %>
# Most casefold conversions map a range of codepoints to another range.
# Here we store: {from, to, delta}
#
# The array is computed lazily on first access (block form of `class_getter`)
# and is created with capacity for every entry up front.
private class_getter casefold_ranges : Array({Int32, Int32, Int32}) do
data = Array({Int32, Int32, Int32}).new(<%= casefold_ranges.size %>)
<%- casefold_ranges.each do |range| -%>
put(data, <%= range.low %>, <%= range.high %>, <%= range.delta %>)
<%- end -%>
data
end
# Special downcase transformations that involve mapping a codepoint
# to multiple codepoints. A transformation is never longer than 3
# codepoints, so we store them all as 3 codepoints and 0 means end.
# Here we store: codepoint => {first, second, third}
#
# The hash is computed lazily on first access (block form of `class_getter`)
# and is created with capacity for every entry up front.
private class_getter special_cases_downcase : Hash(Int32, {Int32, Int32, Int32}) do
data = Hash(Int32, {Int32, Int32, Int32}).new(initial_capacity: <%= special_cases_downcase.size %>)
<%- special_cases_downcase.each do |a_case| -%>
put(data, <%= a_case.codepoint %>, <%= a_case.value.join(", ") %>)
<%- end -%>
data
end
# Special upcase transformations that involve mapping a codepoint
# to multiple codepoints. A transformation is never longer than 3
# codepoints, so we store them all as 3 codepoints and 0 means end.
# Here we store: codepoint => {first, second, third}
#
# The hash is computed lazily on first access (block form of `class_getter`)
# and is created with capacity for every entry up front.
private class_getter special_cases_upcase : Hash(Int32, {Int32, Int32, Int32}) do
data = Hash(Int32, {Int32, Int32, Int32}).new(initial_capacity: <%= special_cases_upcase.size %>)
<%- special_cases_upcase.each do |a_case| -%>
put(data, <%= a_case.codepoint %>, <%= a_case.value.join(", ") %>)
<%- end -%>
data
end
# Titlecase transformations that differ from the uppercase transformation.
# A transformation is never longer than 3 codepoints, so we store them all
# as 3 codepoints and 0 means end.
# Here we store: codepoint => {first, second, third}
#
# The hash is computed lazily on first access (block form of `class_getter`)
# and is created with capacity for every entry up front.
private class_getter special_cases_titlecase : Hash(Int32, {Int32, Int32, Int32}) do
data = Hash(Int32, {Int32, Int32, Int32}).new(initial_capacity: <%= special_cases_titlecase.size %>)
<%- special_cases_titlecase.each do |a_case| -%>
put(data, <%= a_case.codepoint %>, <%= a_case.value.join(", ") %>)
<%- end -%>
data
end
# Fold case transformations that involve mapping a codepoint
# to multiple codepoints. A transformation is never longer than 3
# codepoints, so we store them all as 3 codepoints and 0 means end.
# Here we store: codepoint => {first, second, third}
#
# The hash is computed lazily on first access (block form of `class_getter`)
# and is created with capacity for every entry up front.
private class_getter fold_cases : Hash(Int32, {Int32, Int32, Int32}) do
data = Hash(Int32, {Int32, Int32, Int32}).new(initial_capacity: <%= special_cases_casefold.size %>)
<%- special_cases_casefold.each do |a_case| -%>
put(data, <%= a_case.codepoint %>, <%= a_case.value.join(", ") %>)
<%- end -%>
data
end
# Canonical combining classes. Only non-zero entries are stored. Unicode
# guarantees that all class values are within `0..254`, which is why the
# class is stored as a `UInt8` (each literal carries a `_u8` suffix).
# Here we store: {from, to, class}
#
# The array is computed lazily on first access (block form of `class_getter`)
# and is created with capacity for every entry up front.
private class_getter canonical_combining_classes : Array({Int32, Int32, UInt8}) do
data = Array({Int32, Int32, UInt8}).new(<%= canonical_combining_classes.size %>)
<%- canonical_combining_classes.each do |range| -%>
put(data, <%= range.low %>, <%= range.high %>, <%= range.ccc %>_u8)
<%- end -%>
data
end
# Canonical decomposition mappings, excluding Hangul syllables. A
# decomposition is never longer than 2 codepoints, so we store them all as
# 2 codepoints and 0 means end.
# Here we store: codepoint => {first, second}
#
# Each entry is written as three comma-separated integers: the key followed
# by the two mapped codepoints.
#
# The hash is computed lazily on first access (block form of `class_getter`)
# and is created with capacity for every entry up front.
private class_getter canonical_decompositions : Hash(Int32, {Int32, Int32}) do
data = Hash(Int32, {Int32, Int32}).new(initial_capacity: <%= canonical_decompositions.size %>)
<%- canonical_decompositions.each do |decomp| -%>
put(data, <%= decomp.join(", ") %>)
<%- end -%>
data
end
# Codepoints for compatibility decomposition mappings, stored as one flat
# array. Individual mappings are subsequences of this array, addressed by
# the {index, count} pairs in `compatibility_decompositions`.
#
# The array is computed lazily on first access (block form of `class_getter`)
# and is created with capacity for every entry up front.
private class_getter compatibility_decomposition_data : Array(Int32) do
data = Array(Int32).new(<%= compatibility_decomposition_data.size %>)
<%- compatibility_decomposition_data.each do |codepoint| -%>
put(data, <%= codepoint %>)
<%- end -%>
data
end
# Compatibility decomposition mappings, represented as subsequences of
# `compatibility_decomposition_data`. The longest transformation is 18
# codepoints.
# Here we store: codepoint => {index, count}
#
# The hash is computed lazily on first access (block form of `class_getter`)
# and is created with capacity for every entry up front.
private class_getter compatibility_decompositions : Hash(Int32, {Int32, Int32}) do
data = Hash(Int32, {Int32, Int32}).new(initial_capacity: <%= compatibility_decompositions.size %>)
<%- compatibility_decompositions.each do |codepoint, index, count| -%>
put(data, <%= codepoint %>, <%= index %>, <%= count %>)
<%- end -%>
data
end
# Reverse mapping of the canonical decompositions, excluding the full
# composition exclusions.
# Here we store: (first << 21 | second) => codepoint
#
# The packed key is an `Int64`; each key literal carries an `_i64` suffix.
#
# The hash is computed lazily on first access (block form of `class_getter`)
# and is created with capacity for every entry up front.
private class_getter canonical_compositions : Hash(Int64, Int32) do
data = Hash(Int64, Int32).new(initial_capacity: <%= canonical_compositions.size %>)
<%- canonical_compositions.each do |first_second, codepoint| -%>
put(data, <%= first_second %>_i64, <%= codepoint %>)
<%- end -%>
data
end
# Used to quickly determine whether a codepoint may appear under Normalization
# Form C. A codepoint that is absent from this table is a "yes".
# Here we store: {low, high, result (no or maybe)}
#
# The array is computed lazily on first access (block form of `class_getter`)
# and is created with capacity for every entry up front.
private class_getter nfc_quick_check : Array({Int32, Int32, QuickCheckResult}) do
<%- quick_check = quick_checks[Unicode::NormalizationForm::NFC] -%>
data = Array({Int32, Int32, QuickCheckResult}).new(<%= quick_check.size %>)
<%- quick_check.each do |range| -%>
put(data, <%= range.low %>, <%= range.high %>, QuickCheckResult::<%= range.result %>)
<%- end -%>
data
end
# Used to quickly determine whether a codepoint may appear under Normalization
# Form KC. A codepoint that is absent from this table is a "yes".
# Here we store: {low, high, result (no or maybe)}
#
# The array is computed lazily on first access (block form of `class_getter`)
# and is created with capacity for every entry up front.
private class_getter nfkc_quick_check : Array({Int32, Int32, QuickCheckResult}) do
<%- quick_check = quick_checks[Unicode::NormalizationForm::NFKC] -%>
data = Array({Int32, Int32, QuickCheckResult}).new(<%= quick_check.size %>)
<%- quick_check.each do |range| -%>
put(data, <%= range.low %>, <%= range.high %>, QuickCheckResult::<%= range.result %>)
<%- end -%>
data
end
# Used to quickly determine whether a codepoint may appear under Normalization
# Form D. A codepoint that is absent from this table is a "yes". There are
# no "maybe" values; codepoints contained here may not appear under NFD.
# Here we store: {low, high}
#
# The array is computed lazily on first access (block form of `class_getter`)
# and is created with capacity for every entry up front.
private class_getter nfd_quick_check : Array({Int32, Int32}) do
<%- quick_check = quick_checks[Unicode::NormalizationForm::NFD] -%>
data = Array({Int32, Int32}).new(<%= quick_check.size %>)
<%- quick_check.each do |range| -%>
put(data, <%= range.low %>, <%= range.high %>)
<%- end -%>
data
end
# Used to quickly determine whether a codepoint may appear under Normalization
# Form KD. A codepoint that is absent from this table is a "yes". There are
# no "maybe" values; codepoints contained here may not appear under NFKD.
# Here we store: {low, high}
#
# The array is computed lazily on first access (block form of `class_getter`)
# and is created with capacity for every entry up front.
private class_getter nfkd_quick_check : Array({Int32, Int32}) do
<%- quick_check = quick_checks[Unicode::NormalizationForm::NFKD] -%>
data = Array({Int32, Int32}).new(<%= quick_check.size %>)
<%- quick_check.each do |range| -%>
put(data, <%= range.low %>, <%= range.high %>)
<%- end -%>
data
end
# TODO: this is needed to avoid generating lots of allocas
# in LLVM, which makes LLVM really slow. The compiler should
# try to avoid/reuse temporary allocas.
# Explanation: https://github.com/crystal-lang/crystal/issues/4516#issuecomment-306226171
#
# The four `put` overloads below are the only way the tables in this file
# are populated: every entry is added through a call to one of them.
#
# Appends a single *value* to *array*.
private def self.put(array : Array, value) : Nil
array << value
end
# Appends the splatted *values* to *array* as one tuple element.
private def self.put(array : Array, *values) : Nil
array << values
end
# Stores a single *value* under *key* in *hash*.
private def self.put(hash : Hash, key, value) : Nil
hash[key] = value
end
# Stores the splatted *values*, as one tuple, under *key* in *hash*.
private def self.put(hash : Hash, key, *values) : Nil
hash[key] = values
end
end
|