File: unicode_data.ecr

package info (click to toggle)
crystal 1.14.0%2Bdfsg-1
links: PTS, VCS
area: main
in suites: sid, trixie
size: 24,384 kB
sloc: javascript: 6,400; sh: 695; makefile: 269; ansic: 121; python: 105; cpp: 77; xml: 32
file content (235 lines) | stat: -rw-r--r-- 9,635 bytes
# This file was automatically generated by running:
#
#   scripts/generate_unicode_data.cr
#
# DO NOT EDIT

module Unicode
  # Upcase conversions that map one range of codepoints onto another range.
  # Each entry is a tuple: {from, to, delta}
  private class_getter upcase_ranges : Array({Int32, Int32, Int32}) do
    data = Array({Int32, Int32, Int32}).new(<%= upcase_ranges.size %>)
    <%- upcase_ranges.each do |entry| -%>
      put(data, <%= entry.low %>, <%= entry.high %>, <%= entry.delta %>)
    <%- end -%>
    data
  end

  # Downcase conversions that map one range of codepoints onto another range.
  # Each entry is a tuple: {from, to, delta}
  private class_getter downcase_ranges : Array({Int32, Int32, Int32}) do
    data = Array({Int32, Int32, Int32}).new(<%= downcase_ranges.size %>)
    <%- downcase_ranges.each do |entry| -%>
      put(data, <%= entry.low %>, <%= entry.high %>, <%= entry.delta %>)
    <%- end -%>
    data
  end

  # Case conversions that do not fit the {from, to, delta} tables: ranges in
  # which uppercase and lowercase transformations alternate.
  # Each entry is a tuple: {from, to}
  private class_getter alternate_ranges : Array({Int32, Int32}) do
    data = Array({Int32, Int32}).new(<%= alternate_ranges.size %>)
    <%- alternate_ranges.each do |entry| -%>
      put(data, <%= entry.low %>, <%= entry.high %>)
    <%- end -%>
    data
  end

  # Every category is stored as a list of consecutive strides
  # {from, to, stride}.
  #
  # For example, these two entries:
  #
  #   {1, 10, 1}
  #   {11, 15, 2}
  #
  # stand for the values: 1..10, 11, 13, 15

  <%- all_strides.each do |category_name, category_strides| -%>
    private class_getter category_<%= category_name %> : Array({Int32, Int32, Int32}) do
      data = Array({Int32, Int32, Int32}).new(<%= category_strides.size %>)
      <%- category_strides.each do |entry| -%>
        put(data, <%= entry.low %>, <%= entry.high %>, <%= entry.stride %>)
      <%- end -%>
      data
    end
  <%- end %>

  # Casefold conversions that map one range of codepoints onto another range.
  # Each entry is a tuple: {from, to, delta}
  private class_getter casefold_ranges : Array({Int32, Int32, Int32}) do
    data = Array({Int32, Int32, Int32}).new(<%= casefold_ranges.size %>)
    <%- casefold_ranges.each do |entry| -%>
      put(data, <%= entry.low %>, <%= entry.high %>, <%= entry.delta %>)
    <%- end -%>
    data
  end

  # Special downcase transformations that involve mapping a codepoint
  # to multiple codepoints. The maximum transformation is always 3
  # codepoints, so we store them all as 3 codepoints and 0 means end.
  # Here we store: codepoint => {first, second, third}
  private class_getter special_cases_downcase : Hash(Int32, {Int32, Int32, Int32}) do
    data = Hash(Int32, {Int32, Int32, Int32}).new(initial_capacity: <%= special_cases_downcase.size %>)
    <%- special_cases_downcase.each do |a_case| -%>
      put(data, <%= a_case.codepoint %>, <%= a_case.value.join(", ") %>)
    <%- end -%>
    data
  end

  # Special upcase transformations that involve mapping a codepoint
  # to multiple codepoints. The maximum transformation is always 3
  # codepoints, so we store them all as 3 codepoints and 0 means end.
  # Here we store: codepoint => {first, second, third}
  private class_getter special_cases_upcase : Hash(Int32, {Int32, Int32, Int32}) do
    data = Hash(Int32, {Int32, Int32, Int32}).new(initial_capacity: <%= special_cases_upcase.size %>)
    <%- special_cases_upcase.each do |a_case| -%>
      put(data, <%= a_case.codepoint %>, <%= a_case.value.join(", ") %>)
    <%- end -%>
    data
  end

  # Titlecase transformations that differ from the uppercase transformation.
  # The maximum transformation is always 3 codepoints, so we store them all as 3
  # codepoints and 0 means end.
  # Here we store: codepoint => {first, second, third}
  private class_getter special_cases_titlecase : Hash(Int32, {Int32, Int32, Int32}) do
    data = Hash(Int32, {Int32, Int32, Int32}).new(initial_capacity: <%= special_cases_titlecase.size %>)
    <%- special_cases_titlecase.each do |a_case| -%>
      put(data, <%= a_case.codepoint %>, <%= a_case.value.join(", ") %>)
    <%- end -%>
    data
  end

  # Case folding transformations where a single codepoint folds to several
  # codepoints. No transformation is longer than 3 codepoints, so every value
  # holds exactly 3 codepoints, with 0 marking the end of a shorter one.
  private class_getter fold_cases : Hash(Int32, {Int32, Int32, Int32}) do
    data = Hash(Int32, {Int32, Int32, Int32}).new(initial_capacity: <%= special_cases_casefold.size %>)
    <%- special_cases_casefold.each do |special| -%>
      put(data, <%= special.codepoint %>, <%= special.value.join(", ") %>)
    <%- end -%>
    data
  end

  # Table of canonical combining classes. Entries whose class is zero are
  # left out. Unicode guarantees that every class value lies within `0..254`,
  # so the class is stored as a `UInt8`.
  # Each entry is a tuple: {from, to, class}
  private class_getter canonical_combining_classes : Array({Int32, Int32, UInt8}) do
    data = Array({Int32, Int32, UInt8}).new(<%= canonical_combining_classes.size %>)
    <%- canonical_combining_classes.each do |entry| -%>
      put(data, <%= entry.low %>, <%= entry.high %>, <%= entry.ccc %>_u8)
    <%- end -%>
    data
  end

  # Canonical decomposition mappings for everything except Hangul syllables.
  # No mapping is longer than 2 codepoints, so every value holds exactly
  # 2 codepoints, with 0 marking the end of a shorter one.
  private class_getter canonical_decompositions : Hash(Int32, {Int32, Int32}) do
    data = Hash(Int32, {Int32, Int32}).new(initial_capacity: <%= canonical_decompositions.size %>)
    <%- canonical_decompositions.each do |mapping| -%>
      put(data, <%= mapping.join(", ") %>)
    <%- end -%>
    data
  end

  # Flat list of the codepoints that compatibility decomposition mappings
  # are made of.
  private class_getter compatibility_decomposition_data : Array(Int32) do
    data = Array(Int32).new(<%= compatibility_decomposition_data.size %>)
    <%- compatibility_decomposition_data.each do |cp| -%>
      put(data, <%= cp %>)
    <%- end -%>
    data
  end

  # Compatibility decomposition mappings. Each mapping is a subsequence of
  # `compatibility_decomposition_data`, at most 18 codepoints long.
  # Each entry is: codepoint => {index, count}
  private class_getter compatibility_decompositions : Hash(Int32, {Int32, Int32}) do
    data = Hash(Int32, {Int32, Int32}).new(initial_capacity: <%= compatibility_decompositions.size %>)
    <%- compatibility_decompositions.each do |cp, offset, length| -%>
      put(data, <%= cp %>, <%= offset %>, <%= length %>)
    <%- end -%>
    data
  end

  # The canonical decompositions mapped in reverse, leaving out the full
  # composition exclusions.
  # Each entry is: (first << 21 | second) => codepoint
  private class_getter canonical_compositions : Hash(Int64, Int32) do
    data = Hash(Int64, Int32).new(initial_capacity: <%= canonical_compositions.size %>)
    <%- canonical_compositions.each do |packed_pair, composed| -%>
      put(data, <%= packed_pair %>_i64, <%= composed %>)
    <%- end -%>
    data
  end

  # Quick-check table for Normalization Form C. A codepoint that is absent
  # from this table may appear under NFC ("yes"); otherwise the stored result
  # applies.
  # Each entry is a tuple: {low, high, result (no or maybe)}
  private class_getter nfc_quick_check : Array({Int32, Int32, QuickCheckResult}) do
    <%- quick_check = quick_checks[Unicode::NormalizationForm::NFC] -%>
    data = Array({Int32, Int32, QuickCheckResult}).new(<%= quick_check.size %>)
    <%- quick_check.each do |entry| -%>
      put(data, <%= entry.low %>, <%= entry.high %>, QuickCheckResult::<%= entry.result %>)
    <%- end -%>
    data
  end

  # Quick-check table for Normalization Form KC. A codepoint that is absent
  # from this table may appear under NFKC ("yes"); otherwise the stored result
  # applies.
  # Each entry is a tuple: {low, high, result (no or maybe)}
  private class_getter nfkc_quick_check : Array({Int32, Int32, QuickCheckResult}) do
    <%- quick_check = quick_checks[Unicode::NormalizationForm::NFKC] -%>
    data = Array({Int32, Int32, QuickCheckResult}).new(<%= quick_check.size %>)
    <%- quick_check.each do |entry| -%>
      put(data, <%= entry.low %>, <%= entry.high %>, QuickCheckResult::<%= entry.result %>)
    <%- end -%>
    data
  end

  # Quick-check table for Normalization Form D. A codepoint that is absent
  # from this table may appear under NFD ("yes"); a codepoint covered by it
  # may not. There is no "maybe" result, so only the bounds are stored.
  # Each entry is a tuple: {low, high}
  private class_getter nfd_quick_check : Array({Int32, Int32}) do
    <%- quick_check = quick_checks[Unicode::NormalizationForm::NFD] -%>
    data = Array({Int32, Int32}).new(<%= quick_check.size %>)
    <%- quick_check.each do |entry| -%>
      put(data, <%= entry.low %>, <%= entry.high %>)
    <%- end -%>
    data
  end

  # Quick-check table for Normalization Form KD. A codepoint that is absent
  # from this table may appear under NFKD ("yes"); a codepoint covered by it
  # may not. There is no "maybe" result, so only the bounds are stored.
  # Each entry is a tuple: {low, high}
  private class_getter nfkd_quick_check : Array({Int32, Int32}) do
    <%- quick_check = quick_checks[Unicode::NormalizationForm::NFKD] -%>
    data = Array({Int32, Int32}).new(<%= quick_check.size %>)
    <%- quick_check.each do |entry| -%>
      put(data, <%= entry.low %>, <%= entry.high %>)
    <%- end -%>
    data
  end

  # TODO: this is needed to avoid generating lots of allocas
  # in LLVM, which makes LLVM really slow. The compiler should
  # try to avoid/reuse temporary allocas.
  # Explanation: https://github.com/crystal-lang/crystal/issues/4516#issuecomment-306226171
  #
  # Appends *value* to *array* as one element.
  private def self.put(array : Array, value) : Nil
    array << value
  end

  # Appends all *values*, gathered into one tuple, to *array* as a single
  # element.
  private def self.put(array : Array, *values) : Nil
    array << values
  end

  # Stores *value* under *key* in *hash*.
  private def self.put(hash : Hash, key, value) : Nil
    hash[key] = value
  end

  # Stores all *values*, gathered into one tuple, under *key* in *hash*.
  private def self.put(hash : Hash, key, *values) : Nil
    hash[key] = values
  end
end