File: conditional_casing.rb

package info (click to toggle)
ruby-unicode-utils 1.4.0-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, forky, sid, trixie
  • size: 1,988 kB
  • sloc: ruby: 1,877; makefile: 4
file content (162 lines) | stat: -rw-r--r-- 3,915 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# -*- encoding: utf-8 -*-

require "unicode_utils/cased_char_q"
require "unicode_utils/case_ignorable_char_q"
require "unicode_utils/soft_dotted_char_q"
require "unicode_utils/combining_class"
require "unicode_utils/read_cdata"

module UnicodeUtils

  module Impl # :nodoc:all

    # Judging by the name, the language ids that have casing rules of
    # their own. Used as a set, so every value is simply true. (The ids
    # look like the ISO 639 codes for Turkish, Lithuanian and
    # Azerbaijani -- confirm against the callers.) Frozen so that this
    # shared table cannot be modified by accident at runtime.
    LANGS_WITH_RULES = {:tr => true, :lt => true, :az => true}.freeze

    # Base class of the casing rules in this file: it pairs a mapping
    # with a context test. The base version accepts every context;
    # the subclasses in this file override context_match? to impose a
    # condition on the text around the character.
    class ConditionalCasing
      # The mapping object given to the constructor, returned unchanged.
      attr_reader :mapping

      # mapping:: the value to hand out through #mapping; it is stored
      #           as given, without copying or validation.
      def initialize(mapping)
        @mapping = mapping
      end

      # Tells whether the rule applies to the character at index +pos+
      # of +str+. Both arguments are ignored here and the answer is
      # always true.
      def context_match?(str, pos)
        true
      end
    end

    # Casing rule whose context is "a COMBINING DOT ABOVE (U+0307)
    # follows the character".
    class BeforeDotConditionalCasing < ConditionalCasing
      # Walks forward from the character after +pos+. Answers true at the
      # first U+0307, and false at the first character whose combining
      # class is 0 or 230 or when the end of +str+ is reached.
      def context_match?(str, pos)
        ((pos + 1)...str.length).each do |idx|
          char = str[idx]
          # The dot test comes first, so a U+0307 is accepted before the
          # combining class test below gets a chance to stop the scan.
          return true if char.ord == 0x0307

          ccc = UnicodeUtils.combining_class(char)
          blocked = ccc == 0 || ccc == 230
          return false if blocked
        end
        false # "combining dot above" not found
      end
    end

    # Negation of BeforeDotConditionalCasing: the rule applies exactly
    # when the parent class reports that its context does not match.
    class NotBeforeDotConditionalCasing < BeforeDotConditionalCasing
      # Returns true when the inherited check answers false, and false
      # when it answers true.
      def context_match?(str, pos)
        dot_follows = super(str, pos)
        !dot_follows
      end
    end

    # Casing rule whose context is "a character of combining class 230
    # follows, with no character of combining class 0 in between".
    class MoreAboveConditionalCasing < ConditionalCasing
      # Walks forward from the character after +pos+ and decides on the
      # first character whose combining class is 230 (true) or 0 (false).
      # Characters of any other class are skipped. Answers false when the
      # end of +str+ is reached without a decision.
      def context_match?(str, pos)
        ((pos + 1)...str.length).each do |idx|
          case UnicodeUtils.combining_class(str[idx])
          when 230 then return true
          when 0 then return false
          end
        end
        false
      end
    end

    # Casing rule whose context is "an uppercase I (U+0049) precedes
    # the character".
    class AfterIConditionalCasing < ConditionalCasing
      # Walks backward from the character before +pos+ down to index 0.
      # Answers true at the first U+0049, and false at the first character
      # whose combining class is 0 or 230 or when the start of +str+ is
      # reached.
      def context_match?(str, pos)
        (pos - 1).step(0, -1) do |idx|
          char = str[idx]
          # Checked before the combining class, so the I itself is
          # recognised rather than ending the scan.
          return true if char.ord == 0x49 # uppercase I

          ccc = UnicodeUtils.combining_class(char)
          blocked = ccc == 0 || ccc == 230
          return false if blocked
        end
        false # uppercase I not found
      end
    end

    # Casing rule whose context is "a character for which
    # UnicodeUtils.soft_dotted_char? is true precedes the character".
    class AfterSoftDottedConditionalCasing < ConditionalCasing
      # Walks backward from the character before +pos+ down to index 0.
      # Answers true at the first soft dotted character, and false at the
      # first character whose combining class is 0 or 230 or when the
      # start of +str+ is reached.
      def context_match?(str, pos)
        (pos - 1).step(0, -1) do |idx|
          char = str[idx]
          # Soft-dotted test first, so such a character is accepted
          # before the combining class test can end the scan.
          return true if UnicodeUtils.soft_dotted_char?(char)

          ccc = UnicodeUtils.combining_class(char)
          blocked = ccc == 0 || ccc == 230
          return false if blocked
        end
        false
      end
    end

    # Casing rule whose context is "a cased character comes before, but
    # none comes after", where case-ignorable characters may sit between
    # the character at +pos+ and the cased one on either side.
    class FinalSigmaConditionalCasing < ConditionalCasing
      # True when before_match? holds and after_match? does not. The
      # forward scan is only run if the backward scan succeeded.
      def context_match?(str, pos)
        return false unless before_match?(str, pos)

        !after_match?(str, pos)
      end

      private

      # Scans backward from the character before +pos+. Answers true at
      # the first cased character, and false at the first character that
      # is neither cased nor case-ignorable or at the start of +str+.
      def before_match?(str, pos)
        (pos - 1).step(0, -1) do |idx|
          char = str[idx]
          if UnicodeUtils.cased_char?(char)
            return true
          elsif !UnicodeUtils.case_ignorable_char?(char)
            return false
          end
        end
        false # no cased char
      end

      # Scans forward from the character after +pos+ with the same
      # decision rule as before_match?. Answers false if the end of +str+
      # is reached first.
      def after_match?(str, pos)
        ((pos + 1)...str.length).each do |idx|
          char = str[idx]
          if UnicodeUtils.cased_char?(char)
            return true
          elsif !UnicodeUtils.case_ignorable_char?(char)
            return false
          end
        end
        false
      end
    end

    # Conditional casing tables, one per case operation, each built by
    # read_conditional_casings from the given name (presumably the name
    # of a data file -- confirm in the reader). The lookup methods in
    # this module index a table first by +cp+ and then by language id,
    # falling back to the +nil+ key, to obtain a casing object.
    CONDITIONAL_UPCASE_MAP = read_conditional_casings("cond_uc_map")
    CONDITIONAL_DOWNCASE_MAP = read_conditional_casings("cond_lc_map")
    CONDITIONAL_TITLECASE_MAP = read_conditional_casings("cond_tc_map")

    # Looks up the conditional uppercase mapping for +cp+.
    #
    # cp::          key into CONDITIONAL_UPCASE_MAP (presumably the code
    #               point of the character at +pos+).
    # str, pos::    passed on unchanged to the casing's context_match?.
    # language_id:: key of the preferred language-specific casing; the
    #               entry stored under +nil+ is used when there is none.
    #
    # Returns the casing's mapping, or nil when +cp+ has no entry, no
    # casing is found, or the casing's context does not match.
    def self.conditional_upcase_mapping(cp, str, pos, language_id)
      lookup_conditional_mapping(CONDITIONAL_UPCASE_MAP, cp, str, pos, language_id)
    end

    # Same as conditional_upcase_mapping, but reads
    # CONDITIONAL_DOWNCASE_MAP. Returns the mapping or nil.
    def self.conditional_downcase_mapping(cp, str, pos, language_id)
      lookup_conditional_mapping(CONDITIONAL_DOWNCASE_MAP, cp, str, pos, language_id)
    end

    # Same as conditional_upcase_mapping, but reads
    # CONDITIONAL_TITLECASE_MAP. Returns the mapping or nil.
    def self.conditional_titlecase_mapping(cp, str, pos, language_id)
      lookup_conditional_mapping(CONDITIONAL_TITLECASE_MAP, cp, str, pos, language_id)
    end

    class << self
      private

      # Shared lookup behind the three conditional_*_mapping methods.
      # +casing_map+ is indexed by +cp+ and yields a per-language table.
      # The casing stored under +language_id+ wins over the one stored
      # under +nil+. Its mapping is returned only if its context_match?
      # accepts (+str+, +pos+); in every other case the result is nil.
      def lookup_conditional_mapping(casing_map, cp, str, pos, language_id)
        lang_map = casing_map[cp]
        return unless lang_map

        casing = lang_map[language_id] || lang_map[nil]
        casing.mapping if casing && casing.context_match?(str, pos)
      end
    end

  end

end