File: writer.rb

package info (click to toggle)
ruby-character-set 1.8.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 416 kB
  • sloc: ansic: 2,597; ruby: 1,290; makefile: 7; sh: 4
file content (108 lines) | stat: -rw-r--r-- 3,914 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
class CharacterSet
  # Turns codepoint ranges into strings of escaped characters, including
  # expressions that spell astral codepoints (0x10000 and above) as pairs of
  # surrogate codepoints.
  module Writer
    class << self
      # Renders codepoint ranges as one string of escaped characters.
      #
      # A range holding more than two codepoints is written as "first-last"
      # unless `opts[:abbreviate]` is `false`; any other range is written
      # codepoint by codepoint.
      #
      # @param codepoint_ranges [Enumerable] ranges of codepoints to write
      # @param opts [Hash] forwarded unchanged to {write_codepoint}; this
      #   method itself reads `:abbreviate` and `:in_brackets`
      # @param block optional block, forwarded to {write_codepoint}
      # @return [String] the written content, wrapped in "[...]" when
      #   `opts[:in_brackets]` is truthy
      def write(codepoint_ranges, opts = {}, &block)
        escape = ->(codepoint) { write_codepoint(codepoint, opts, &block) }

        pieces = codepoint_ranges.map do |range|
          if range.size <= 2 || opts[:abbreviate] == false
            range.map(&escape).join
          else
            [range.min, range.max].map(&escape).join('-')
          end
        end

        content = pieces.join
        return content unless opts[:in_brackets]

        "[#{content}]"
      end

      # Escapes a single codepoint by delegating to `Character#escape`.
      #
      # @param codepoint [Integer] codepoint handed to `Character.new`
      # @param opts [Hash] passed through to `Character#escape`
      # @param block optional block, passed through to `Character#escape`
      # @return whatever `Character#escape` returns (presumably a String)
      def write_codepoint(codepoint, opts = {}, &block)
        character = Character.new(codepoint)
        character.escape(opts, &block)
      end

      # Writes a BMP bracket set plus one alternative per group of compressed
      # surrogate ranges covering the astral ranges.
      #
      # @param bmp_ranges [Array] codepoint ranges written as a bracket set
      # @param astral_ranges [Array] codepoint ranges written as surrogates
      # @return [String]
      def write_surrogate_ranges(bmp_ranges, astral_ranges)
        bmp_set_with_alternatives(
          bmp_ranges,
          surrogate_range_expressions(astral_ranges)
        )
      end

      # Writes a BMP bracket set plus one alternative per individual astral
      # codepoint, each spelled out as its surrogate pair.
      #
      # @param bmp_ranges [Array] codepoint ranges written as a bracket set
      # @param astral_ranges [Array] codepoint ranges written as surrogates
      # @return [String]
      def write_surrogate_alternation(bmp_ranges, astral_ranges)
        bmp_set_with_alternatives(bmp_ranges, surrogate_pairs(astral_ranges))
      end

      private

      # Builds one "<high part><low part>" expression for every entry returned
      # by {compressed_surrogate_range_pairs}.
      def surrogate_range_expressions(astral_ranges)
        compressed_surrogate_range_pairs(astral_ranges).map do |hi_ranges, lo_ranges|
          "#{surrogate_half_expression(hi_ranges)}" \
            "#{surrogate_half_expression(lo_ranges)}"
        end
      end

      # Writes one half (high or low) of a surrogate expression. Brackets are
      # left out only when the half is exactly one single-codepoint range.
      def surrogate_half_expression(ranges)
        single_codepoint = ranges.size <= 1 && ranges.first.size <= 1
        write(ranges, format: :js, in_brackets: !single_codepoint)
      end

      # Maps astral ranges to a Hash of
      # `{ [high surrogate ranges] => [low surrogate ranges] }`.
      def compressed_surrogate_range_pairs(astral_ranges)
        # Collect, per low surrogate range, every high range paired with it.
        hi_ranges_by_lo = astral_ranges.each_with_object({}) do |astral_range, acc|
          surrogate_half_ranges(astral_range).each do |hi_range, lo_range|
            (acc[lo_range] ||= []) << hi_range
          end
        end

        # Merge touching high ranges, then collect the low ranges that ended
        # up sharing an identical list of high ranges.
        hi_ranges_by_lo.each_with_object({}) do |(lo_range, hi_ranges), acc|
          (acc[merge_adjacent_ranges(hi_ranges)] ||= []) << lo_range
        end
      end

      # Folds each range into its predecessor unless there is a gap between
      # the two. Relies on the ranges arriving in ascending order.
      def merge_adjacent_ranges(ranges)
        ranges.each_with_object([]) do |range, merged|
          last = merged.last
          if last && last.max + 1 >= range.min
            merged[-1] = last.min..range.max
          else
            merged << range
          end
        end
      end

      # Splits one astral range into `[high range, low range]` pairs.
      #
      # A range within a single high surrogate yields one pair. Otherwise the
      # first and last high surrogate get their own, possibly partial, low
      # range, and any high surrogates in between share the full low range.
      def surrogate_half_ranges(astral_range)
        first_hi, first_lo = surrogate_pair_codepoints(astral_range.min)
        last_hi, last_lo = surrogate_pair_codepoints(astral_range.max)

        hi_distance = last_hi - first_hi
        return [[first_hi..first_hi, first_lo..last_lo]] if hi_distance.zero?

        lo_floor = 0xDC00
        lo_ceiling = 0xDFFF
        head = [first_hi..first_hi, first_lo..lo_ceiling]
        tail = [last_hi..last_hi, lo_floor..last_lo]
        return [head, tail] unless hi_distance > 1

        [head, [(first_hi + 1)..(last_hi - 1), lo_floor..lo_ceiling], tail]
      end

      # Computes the surrogate pair for an astral codepoint: after removing
      # the 0x10000 offset, the quotient by 1024 selects the high surrogate
      # (from 0xD800) and the remainder the low surrogate (from 0xDC00).
      #
      # @return [Array(Integer, Integer)] `[high, low]`
      def surrogate_pair_codepoints(astral_codepoint)
        hi_offset, lo_offset = (astral_codepoint - 0x10000).divmod(1024)
        [0xD800 + hi_offset, 0xDC00 + lo_offset]
      end

      # Returns the bare BMP bracket set when there are BMP ranges and no
      # alternatives; otherwise joins the BMP set (if any) and the
      # alternatives with "|" inside a "(?:...)" group.
      def bmp_set_with_alternatives(bmp_ranges, alternatives)
        bmp_set = write(bmp_ranges, format: :js, in_brackets: true)
        has_bmp = bmp_ranges.any?
        return bmp_set if has_bmp && alternatives.empty?

        branches = []
        branches << bmp_set if has_bmp
        branches.concat(alternatives)
        "(?:#{branches.join('|')})"
      end

      # Lists the written surrogate pair of every codepoint in the ranges.
      def surrogate_pairs(astral_ranges)
        astral_ranges.each_with_object([]) do |range, pairs|
          range.each { |codepoint| pairs << surrogate_pair(codepoint) }
        end
      end

      # Writes one astral codepoint as its high half followed by its low half.
      def surrogate_pair(astral_codepoint)
        high, low = surrogate_pair_codepoints(astral_codepoint)
        [high, low].map { |half| write_codepoint(half, format: :js) }.join
      end
    end
  end
end