File: decode.rb

package info (click to toggle)
ruby-mdurl-rb 1.0.5-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 156 kB
  • sloc: ruby: 1,077; makefile: 7
file content (145 lines) | stat: -rwxr-xr-x 3,894 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
module MDUrl
  # Percent-decoding of strings, with validation of the decoded UTF-8 byte
  # sequences. Invalid, overlong or truncated sequences are replaced by
  # U+FFFD (one replacement character per offending byte).
  module Decode

    # Memoized lookup tables built by getDecodeCache, keyed by `exclude`.
    @@decodeCache = {}

    # Characters that stay percent-encoded when no `exclude` is given.
    # The constant name is misspelled; it is kept because code outside this
    # file may reference it. DEFAULT_CHARS is a correctly spelled alias.
    DEFTAULT_CHARS   = ';/?:@&=+$,#'
    DEFAULT_CHARS    = DEFTAULT_CHARS
    # Empty exclusion set: every ASCII escape is decoded.
    COMPONENT_CHARS  = ''


    # Build (or fetch from the memo) the ASCII lookup table for `exclude`.
    #
    # The table has one entry per code 0..127. The entry is the character
    # itself, except for characters listed in `exclude`, whose entry is their
    # upper-case percent-encoded form (so decoding leaves them encoded).
    #
    # exclude - String of characters that must remain percent-encoded.
    #
    # Returns the Array used as lookup table.
    #------------------------------------------------------------------------------
    def self.getDecodeCache(exclude)
      cache = @@decodeCache[exclude]
      return cache if cache

      cache = (0...128).map { |code| code.chr }

      exclude.length.times do |i|
        code = exclude[i].ord
        # Two upper-case hex digits, zero padded.
        cache[code] = '%' + ('0' + code.to_s(16).upcase).slice(-2, 2)
      end

      # Memoize only once the table is complete.
      @@decodeCache[exclude] = cache
    end


    # Decode percent-encoded string.
    #
    # string  - String that may contain %XX escapes.
    # exclude - String of ASCII characters whose escapes must be kept encoded;
    #           anything that is not a String selects DEFTAULT_CHARS.
    #
    # Returns a new String with the escapes decoded.
    #------------------------------------------------------------------------------
    def self.decode(string, exclude = nil)
      exclude = DEFTAULT_CHARS unless exclude.is_a?(String)
      cache   = getDecodeCache(exclude)

      # Each match is a maximal run of consecutive %XX escapes, so multi-byte
      # UTF-8 sequences are always seen as a whole.
      string.gsub(/(%[a-f0-9]{2})+/i) { |seq| decode_sequence(seq, cache) }
    end


    # Decode one run of consecutive %XX escapes.
    #
    # seq   - String made only of %XX triples.
    # cache - lookup table from getDecodeCache, used for bytes below 0x80.
    #
    # Returns the decoded String.
    #------------------------------------------------------------------------------
    def self.decode_sequence(seq, cache)
      bytes  = seq.scan(/\h\h/).map { |hex| hex.to_i(16) }
      count  = bytes.length
      result = ''.dup
      pos    = 0

      while pos < count
        b1 = bytes[pos]

        if b1 < 0x80
          result << cache[b1]
          pos += 1
          next
        end

        if (b1 & 0xE0) == 0xC0 && pos + 1 < count
          # 110xxxxx 10xxxxxx
          b2 = bytes[pos + 1]

          if (b2 & 0xC0) == 0x80
            char = ((b1 << 6) & 0x7C0) | (b2 & 0x3F)

            # Reject overlong encodings of ASCII.
            result << (char < 0x80 ? "\ufffd\ufffd" : char.chr(Encoding::UTF_8))

            pos += 2
            next
          end
        end

        if (b1 & 0xF0) == 0xE0 && pos + 2 < count
          # 1110xxxx 10xxxxxx 10xxxxxx
          b2 = bytes[pos + 1]
          b3 = bytes[pos + 2]

          if (b2 & 0xC0) == 0x80 && (b3 & 0xC0) == 0x80
            char = ((b1 << 12) & 0xF000) | ((b2 << 6) & 0xFC0) | (b3 & 0x3F)

            # Reject overlong encodings and UTF-16 surrogate code points.
            if char < 0x800 || (char >= 0xD800 && char <= 0xDFFF)
              result << "\ufffd\ufffd\ufffd"
            else
              result << char.chr(Encoding::UTF_8)
            end

            pos += 3
            next
          end
        end

        if (b1 & 0xF8) == 0xF0 && pos + 3 < count
          # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
          b2 = bytes[pos + 1]
          b3 = bytes[pos + 2]
          b4 = bytes[pos + 3]

          if (b2 & 0xC0) == 0x80 && (b3 & 0xC0) == 0x80 && (b4 & 0xC0) == 0x80
            char = ((b1 << 18) & 0x1C0000) | ((b2 << 12) & 0x3F000) | ((b3 << 6) & 0xFC0) | (b4 & 0x3F)

            if char < 0x10000 || char > 0x10FFFF
              result << "\ufffd\ufffd\ufffd\ufffd"
            else
              # Ruby strings hold the code point directly; splitting it into
              # a surrogate pair would make Integer#chr raise RangeError,
              # because surrogates are not valid UTF-8 code points.
              result << char.chr(Encoding::UTF_8)
            end

            pos += 4
            next
          end
        end

        # Stray continuation byte, invalid lead byte or truncated sequence.
        result << "\ufffd"
        pos += 1
      end

      result
    end
    private_class_method :decode_sequence

  end
end

# https://gist.github.com/kreeger/4480326
# class Fixnum
#   def to_surrogate_pair
#     if self >= 0x10000 && self <= 0x10FFFF
#       high = ((self - 0x10000) / 0x400).floor + 0xD800
#       low = ((self - 0x10000) % 0x400) + 0xDC00
#     end
#     '\U' + [high, low].map { |x| x.to_s(16) }.join('\U').upcase
#   end
#
# end
#
# class String
#   def to_hex
#     self.gsub('\U000', '0x').to_i(16)
#   end
# end
#