1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
|
module MDUrl
  # Decodes percent-encoded (%XX) strings, interpreting the escaped bytes as
  # UTF-8 and substituting U+FFFD for bytes that do not form valid UTF-8.
  module Decode
    # Memoized lookup tables built by getDecodeCache, keyed by `exclude` string.
    @@decodeCache = {}

    # Characters that stay percent-encoded when `decode` is called without an
    # `exclude` string. The constant name is misspelt ("DEFTAULT"), but it is
    # kept as-is because outside code may reference it.
    DEFTAULT_CHARS  = ';/?:@&=+$,#'
    # Empty exclusion set: no ASCII escape is kept encoded.
    COMPONENT_CHARS = ''

    #------------------------------------------------------------------------------
    # Build (or fetch from the memo) the 128-entry lookup table used for escapes
    # of ASCII bytes.
    #
    # @param exclude [String] characters whose escapes must stay encoded
    # @return [Array<String>] table indexed by byte value (0..127); each entry is
    #   the literal character, or an upper-case "%XX" escape for characters
    #   listed in `exclude`
    def self.getDecodeCache(exclude)
      @@decodeCache[exclude] ||= begin
        table = Array.new(128) { |code| code.chr }
        exclude.each_char do |ch|
          code = ch.ord
          table[code] = '%' + code.to_s(16).upcase.rjust(2, '0')[-2, 2]
        end
        table
      end
    end

    # Decode percent-encoded string.
    #------------------------------------------------------------------------------
    # Every run of consecutive %XX escapes is decoded as a UTF-8 byte sequence.
    # Text outside such runs is returned unchanged.
    #
    # @param string [String] text that may contain %XX escapes
    # @param exclude [String, nil] characters whose escapes are left encoded
    #   (normalised to upper-case hex); anything that is not a String selects
    #   DEFTAULT_CHARS
    # @return [String] the decoded copy of `string`
    def self.decode(string, exclude = nil)
      exclude = DEFTAULT_CHARS unless exclude.is_a?(String)
      cache   = getDecodeCache(exclude)

      string.gsub(/(%[a-f0-9]{2})+/i) { |seq| decode_sequence(seq, cache) }
    end

    # Decode one run of consecutive %XX escapes.
    #
    # @param seq [String] text matching /(%[a-f0-9]{2})+/i
    # @param cache [Array<String>] table produced by getDecodeCache
    # @return [String] decoded text; malformed, overlong, surrogate or
    #   out-of-range sequences yield one U+FFFD per consumed byte
    def self.decode_sequence(seq, cache)
      bytes  = seq.scan(/\h\h/).map(&:hex) # one integer per %XX escape
      total  = bytes.length
      result = ''.dup
      pos    = 0

      while pos < total
        b1 = bytes[pos]

        # 0xxxxxxx: plain ASCII, resolved through the exclusion-aware table.
        if b1 < 0x80
          result << cache[b1]
          pos += 1
          next
        end

        # 110xxxxx 10xxxxxx
        if (b1 & 0xE0) == 0xC0 && pos + 1 < total
          b2 = bytes[pos + 1]
          if (b2 & 0xC0) == 0x80
            code = ((b1 << 6) & 0x7C0) | (b2 & 0x3F)
            # Values below 0x80 are overlong encodings of ASCII.
            result << (code < 0x80 ? "\ufffd\ufffd" : code.chr(Encoding::UTF_8))
            pos += 2
            next
          end
        end

        # 1110xxxx 10xxxxxx 10xxxxxx
        if (b1 & 0xF0) == 0xE0 && pos + 2 < total
          b2 = bytes[pos + 1]
          b3 = bytes[pos + 2]
          if (b2 & 0xC0) == 0x80 && (b3 & 0xC0) == 0x80
            code = ((b1 << 12) & 0xF000) | ((b2 << 6) & 0xFC0) | (b3 & 0x3F)
            # Reject overlong forms and UTF-16 surrogate code points.
            if code < 0x800 || (code >= 0xD800 && code <= 0xDFFF)
              result << "\ufffd\ufffd\ufffd"
            else
              result << code.chr(Encoding::UTF_8)
            end
            pos += 3
            next
          end
        end

        # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        if (b1 & 0xF8) == 0xF0 && pos + 3 < total
          b2 = bytes[pos + 1]
          b3 = bytes[pos + 2]
          b4 = bytes[pos + 3]
          if (b2 & 0xC0) == 0x80 && (b3 & 0xC0) == 0x80 && (b4 & 0xC0) == 0x80
            code = ((b1 << 18) & 0x1C0000) | ((b2 << 12) & 0x3F000) | ((b3 << 6) & 0xFC0) | (b4 & 0x3F)
            if code < 0x10000 || code > 0x10FFFF
              result << "\ufffd\ufffd\ufffd\ufffd"
            else
              # Emit the code point itself. Splitting it into surrogate halves
              # and calling Integer#chr on each raises RangeError, because
              # surrogates are not encodable in UTF-8.
              result << code.chr(Encoding::UTF_8)
            end
            pos += 4
            next
          end
        end

        # Stray continuation byte, invalid lead byte, or truncated sequence.
        result << "\ufffd"
        pos += 1
      end

      result
    end
    private_class_method :decode_sequence
  end
end
# Unused reference code, kept commented out: UTF-16 surrogate-pair helpers
# taken from https://gist.github.com/kreeger/4480326
# class Fixnum
# def to_surrogate_pair
# if self >= 0x10000 && self <= 0x10FFFF
# high = ((self - 0x10000) / 0x400).floor + 0xD800
# low = ((self - 0x10000) % 0x400) + 0xDC00
# end
# '\U' + [high, low].map { |x| x.to_s(16) }.join('\U').upcase
# end
#
# end
#
# class String
# def to_hex
# self.gsub('\U000', '0x').to_i(16)
# end
# end
#
|