1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238
|
# frozen_string_literal: true
# Released under the MIT License.
# Copyright, 2025, by Samuel Williams.
module Protocol
	module URL
		# Helpers for encoding and decoding URL components.
		#
		# All escaping and unescaping helpers operate on a binary copy of their input, so multi-byte characters are processed one byte at a time, and the result is tagged with the requested encoding afterwards.
		module Encoding
			# Escapes a string using percent encoding, e.g. `a b` -> `a%20b`.
			#
			# Every byte other than ASCII letters, digits, `_`, `.` and `-` is replaced by `%XX` using upper-case hexadecimal digits.
			#
			# @parameter string [String] The string to escape.
			# @parameter encoding [Encoding] The encoding to tag the result with. Defaults to the encoding of `string`.
			# @returns [String] The escaped string.
			#
			# @example Escape spaces and special characters.
			# 	Encoding.escape("hello world!")
			# 	# => "hello%20world%21"
			#
			# @example Escape unicode characters.
			# 	Encoding.escape("café")
			# 	# => "caf%C3%A9"
			def self.escape(string, encoding = string.encoding)
				percent_encode(string, /([^a-zA-Z0-9_.\-]+)/, encoding)
			end
			
			# Unescapes a percent encoded string, e.g. `a%20b` -> `a b`.
			#
			# Only `%` followed by exactly two hexadecimal digits is decoded; any other `%` is left as-is.
			#
			# @parameter string [String] The string to unescape.
			# @parameter encoding [Encoding] The encoding to tag the result with. Defaults to the encoding of `string`.
			# @returns [String] The unescaped string.
			#
			# @example Unescape spaces and special characters.
			# 	Encoding.unescape("hello%20world%21")
			# 	# => "hello world!"
			#
			# @example Unescape unicode characters.
			# 	Encoding.unescape("caf%C3%A9")
			# 	# => "café"
			def self.unescape(string, encoding = string.encoding)
				string.b.gsub(/%(\h\h)/) do
					$1.hex.chr
				end.force_encoding(encoding)
			end
			
			# Unescapes a percent encoded path component, preserving encoded path separators.
			#
			# This method unescapes percent-encoded characters except for path separators
			# (forward slash `/` and backslash `\`). This prevents encoded separators like
			# `%2F` or `%5C` from being decoded into actual path separators, which could
			# allow bypassing path component boundaries.
			#
			# @parameter string [String] The path component to unescape.
			# @parameter encoding [Encoding] The encoding to tag the result with. Defaults to the encoding of `string`.
			# @returns [String] The unescaped string with separators still encoded.
			#
			# @example
			# 	Encoding.unescape_path("hello%20world") # => "hello world"
			# 	Encoding.unescape_path("safe%2Fname") # => "safe%2Fname" (%2F not decoded)
			# 	Encoding.unescape_path("name%5Cfile") # => "name%5Cfile" (%5C not decoded)
			def self.unescape_path(string, encoding = string.encoding)
				string.b.gsub(/%(\h\h)/) do |encoded|
					byte = $1.hex
					
					# Keep forward slash (0x2F) and backslash (0x5C) exactly as they were written (e.g. `%2F` or `%2f`):
					if byte == 0x2F || byte == 0x5C
						encoded
					else
						byte.chr
					end
				end.force_encoding(encoding)
			end
			
			# Matches characters that are not allowed in a URI path segment. According to RFC 3986 Section 3.3 (https://tools.ietf.org/html/rfc3986#section-3.3), a valid path segment consists of "pchar" characters. This pattern identifies characters that must be percent-encoded when included in a URI path segment.
			NON_PATH_CHARACTER_PATTERN = /([^a-zA-Z0-9_\-\.~!$&'()*+,;=:@\/]+)/.freeze
			
			# Matches characters that are not allowed in a URI fragment. According to RFC 3986 Section 3.5, a valid fragment consists of pchar / "/" / "?" characters.
			NON_FRAGMENT_CHARACTER_PATTERN = /([^a-zA-Z0-9_\-\.~!$&'()*+,;=:@\/\?]+)/.freeze
			
			# Escapes non-path characters using percent encoding. In other words, this method escapes characters that are not allowed in a URI path segment. According to RFC 3986 Section 3.3 (https://tools.ietf.org/html/rfc3986#section-3.3), a valid path segment consists of "pchar" characters. This method percent-encodes characters that are not "pchar" characters; `/` is left untouched so that path separators are preserved.
			#
			# @parameter path [String] The path to escape.
			# @returns [String] The escaped path, tagged with the encoding of `path`.
			#
			# @example Escape spaces while preserving path separators.
			# 	Encoding.escape_path("/documents/my reports/summary.pdf")
			# 	# => "/documents/my%20reports/summary.pdf"
			def self.escape_path(path)
				percent_encode(path, NON_PATH_CHARACTER_PATTERN, path.encoding)
			end
			
			# Escapes non-fragment characters using percent encoding. According to RFC 3986 Section 3.5, fragments can contain pchar / "/" / "?" characters.
			#
			# @parameter fragment [String] The fragment to escape.
			# @returns [String] The escaped fragment, tagged with the encoding of `fragment`.
			def self.escape_fragment(fragment)
				percent_encode(fragment, NON_FRAGMENT_CHARACTER_PATTERN, fragment.encoding)
			end
			
			# Replaces every run of bytes matched by `pattern` with its upper-case `%XX` representation.
			#
			# @parameter string [String] The string to encode; it is not modified.
			# @parameter pattern [Regexp] Matches the runs of bytes that must be encoded.
			# @parameter encoding [Encoding] The encoding to tag the result with.
			# @returns [String] The encoded copy of `string`.
			def self.percent_encode(string, pattern, encoding)
				# Match against a binary copy so that the pattern sees individual bytes rather than characters:
				string.b.gsub(pattern) do |run|
					run.each_byte.map {|byte| format("%%%02X", byte)}.join
				end.force_encoding(encoding)
			end
			private_class_method :percent_encode
			
			# Encodes a hash or array into a query string. This method is used to encode query parameters in a URL. For example, `{"a" => 1, "b" => 2}` is encoded as `a=1&b=2`.
			#
			# Hash keys and scalar values are converted with `to_s` and escaped using {escape}. Nested hashes and arrays that encode to nothing (e.g. `{}` or `[]`) are omitted from the output.
			#
			# @parameter value [Hash | Array | Nil] The value to encode.
			# @parameter prefix [String] The prefix to use for keys.
			# @returns [String | Nil] The encoded query string. A `nil` value encodes to just its prefix (which is `nil` at the top level).
			# @raises [ArgumentError] If a scalar value is given without a prefix.
			#
			# @example Encode simple parameters.
			# 	Encoding.encode({"name" => "Alice", "age" => "30"})
			# 	# => "name=Alice&age=30"
			#
			# @example Encode nested parameters.
			# 	Encoding.encode({"user" => {"name" => "Alice", "role" => "admin"}})
			# 	# => "user[name]=Alice&user[role]=admin"
			def self.encode(value, prefix = nil)
				case value
				when Array
					# Drop empty encodings (e.g. from a nested `{}`), otherwise the join would emit stray `&` separators:
					return value.map {|item|
						self.encode(item, "#{prefix}[]")
					}.reject(&:empty?).join("&")
				when Hash
					return value.map {|key, item|
						name = escape(key.to_s)
						self.encode(item, prefix ? "#{prefix}[#{name}]" : name)
					}.reject(&:empty?).join("&")
				when nil
					# A key without a value is encoded as the bare key:
					return prefix
				else
					raise ArgumentError, "value must be a Hash" if prefix.nil?
					
					return "#{prefix}=#{escape(value.to_s)}"
				end
			end
			
			# Scan a string for URL-encoded key/value pairs.
			#
			# Pairs are separated by `&` and empty pairs are skipped. Each pair is split on the first `=`; a pair without `=` yields a `nil` value.
			#
			# @yields {|key, value| ...}
			# 	@parameter key [String] The unescaped key.
			# 	@parameter value [String | Nil] The unescaped value, or `nil` if the pair has no `=`.
			# @parameter string [String] The query string to scan.
			# @returns [Enumerator] If no block is given, an enumerator over the `[key, value]` pairs.
			def self.scan(string)
				return to_enum(:scan, string) unless block_given?
				
				string.split("&") do |assignment|
					next if assignment.empty?
					
					key, value = assignment.split("=", 2)
					
					yield unescape(key), (value && unescape(value))
				end
			end
			
			# Split a key into parts, e.g. `a[b][c]` -> `["a", "b", "c"]`.
			#
			# An empty pair of brackets produces an empty string part, e.g. `a[]` -> `["a", ""]`.
			#
			# @parameter name [String] The key to split.
			# @returns [Array(String)] The parts of the key.
			def self.split(name)
				# Each match captures either a bare segment or the contents of a bracketed segment, never both:
				name.scan(/([^\[]+)|(?:\[(.*?)\])/).map {|bare, bracketed| bare || bracketed}
			end
			
			# Assign a value to a nested hash.
			#
			# This method handles building nested data structures from query string parameters, including arrays of objects. When processing array elements (empty key like `[]`), it decides whether to add to the last array element or create a new one.
			#
			# Existing containers are reused as-is via `||=`; this method does not check that a previously assigned value has the shape implied by the current key path.
			#
			# @parameter keys [Array(String)] The parts of the key.
			# @parameter value [Object] The value to assign.
			# @parameter parent [Hash] The parent hash.
			# @returns [Object] The assigned value.
			#
			# @example Building an array of objects.
			# 	# Query: items[][name]=a&items[][value]=1&items[][name]=b&items[][value]=2
			# 	# When "name" appears again, it creates a new array element
			# 	# Result: {"items" => [{"name" => "a", "value" => "1"}, {"name" => "b", "value" => "2"}]}
			def self.assign(keys, value, parent)
				top, *middle = keys
				
				middle.each_with_index do |key, index|
					if key.nil? || key.empty?
						# Array element (e.g., items[]):
						parent = (parent[top] ||= Array.new)
						
						# By default, append a new element:
						top = parent.size
						
						# If a nested key follows and the last element does not have that key yet, keep filling the last element instead of starting a new one:
						if (nested = middle[index + 1]) && (last = parent.last)
							top -= 1 unless last.include?(nested)
						end
					else
						# Hash key (e.g., user[name]):
						parent = (parent[top] ||= Hash.new)
						top = key
					end
				end
				
				parent[top] = value
			end
			
			# Decode a URL-encoded query string into a hash.
			#
			# @parameter string [String] The query string to decode.
			# @parameter maximum [Integer] The maximum number of keys in a path.
			# @parameter symbolize_keys [Boolean] Whether to symbolize keys. Empty key parts (array markers) become `nil` rather than symbols.
			# @returns [Hash] The decoded query string.
			# @raises [ArgumentError] If a key has no parts, or has more than `maximum` parts.
			#
			# @example Decode simple parameters.
			# 	Encoding.decode("name=Alice&age=30")
			# 	# => {"name" => "Alice", "age" => "30"}
			#
			# @example Decode nested parameters.
			# 	Encoding.decode("user[name]=Alice&user[role]=admin")
			# 	# => {"user" => {"name" => "Alice", "role" => "admin"}}
			def self.decode(string, maximum = 8, symbolize_keys: false)
				parameters = {}
				
				self.scan(string) do |name, value|
					keys = self.split(name)
					
					raise ArgumentError, "Invalid key path: #{name.inspect}!" if keys.empty?
					raise ArgumentError, "Key length exceeded limit!" if keys.size > maximum
					
					if symbolize_keys
						keys.map! {|key| key.empty? ? nil : key.to_sym}
					end
					
					self.assign(keys, value, parameters)
				end
				
				return parameters
			end
		end
	end
end
|