1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389
|
# frozen_string_literal: true
# rubocop:todo all
# Copyright (C) 2019-2020 MongoDB Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
require 'json'
module BSON
# This module contains methods for parsing Extended JSON 2.0.
# https://github.com/mongodb/specifications/blob/master/source/extended-json/extended-json.md
module ExtJSON
# Parses JSON in a string into a Ruby object tree.
#
# There are two strategies that this method can follow. If the canonical
# strategy is used which is the default, this method returns BSON types
# as much as possible. This allows the resulting object tree to be
# serialized back to extended JSON or to BSON while preserving the types.
# The relaxed strategy, enabled by passing {emit_relaxed: true} option,
# returns native Ruby types as much as possible which makes the resulting
# object tree easier to work with but may lose type information.
#
# Please note the following aspects of this method when emitting relaxed
# object trees:
#
# 1. $numberInt and $numberLong inputs produce Integer instances.
# 2. $regularExpression inputs produce BSON Regexp instances. This may
# change in a future version of bson-ruby to produce Ruby Regexp
# instances, potentially depending on regular expression options.
# 3. $numberDecimal inputs produce BSON Decimal128 instances. This may
# change in a future version of bson-ruby to produce Ruby BigDecimal
# instances instead.
#
# This method accepts canonical extended JSON, relaxed extended JSON and
# JSON without type information as well as a mix of the above.
#
# @note This method uses Ruby standard library's JSON.parse method to
# perform JSON parsing. As the JSON.parse method accepts inputs other
# than hashes, so does this method and therefore this method can return
# objects of any type.
#
# @param [ String ] str The string to parse.
#
# @option options [ nil | :bson ] :mode Which types to emit
#
# @return [ Object ] Parsed object tree.
module_function def parse(str, **options)
parse_obj(::JSON.parse(str), **options)
end
# Transforms a Ruby object tree containing extended JSON type hashes
# into a Ruby object tree with said hashes replaced by BSON or Ruby native
# types.
#
# @example Convert extended JSON type hashes:
# BSON::ExtJSON.parse_obj('foo' => {'$numberLong' => '42'})
# => {"foo"=>#<BSON::Int64:0x000055e55f4d40f0 @value=42>}
#
# @example Convert a non-hash value:
# BSON::ExtJSON.parse_obj('$numberLong' => '42')
# => #<BSON::Int64:0x000055e55f4e6ed0 @value=42>
#
# There are two strategies that this method can follow. If the canonical
# strategy is used which is the default, this method returns BSON types
# as much as possible. This allows the resulting object tree to be
# serialized back to extended JSON or to BSON while preserving the types.
# The relaxed strategy, enabled by passing {emit_relaxed: true} option,
# returns native Ruby types as much as possible which makes the resulting
# object tree easier to work with but may lose type information.
#
# Please note the following aspects of this method when emitting relaxed
# object trees:
#
# 1. $numberInt and $numberLong inputs produce Integer instances.
# 2. $regularExpression inputs produce BSON Regexp instances. This may
# change in a future version of bson-ruby to produce Ruby Regexp
# instances, potentially depending on regular expression options.
# 3. $numberDecimal inputs produce BSON Decimal128 instances. This may
# change in a future version of bson-ruby to produce Ruby BigDecimal
# instances instead.
#
# This method accepts object trees resulting from parsing canonical
# extended JSON, relaxed extended JSON and JSON without type information
# as well as a mix of the above.
#
# @note This method accepts any types as input, not just Hash instances.
# Consequently, it can return values of any type.
#
# @param [ Object ] value The object tree to convert.
#
# @option options [ nil | :bson ] :mode Which types to emit
#
# @return [ Object ] Converted object tree.
module_function def parse_obj(value, **options)
# TODO implement :ruby and :ruby! modes
unless [nil, :bson].include?(options[:mode])
raise ArgumentError, "Invalid value for :mode option: #{options[:mode].inspect}"
end
case value
when String, TrueClass, FalseClass, NilClass, Numeric
value
when Hash
parse_hash(value, **options)
when Array
value.map do |item|
parse_obj(item, **options)
end
else
raise Error::ExtJSONParseError, "Unknown value type: #{value}"
end
end
private
RESERVED_KEYS = %w(
$oid $symbol $numberInt $numberLong $numberDouble $numberDecimal
$binary $code $scope $timestamp $regularExpression $dbPointer
$date $minKey $maxKey $undefined
).freeze
RESERVED_KEYS_HASH = Hash[RESERVED_KEYS.map do |key|
[key, true]
end].freeze
module_function def parse_hash(hash, **options)
if hash.empty?
return {}
end
if dbref?(hash)
# Legacy dbref handling.
# Note that according to extended json spec, only hash values (but
# not the top-level BSON document itself) may be of type "dbref".
# This code applies to both hash values and the hash overall; however,
# since we do not have DBRef as a distinct type, applying the below
# logic to top level hashes doesn't cause harm.
hash = hash.dup
ref = hash.delete('$ref')
# $id, if present, can be anything
id = hash.delete('$id')
if id.is_a?(Hash)
id = parse_hash(id)
end
# Preserve $id value as it was, do not convert either to ObjectId
# or to a string. But if the value was in {'$oid' => ...} format,
# the value is converted to an ObjectId instance so that
# serialization to BSON later on works correctly.
out = {'$ref' => ref, '$id' => id}
if hash.key?('$db')
# $db must always be a string, if provided
out['$db'] = hash.delete('$db')
end
return out.update(parse_hash(hash))
end
if hash.length == 1
key, value = hash.first
return case key
when '$oid'
ObjectId.from_string(value)
when '$symbol'
Symbol::Raw.new(value)
when '$numberInt'
unless value.is_a?(String)
raise Error::ExtJSONParseError, "$numberInt value is of an incorrect type: #{value}"
end
value.to_i
when '$numberLong'
unless value.is_a?(String)
raise Error::ExtJSONParseError, "$numberLong value is of an incorrect type: #{value}"
end
value = value.to_i
if options[:mode] != :bson
value
else
Int64.new(value)
end
when '$numberDouble'
# This handles string to double conversion as well as inf/-inf/nan
unless value.is_a?(String)
raise Error::ExtJSONParseError, "Invalid $numberDouble value: #{value}"
end
BigDecimal(value).to_f
when '$numberDecimal'
# TODO consider returning BigDecimal here instead of Decimal128
Decimal128.new(value)
when '$binary'
unless value.is_a?(Hash)
raise Error::ExtJSONParseError, "Invalid $binary value: #{value}"
end
unless value.keys.sort == %w(base64 subType)
raise Error::ExtJSONParseError, "Invalid $binary value: #{value}"
end
encoded_value = value['base64']
unless encoded_value.is_a?(String)
raise Error::ExtJSONParseError, "Invalid base64 value in $binary: #{value}"
end
subtype = value['subType']
unless subtype.is_a?(String)
raise Error::ExtJSONParseError, "Invalid subType value in $binary: #{value}"
end
create_binary(encoded_value, subtype)
when '$uuid'
unless /\A[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}\z/.match(value)
raise Error::ExtJSONParseError, "Invalid $uuid value: #{value}"
end
return Binary.from_uuid(value)
when '$code'
unless value.is_a?(String)
raise Error::ExtJSONParseError, "Invalid $code value: #{value}"
end
Code.new(value)
when '$timestamp'
unless value.keys.sort == %w(i t)
raise Error::ExtJSONParseError, "Invalid $timestamp value: #{value}"
end
t = value['t']
unless t.is_a?(Integer)
raise Error::ExtJSONParseError, "Invalid t value: #{value}"
end
i = value['i']
unless i.is_a?(Integer)
raise Error::ExtJSONParseError, "Invalid i value: #{value}"
end
Timestamp.new(t, i)
when '$regularExpression'
unless value.keys.sort == %w(options pattern)
raise Error::ExtJSONParseError, "Invalid $regularExpression value: #{value}"
end
# TODO consider returning Ruby regular expression object here
create_regexp(value['pattern'], value['options'])
when '$dbPointer'
unless value.keys.sort == %w($id $ref)
raise Error::ExtJSONParseError, "Invalid $dbPointer value: #{value}"
end
DbPointer.new(value['$ref'], parse_hash(value['$id']))
when '$date'
case value
when String
::Time.parse(value).utc
when Hash
unless value.keys.sort == %w($numberLong)
raise Error::ExtJSONParseError, "Invalid value for $date: #{value}"
end
sec, msec = value.values.first.to_i.divmod(1000)
::Time.at(sec, msec*1000).utc
else
raise Error::ExtJSONParseError, "Invalid value for $date: #{value}"
end
when '$minKey'
unless value == 1
raise Error::ExtJSONParseError, "Invalid $minKey value: #{value}"
end
MinKey.new
when '$maxKey'
unless value == 1
raise Error::ExtJSONParseError, "Invalid $maxKey value: #{value}"
end
MaxKey.new
when '$undefined'
unless value == true
raise Error::ExtJSONParseError, "Invalid $undefined value: #{value}"
end
Undefined.new
else
map_hash(hash, **options)
end
end
if hash.length == 2
sorted_keys = hash.keys.sort
first_key = sorted_keys.first
last_key = sorted_keys.last
if first_key == '$code'
unless sorted_keys == %w($code $scope)
raise Error::ExtJSONParseError, "Invalid $code value: #{hash}"
end
unless hash['$code'].is_a?(String)
raise Error::ExtJSONParseError, "Invalid $code value: #{value}"
end
return CodeWithScope.new(hash['$code'], map_hash(hash['$scope']))
end
if first_key == '$binary'
unless sorted_keys == %w($binary $type)
raise Error::ExtJSONParseError, "Invalid $binary value: #{hash}"
end
unless hash['$binary'].is_a?(String)
raise Error::ExtJSONParseError, "Invalid $binary value: #{value}"
end
unless hash['$type'].is_a?(String)
raise Error::ExtJSONParseError, "Invalid $binary subtype: #{hash['$type']}"
end
return create_binary(hash['$binary'], hash['$type'])
end
if last_key == '$regex'
unless sorted_keys == %w($options $regex)
raise Error::ExtJSONParseError, "Invalid $regex value: #{hash}"
end
if hash['$regex'].is_a?(Hash)
return {
'$regex' => parse_hash(hash['$regex']),
'$options' => hash['$options']
}
end
unless hash['$regex'].is_a?(String)
raise Error::ExtJSONParseError, "Invalid $regex pattern: #{hash['$regex']}"
end
unless hash['$options'].is_a?(String)
raise Error::ExtJSONParseError, "Invalid $regex options: #{hash['$options']}"
end
return create_regexp(hash['$regex'], hash['$options'])
end
verify_no_reserved_keys(hash, **options)
end
verify_no_reserved_keys(hash, **options)
end
module_function def verify_no_reserved_keys(hash, **options)
if hash.length > RESERVED_KEYS.length
if RESERVED_KEYS.any? { |key| hash.key?(key) }
raise Error::ExtJSONParseError, "Hash uses reserved keys but does not match a known type: #{hash}"
end
else
if hash.keys.any? { |key| RESERVED_KEYS_HASH.key?(key) }
raise Error::ExtJSONParseError, "Hash uses reserved keys but does not match a known type: #{hash}"
end
end
map_hash(hash, **options)
end
module_function def map_hash(hash, **options)
::Hash[hash.map do |key, value|
if (key.is_a?(String) || key.is_a?(Symbol)) && key.to_s.include?(NULL_BYTE)
raise Error::ExtJSONParseError, "Hash key cannot contain a null byte: #{key}"
end
[key, parse_obj(value, **options)]
end]
end
module_function def create_binary(encoded_value, encoded_subtype)
subtype = encoded_subtype.hex
type = Binary::TYPES[subtype.chr]
unless type
# Requires https://jira.mongodb.org/browse/RUBY-2056
raise NotImplementedError, "Binary subtype #{encoded_subtype} is not currently supported"
end
Binary.new(Base64.decode64(encoded_value), type)
end
module_function def create_regexp(pattern, options)
Regexp::Raw.new(pattern, options)
end
module_function def dbref?(hash)
if db = hash.key?('$db')
unless db.is_a?(String)
return false
end
end
return hash['$ref']&.is_a?(String) && hash.key?('$id')
end
end
end
|