1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228
|
// Copyright 2017 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_CBOR_READER_H_
#define COMPONENTS_CBOR_READER_H_
#include <stddef.h>
#include <map>
#include <optional>
#include "base/containers/span.h"
#include "base/memory/raw_ptr.h"
#include "base/memory/raw_span.h"
#include "components/cbor/cbor_export.h"
#include "components/cbor/values.h"
// Concise Binary Object Representation (CBOR) decoder as defined by
// https://tools.ietf.org/html/rfc7049. This decoder only accepts canonical CBOR
// as defined by section 3.9.
//
// This implementation supports the following major types:
// - 0: Unsigned integers, up to 64-bit values*.
// - 1: Signed integers, up to 64-bit values*.
// - 2: Byte strings.
// - 3: UTF-8 strings.
// - 4: Definite-length arrays.
// - 5: Definite-length maps.
// - 7: Simple values or floating point values.
//
// * Note: For simplicity, this implementation represents both signed and
// unsigned integers with signed int64_t. This reduces the effective range
// of unsigned integers.
//
// Requirements for canonical CBOR representation:
// - Duplicate keys in maps are not allowed.
// - Keys for maps must be sorted first by length and then by byte-wise
// lexical order, as defined in Section 3.9.
//
// Known limitations and interpretations of the RFC (and the reasons):
// - Does not support indefinite-length data streams or semantic tags (major
// type 6). (Simplicity; security)
// - Does not support the BREAK stop code value type in major type 7, and
// rejects floating point values unless |Config::allow_floating_point| is
// set. (Simplicity)
// - Does not support non-character codepoints in major type 3. (Security)
// - Treats incomplete CBOR data items as syntax errors. (Security)
// - Treats trailing data bytes as errors. (Security)
// - Treats unknown additional information formats as syntax errors.
// (Simplicity; security)
// - Limits CBOR value inputs to at most 16 layers of nesting. Callers can
// enforce more shallow nesting by setting |max_nesting_level|. (Efficiency;
// security)
// - Only supports CBOR maps with integer or string type keys, due to the
// cost of serialization when sorting map keys. (Efficiency; simplicity)
// - Does not support simple values that are unassigned/reserved as per RFC
// 7049, and treats them as errors. (Security)
namespace cbor {
// Decoder that turns a span of bytes into a cbor::Value. The constructor is
// private, so callers go through the static |Read| overloads.
class CBOR_EXPORT Reader {
 public:
  // Error codes reported through |error_code_out| / |Config::error_code_out|.
  // |ErrorCodeToString| maps each of them to a human-readable message.
  enum class DecoderError {
    CBOR_NO_ERROR = 0,
    UNSUPPORTED_MAJOR_TYPE,
    UNKNOWN_ADDITIONAL_INFO,
    INCOMPLETE_CBOR_DATA,
    INCORRECT_MAP_KEY_TYPE,
    TOO_MUCH_NESTING,
    INVALID_UTF8,
    EXTRANEOUS_DATA,
    OUT_OF_ORDER_KEY,
    NON_MINIMAL_CBOR_ENCODING,
    UNSUPPORTED_SIMPLE_VALUE,
    UNSUPPORTED_FLOATING_POINT_VALUE,
    OUT_OF_RANGE_INTEGER_VALUE,
    DUPLICATE_KEY,
    UNKNOWN_ERROR,
  };

  // CBOR nested depth sufficient for most use cases. Also the default for
  // every |max_nesting_level| parameter below.
  static const int kCBORMaxDepth = 16;

  // Config contains configuration for a CBOR parsing operation. It is neither
  // copyable nor copy-assignable.
  struct CBOR_EXPORT Config {
    Config();
    Config(const Config&) = delete;
    Config& operator=(const Config&) = delete;
    ~Config();

    // Used to report the number of bytes of input consumed. This suppresses
    // the |EXTRANEOUS_DATA| error case. May be nullptr.
    raw_ptr<size_t> num_bytes_consumed = nullptr;

    // Used to report the specific error in the case that parsing fails. May
    // be nullptr.
    raw_ptr<DecoderError, DanglingUntriaged> error_code_out = nullptr;

    // Controls the maximum depth of CBOR nesting that will be permitted. This
    // exists to control stack consumption during parsing.
    int max_nesting_level = kCBORMaxDepth;

    // Causes strings that are not valid UTF-8 to be accepted and suppresses
    // the |INVALID_UTF8| error, unless such strings are map keys. Invalid
    // strings will result in Values of type |INVALID_UTF8| rather than
    // |STRING|. Users of this feature should ensure that every invalid string
    // is accounted for in the resulting structure.
    //
    // (Map keys are not allowed to be invalid because it was not necessary
    // for the motivating case and because it adds complexity to handle the
    // ordering correctly.)
    bool allow_invalid_utf8 = false;

    // Causes an input to be accepted even if it contains one or more maps
    // with keys that are not in the canonical ordering as defined in Section
    // 3.9, and suppresses the OUT_OF_ORDER_KEY error. The original ordering of
    // keys will _not_ be preserved, but instead, in the returned cbor::Value,
    // all maps are re-sorted so that their keys are in canonical order. By
    // definition, enabling this option may result in loss of information
    // (i.e. the original key ordering).
    //
    // Enabling this option will still not allow duplicate keys, in case of
    // which the DUPLICATE_KEY error will be emitted.
    bool allow_and_canonicalize_out_of_order_keys = false;

    // Causes floating point in CBOR to be decoded. This is an option as
    // several users of this library do not want to accept floats in CBOR.
    // When this option is set to `false` any floating point values
    // encountered during decoding will raise the
    // `UNSUPPORTED_FLOATING_POINT_VALUE` error.
    bool allow_floating_point = false;
  };

  Reader(const Reader&) = delete;
  Reader& operator=(const Reader&) = delete;
  ~Reader();

  // Reads and parses |input_data| into a Value. Returns an empty Optional
  // if the input violates any one of the syntax requirements (including
  // unknown additional info and incomplete CBOR data).
  //
  // The caller can optionally provide |error_code_out| to obtain additional
  // information about decoding failures.
  //
  // If the caller provides it, |max_nesting_level| cannot exceed
  // |kCBORMaxDepth|.
  //
  // Returns an empty Optional if not all the data was consumed, and sets
  // |error_code_out| to EXTRANEOUS_DATA in this case.
  static std::optional<Value> Read(base::span<const uint8_t> input_data,
                                   DecoderError* error_code_out = nullptr,
                                   int max_nesting_level = kCBORMaxDepth);

  // A version of |Read|, above, that takes a |Config| structure to allow
  // additional controls.
  static std::optional<Value> Read(base::span<const uint8_t> input_data,
                                   const Config& config);

  // A version of |Read| that takes some fields of |Config| as parameters to
  // avoid having to construct a |Config| object explicitly. The parameters
  // carry the same names as the |Config| fields they stand in for.
  static std::optional<Value> Read(base::span<const uint8_t> input_data,
                                   size_t* num_bytes_consumed,
                                   DecoderError* error_code_out = nullptr,
                                   int max_nesting_level = kCBORMaxDepth);

  // Translates errors to human-readable error messages.
  static const char* ErrorCodeToString(DecoderError error_code);

 private:
  explicit Reader(base::span<const uint8_t> data);

  // Encapsulates information extracted from the header of a CBOR data item,
  // which consists of the initial byte, and a variable-length-encoded integer
  // (if any).
  struct DataItemHeader {
    // The major type decoded from the initial byte.
    Value::Type type;

    // The raw 5-bit additional information from the initial byte.
    uint8_t additional_info;

    // The integer |value| decoded from the |additional_info| and the
    // variable-length-encoded integer, if any.
    uint64_t value;
  };

  // Internal decoding helpers, defined out of line.
  // NOTE(review): the std::optional results presumably use std::nullopt to
  // signal failure, mirroring |Read| -- confirm against the implementation.
  std::optional<DataItemHeader> DecodeDataItemHeader();
  std::optional<Value> DecodeCompleteDataItem(const Config& config,
                                              int max_nesting_level);
  std::optional<Value> DecodeValueToNegative(uint64_t value);
  std::optional<Value> DecodeValueToUnsigned(uint64_t value);
  std::optional<Value> DecodeToSimpleValueOrFloat(const DataItemHeader& header,
                                                  const Config& config);
  std::optional<uint64_t> ReadVariadicLengthInteger(Value::Type type,
                                                    uint8_t additional_info);
  std::optional<Value> ReadByteStringContent(const DataItemHeader& header);
  std::optional<Value> ReadStringContent(const DataItemHeader& header,
                                         const Config& config);
  std::optional<Value> ReadArrayContent(const DataItemHeader& header,
                                        const Config& config,
                                        int max_nesting_level);
  std::optional<Value> ReadMapContent(const DataItemHeader& header,
                                      const Config& config,
                                      int max_nesting_level);
  std::optional<uint8_t> ReadByte();
  std::optional<base::span<const uint8_t>> ReadBytes(uint64_t num_bytes);

  // NOTE(review): presumably checks |new_key| against the canonical key
  // ordering of the keys already in |map| -- confirm in the implementation.
  bool IsKeyInOrder(const Value& new_key,
                    const std::map<Value, Value, Value::Less>& map);

  // Check if `new_key` is a duplicate of a key that already exists in the
  // `map`.
  bool IsDuplicateKey(const Value& new_key,
                      const std::map<Value, Value, Value::Less>& map);

  bool IsEncodingMinimal(uint8_t additional_bytes, uint64_t uint_data);

  // Returns the stored error code. Const-qualified because it only reads
  // |error_code_|, matching num_bytes_remaining() below.
  DecoderError GetErrorCode() const { return error_code_; }

  // Returns the number of bytes currently held in |rest_|.
  size_t num_bytes_remaining() const { return rest_.size(); }

  // Bytes reported by num_bytes_remaining(); presumably the not-yet-consumed
  // tail of the input passed to the constructor.
  base::raw_span<const uint8_t> rest_;

  // Error code returned by GetErrorCode().
  DecoderError error_code_;
};
} // namespace cbor
#endif // COMPONENTS_CBOR_READER_H_
|