File: v4_rice.h

package info (click to toggle)
chromium 139.0.7258.127-2
  • links: PTS, VCS
  • area: main
  • in suites: forky
  • size: 6,122,156 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (162 lines) | stat: -rw-r--r-- 6,596 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
// Copyright 2016 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Rice-Golomb decoder for blocklist updates.
// Details at: https://en.wikipedia.org/wiki/Golomb_coding

#ifndef COMPONENTS_SAFE_BROWSING_CORE_BROWSER_DB_V4_RICE_H_
#define COMPONENTS_SAFE_BROWSING_CORE_BROWSER_DB_V4_RICE_H_

#include <ostream>
#include <string>
#include <vector>
#include "base/gtest_prod_util.h"
#include "third_party/protobuf/src/google/protobuf/repeated_field.h"

namespace safe_browsing {

// Enumerate different failure events while decoding the Rice-encoded string
// sent by the server for histogramming purposes. DO NOT CHANGE THE ORDERING OF
// THESE VALUES.
enum V4DecodeResult {
  // No error.
  DECODE_SUCCESS = 0,

  // Exceeded the number of entries to expect.
  DECODE_NO_MORE_ENTRIES_FAILURE = 1,

  // Requested to decode >32 bits.
  DECODE_REQUESTED_TOO_MANY_BITS_FAILURE = 2,

  // All bits had already been read and interpreted in the encoded string.
  DECODE_RAN_OUT_OF_BITS_FAILURE = 3,

  // The num_entries argument to DecodePrefixes or DecodeIntegers was negative.
  NUM_ENTRIES_NEGATIVE_FAILURE = 4,

  // Rice-encoding parameter was non-positive when the number of encoded entries
  // was > 0.
  RICE_PARAMETER_NON_POSITIVE_FAILURE = 5,

  // |encoded_data| was empty when the number of encoded entries was > 0.
  ENCODED_DATA_UNEXPECTED_EMPTY_FAILURE = 6,

  // decoded value had an integer overflow, which is unexpected.
  DECODED_INTEGER_OVERFLOW_FAILURE = 7,

  // Memory space for histograms is determined by the max.  ALWAYS
  // ADD NEW VALUES BEFORE THIS ONE.
  DECODE_RESULT_MAX
};

class V4RiceDecoder {
 public:
  // Decodes the Rice-encoded string in |encoded_data| as a list of integers
  // and stores them in |out|. |rice_parameter| is the exponent of 2 for
  // calculating 'M', |first_value| is the first value in the output sequence,
  // |num_entries| is the number of subsequent encoded entries. Each decoded
  // value is a positive offset from the previous value.
  // So, for instance, if the unencoded sequence is: [3, 7, 25], then
  // |first_value| = 3, |num_entries| = 2 and decoding the |encoded_data| will
  // produce the offsets: [4, 18].
  static V4DecodeResult DecodeIntegers(
      const int64_t first_value,
      const int32_t rice_parameter,
      const int32_t num_entries,
      const std::string& encoded_data,
      ::google::protobuf::RepeatedField<int32_t>* out);

  // Decodes the Rice-encoded string in |encoded_data| as a string of 4-byte
  // hash prefixes and stores them in |out|. The rest of the arguments are the
  // same as for |DecodeIntegers|.
  // Important: |out| is only meant to be used as a concatenated list of sorted
  // 4-byte hash prefixes, not as a vector of uint32_t values.
  // This method does the following:
  // 1. Rice-decode the |encoded_data| as a list of uint32_t values.
  // 2. Flip the endianness (on little-endian machines) of each of these
  //    values. This is done because when a hash prefix is represented as a
  //    uint32_t, the bytes get reordered. This generates the hash prefix that
  //    the server would have sent in the absence of Rice-encoding.
  // 3. Sort the resulting list of uint32_t values.
  // 4. Flip the endianness once again since the uint32_t are expected to be
  //    consumed as a concatenated list of 4-byte hash prefixes, when merging
  //    the
  //    update with the existing state.
  static V4DecodeResult DecodePrefixes(const int64_t first_value,
                                       const int32_t rice_parameter,
                                       const int32_t num_entries,
                                       const std::string& encoded_data,
                                       std::vector<uint32_t>* out);

  virtual ~V4RiceDecoder();

  std::string DebugString() const;

 private:
  FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextWordWithNoData);
  FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextBitsWithNoData);
  FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextValueWithNoData);
  FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextValueWithNoEntries);
  friend class V4RiceTest;

  // Validate some of the parameters passed to the decode methods.
  static V4DecodeResult ValidateInput(const int32_t rice_parameter,
                                      const int32_t num_entries,
                                      const std::string& encoded_data);

  // The |rice_parameter| is the exponent of 2 for calculating 'M',
  // |num_entries| is the number of encoded entries in the |encoded_data| and
  // |encoded_data| is the Rice-encoded string to decode.
  V4RiceDecoder(const int32_t rice_parameter,
                const int32_t num_entries,
                const std::string& encoded_data);

  // Returns true until |num_entries| entries have been decoded.
  bool HasAnotherValue() const;

  // Populates |value| with the next 32-bit unsigned integer decoded from
  // |encoded_data|.
  V4DecodeResult GetNextValue(uint32_t* value);

  // Reads in up to 32 bits from |encoded_data| into |word|, from which
  // subsequent GetNextBits() calls read bits.
  V4DecodeResult GetNextWord(uint32_t* word);

  // Reads |num_requested_bits| into |x| from |current_word_| and advances it
  // if needed by calling GetNextWord().
  V4DecodeResult GetNextBits(unsigned int num_requested_bits, uint32_t* x);

  // Reads |num_requested_bits| from |current_word_|.
  uint32_t GetBitsFromCurrentWord(unsigned int num_requested_bits);

  // The Rice parameter, which is the exponent of two for calculating 'M'. 'M'
  // is used as the base to calculate the quotient and remainder in the
  // algorithm.
  const unsigned int rice_parameter_;

  // The number of entries encoded in the data stream.
  int32_t num_entries_;

  // The Rice-encoded string.
  const std::string data_;

  // Represents how many total bytes have we read from |data_| into
  // |current_word_|.
  unsigned int data_byte_index_;

  // Represents the number of bits that we have read from |current_word_|. When
  // this becomes 32, which is the size of |current_word_|, a new
  // |current_word_| needs to be read from |data_|.
  unsigned int current_word_bit_index_;

  // The 32-bit value read from |data_|. All bit reading operations operate on
  // |current_word_|.
  uint32_t current_word_;
};

std::ostream& operator<<(std::ostream& os, const V4RiceDecoder& rice_decoder);

}  // namespace safe_browsing

#endif  // COMPONENTS_SAFE_BROWSING_CORE_BROWSER_DB_V4_RICE_H_