1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <array>
#include <atomic>
#include <memory>
#include <string>
#include "port/port.h"
#include "rocksdb/slice.h"
#include "table/multiget_context.h"
#include "util/hash.h"
namespace ROCKSDB_NAMESPACE {
class Slice;
class Allocator;
class Logger;
// A Bloom filter intended only to be used in memory, never serialized in a way
// that could lead to schema incompatibility. Supports opt-in lock-free
// concurrent access.
//
// This implementation is also intended for applications generally preferring
// speed vs. maximum accuracy: roughly 0.9x BF op latency for 1.1x FP rate.
// For 1% FP rate, that means that the latency of a look-up triggered by an FP
// should be less than roughly 100x the cost of a Bloom filter op.
//
// For simplicity and performance, the current implementation requires
// num_probes to be a multiple of two and <= 10.
//
class DynamicBloom {
public:
// allocator: pass allocator to bloom filter, hence trace the usage of memory
// total_bits: fixed total bits for the bloom
// num_probes: number of hash probes for a single key
// hash_func: customized hash function
// huge_page_tlb_size: if >0, try to allocate bloom bytes from huge page TLB
// within this page size. Need to reserve huge pages for
// it to be allocated, like:
// sysctl -w vm.nr_hugepages=20
// See linux doc Documentation/vm/hugetlbpage.txt
explicit DynamicBloom(Allocator* allocator, uint32_t total_bits,
uint32_t num_probes = 6, size_t huge_page_tlb_size = 0,
Logger* logger = nullptr);
~DynamicBloom() {}
// Assuming single threaded access to this function.
void Add(const Slice& key);
// Like Add, but may be called concurrent with other functions.
void AddConcurrently(const Slice& key);
// Assuming single threaded access to this function.
void AddHash(uint32_t hash);
// Like AddHash, but may be called concurrent with other functions.
void AddHashConcurrently(uint32_t hash);
// Multithreaded access to this function is OK
bool MayContain(const Slice& key) const;
void MayContain(int num_keys, Slice* keys, bool* may_match) const;
// Multithreaded access to this function is OK
bool MayContainHash(uint32_t hash) const;
void Prefetch(uint32_t h);
private:
// Length of the structure, in 64-bit words. For this structure, "word"
// will always refer to 64-bit words.
uint32_t kLen;
// We make the k probes in pairs, two for each 64-bit read/write. Thus,
// this stores k/2, the number of words to double-probe.
const uint32_t kNumDoubleProbes;
std::atomic<uint64_t>* data_;
// or_func(ptr, mask) should effect *ptr |= mask with the appropriate
// concurrency safety, working with bytes.
template <typename OrFunc>
void AddHash(uint32_t hash, const OrFunc& or_func);
bool DoubleProbe(uint32_t h32, size_t a) const;
};
inline void DynamicBloom::Add(const Slice& key) { AddHash(BloomHash(key)); }
inline void DynamicBloom::AddConcurrently(const Slice& key) {
AddHashConcurrently(BloomHash(key));
}
inline void DynamicBloom::AddHash(uint32_t hash) {
AddHash(hash, [](std::atomic<uint64_t>* ptr, uint64_t mask) {
ptr->store(ptr->load(std::memory_order_relaxed) | mask,
std::memory_order_relaxed);
});
}
inline void DynamicBloom::AddHashConcurrently(uint32_t hash) {
AddHash(hash, [](std::atomic<uint64_t>* ptr, uint64_t mask) {
// Happens-before between AddHash and MaybeContains is handled by
// access to versions_->LastSequence(), so all we have to do here is
// avoid races (so we don't give the compiler a license to mess up
// our code) and not lose bits. std::memory_order_relaxed is enough
// for that.
if ((mask & ptr->load(std::memory_order_relaxed)) != mask) {
ptr->fetch_or(mask, std::memory_order_relaxed);
}
});
}
inline bool DynamicBloom::MayContain(const Slice& key) const {
return (MayContainHash(BloomHash(key)));
}
inline void DynamicBloom::MayContain(int num_keys, Slice* keys,
bool* may_match) const {
std::array<uint32_t, MultiGetContext::MAX_BATCH_SIZE> hashes;
std::array<size_t, MultiGetContext::MAX_BATCH_SIZE> byte_offsets;
for (int i = 0; i < num_keys; ++i) {
hashes[i] = BloomHash(keys[i]);
size_t a = FastRange32(hashes[i], kLen);
PREFETCH(data_ + a, 0, 3);
byte_offsets[i] = a;
}
for (int i = 0; i < num_keys; i++) {
may_match[i] = DoubleProbe(hashes[i], byte_offsets[i]);
}
}
#if defined(_MSC_VER)
#pragma warning(push)
// local variable is initialized but not referenced
#pragma warning(disable : 4189)
#endif
inline void DynamicBloom::Prefetch(uint32_t h32) {
size_t a = FastRange32(h32, kLen);
PREFETCH(data_ + a, 0, 3);
}
#if defined(_MSC_VER)
#pragma warning(pop)
#endif
// Speed hacks in this implementation:
// * Uses fastrange instead of %
// * Minimum logic to determine first (and all) probed memory addresses.
// (Uses constant bit-xor offsets from the starting probe address.)
// * (Major) Two probes per 64-bit memory fetch/write.
// Code simplification / optimization: only allow even number of probes.
// * Very fast and effective (murmur-like) hash expansion/re-mixing. (At
// least on recent CPUs, integer multiplication is very cheap. Each 64-bit
// remix provides five pairs of bit addresses within a uint64_t.)
// Code simplification / optimization: only allow up to 10 probes, from a
// single 64-bit remix.
//
// The FP rate penalty for this implementation, vs. standard Bloom filter, is
// roughly 1.12x on top of the 1.15x penalty for a 512-bit cache-local Bloom.
// This implementation does not explicitly use the cache line size, but is
// effectively cache-local (up to 16 probes) because of the bit-xor offsetting.
//
// NB: could easily be upgraded to support a 64-bit hash and
// total_bits > 2^32 (512MB). (The latter is a bad idea without the former,
// because of false positives.)
inline bool DynamicBloom::MayContainHash(uint32_t h32) const {
size_t a = FastRange32(h32, kLen);
PREFETCH(data_ + a, 0, 3);
return DoubleProbe(h32, a);
}
inline bool DynamicBloom::DoubleProbe(uint32_t h32, size_t byte_offset) const {
// Expand/remix with 64-bit golden ratio
uint64_t h = 0x9e3779b97f4a7c13ULL * h32;
for (unsigned i = 0;; ++i) {
// Two bit probes per uint64_t probe
uint64_t mask =
((uint64_t)1 << (h & 63)) | ((uint64_t)1 << ((h >> 6) & 63));
uint64_t val = data_[byte_offset ^ i].load(std::memory_order_relaxed);
if (i + 1 >= kNumDoubleProbes) {
return (val & mask) == mask;
} else if ((val & mask) != mask) {
return false;
}
h = (h >> 12) | (h << 52);
}
}
template <typename OrFunc>
inline void DynamicBloom::AddHash(uint32_t h32, const OrFunc& or_func) {
size_t a = FastRange32(h32, kLen);
PREFETCH(data_ + a, 0, 3);
// Expand/remix with 64-bit golden ratio
uint64_t h = 0x9e3779b97f4a7c13ULL * h32;
for (unsigned i = 0;; ++i) {
// Two bit probes per uint64_t probe
uint64_t mask =
((uint64_t)1 << (h & 63)) | ((uint64_t)1 << ((h >> 6) & 63));
or_func(&data_[a ^ i], mask);
if (i + 1 >= kNumDoubleProbes) {
return;
}
h = (h >> 12) | (h << 52);
}
}
} // namespace ROCKSDB_NAMESPACE
|