1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434
|
/*! \file */
#ifndef _ATTRIBUTE_STORE_H
#define _ATTRIBUTE_STORE_H
#include <mutex>
#include <deque>
#include <map>
#include <iostream>
#include <atomic>
#include <boost/functional/hash.hpp>
#include <boost/container/flat_map.hpp>
#include <vector>
#include <protozero/data_view.hpp>
#include "pooled_string.h"
#include "deque_map.h"
/* AttributeStore - global dictionary for attributes */
typedef uint32_t AttributeIndex; // check this is enough
struct string_ptr_less_than {
bool operator()(const std::string* lhs, const std::string* rhs) const {
return *lhs < *rhs;
}
};
class AttributeKeyStore {
public:
AttributeKeyStore(): finalized(false), keys2indexSize(0) {}
uint16_t key2index(const std::string& key);
const std::string& getKey(uint16_t index) const;
const std::string& getKeyUnsafe(uint16_t index) const;
void finalize() { finalized = true; }
std::atomic<uint32_t> keys2indexSize;
private:
bool finalized;
mutable std::mutex keys2indexMutex;
// NB: we use a deque, not a vector, because a deque never invalidates
// pointers to its members as long as you only push_back
std::deque<std::string> keys;
std::map<const std::string*, uint16_t, string_ptr_less_than> keys2index;
};
enum class AttributePairType: char { Bool = 0, Float = 1, String = 2 };
// AttributePair is a key/value pair (with minzoom)
#pragma pack(push, 1)
struct AttributePair {
short keyIndex : 9;
AttributePairType valueType : 3;
char minzoom : 4;
union {
float floatValue_;
PooledString stringValue_;
};
AttributePair(uint32_t keyIndex, bool value, char minzoom)
: keyIndex(keyIndex), valueType(AttributePairType::Bool), minzoom(minzoom), floatValue_(value ? 1 : 0)
{
}
AttributePair(uint32_t keyIndex, const PooledString& value, char minzoom)
: keyIndex(keyIndex), valueType(AttributePairType::String), stringValue_(value), minzoom(minzoom)
{
}
AttributePair(uint32_t keyIndex, float value, char minzoom)
: keyIndex(keyIndex), valueType(AttributePairType::Float), minzoom(minzoom), floatValue_(value)
{
}
AttributePair(const AttributePair& other):
keyIndex(other.keyIndex), valueType(other.valueType), minzoom(other.minzoom)
{
if (valueType == AttributePairType::Bool || valueType == AttributePairType::Float) {
floatValue_ = other.floatValue_;
return;
}
stringValue_ = other.stringValue_;
}
AttributePair& operator=(const AttributePair& other) {
keyIndex = other.keyIndex;
valueType = other.valueType;
minzoom = other.minzoom;
if (valueType == AttributePairType::Bool || valueType == AttributePairType::Float) {
floatValue_ = other.floatValue_;
return *this;
}
stringValue_ = other.stringValue_;
return *this;
}
bool operator<(const AttributePair& other) const {
if (minzoom != other.minzoom)
return minzoom < other.minzoom;
if (keyIndex != other.keyIndex)
return keyIndex < other.keyIndex;
if (valueType != other.valueType) return valueType < other.valueType;
if (hasStringValue()) return pooledString() < other.pooledString();
if (hasBoolValue()) return boolValue() < other.boolValue();
if (hasFloatValue()) return floatValue() < other.floatValue();
throw std::runtime_error("Invalid type in attribute store");
}
bool operator==(const AttributePair &other) const {
if (minzoom!=other.minzoom || keyIndex!=other.keyIndex || valueType!=other.valueType) return false;
if (valueType == AttributePairType::String)
return stringValue_ == other.stringValue_;
if (valueType == AttributePairType::Float || valueType == AttributePairType::Bool)
return floatValue_ == other.floatValue_;
return true;
}
bool hasStringValue() const { return valueType == AttributePairType::String; }
bool hasFloatValue() const { return valueType == AttributePairType::Float; }
bool hasBoolValue() const { return valueType == AttributePairType::Bool; }
const PooledString& pooledString() const { return stringValue_; }
const std::string stringValue() const { return stringValue_.toString(); }
float floatValue() const { return floatValue_; }
bool boolValue() const { return floatValue_; }
void ensureStringIsOwned();
static bool isHot(const std::string& keyName, const protozero::data_view value) {
// Is this pair a candidate for the hot pool?
// Hot pairs are pairs that we think are likely to be re-used, like
// tunnel=0, highway=yes, and so on.
//
// The trick is that we commit to putting them in the hot pool
// before we know if we were right.
// The rules for floats/booleans are managed in their addAttribute call.
// Only strings that are IDish are eligible: only lowercase letters.
bool ok = true;
for (size_t i = 0; i < value.size(); i++) {
const auto c = value.data()[i];
if (c != '-' && c != '_' && (c < 'a' || c > 'z'))
return false;
}
// Keys that sound like name, name:en, etc, aren't eligible.
if (keyName.size() >= 4 && keyName[0] == 'n' && keyName[1] == 'a' && keyName[2] == 'm' && keyName[3])
return false;
return true;
}
size_t hash() const {
std::size_t rv = minzoom;
boost::hash_combine(rv, keyIndex);
boost::hash_combine(rv, valueType);
if(hasStringValue()) {
const char* data = pooledString().data();
boost::hash_range(rv, data, data + pooledString().size());
} else if(hasFloatValue())
boost::hash_combine(rv, floatValue());
else if(hasBoolValue())
boost::hash_combine(rv, boolValue());
else {
throw new std::out_of_range("cannot hash pair, unknown value");
}
return rv;
}
};
#pragma pack(pop)
// We shard the cold pools to reduce the odds of lock contention on
// inserting/retrieving the "cold" pairs.
//
// It should be at least 2x the number of your cores -- 256 shards is probably
// reasonable for most people.
//
// We also reserve the bottom shard for the hot pool.
#define SHARD_BITS 14
#define ATTRIBUTE_SHARDS (1 << SHARD_BITS)
class AttributeStore;
class AttributePairStore {
public:
AttributePairStore():
finalized(false),
pairsMutex(ATTRIBUTE_SHARDS),
lookups(0),
lookupsUncached(0)
{
// The "hot" shard has a capacity of 64K, the others are unbounded.
pairs.push_back(DequeMap<AttributePair>(1 << 16));
// Reserve offset 0 as a sentinel
pairs[0].add(AttributePair(0, false, 0));
for (size_t i = 1; i < ATTRIBUTE_SHARDS; i++)
pairs.push_back(DequeMap<AttributePair>());
}
void finalize() { finalized = true; }
const AttributePair& getPair(uint32_t i) const;
const AttributePair& getPairUnsafe(uint32_t i) const;
uint32_t addPair(AttributePair& pair, bool isHot);
private:
friend class AttributeStore;
std::vector<DequeMap<AttributePair>> pairs;
bool finalized;
// We refer to all attribute pairs by index.
//
// Each shard is responsible for a portion of the key space.
//
// The 0th shard is special: it's the hot shard, for pairs
// we suspect will be popular. It only ever has 64KB items,
// so that we can reference it with a short.
mutable std::vector<std::mutex> pairsMutex;
std::atomic<uint64_t> lookupsUncached;
std::atomic<uint64_t> lookups;
};
// AttributeSet is a set of AttributePairs
// = the complete attributes for one object
struct AttributeSet {
bool operator<(const AttributeSet& other) const {
if (useVector != other.useVector)
return useVector < other.useVector;
if (useVector) {
if (intValues.size() != other.intValues.size())
return intValues.size() < other.intValues.size();
for (int i = 0; i < intValues.size(); i++) {
if (intValues[i] != other.intValues[i]) {
return intValues[i] < other.intValues[i];
}
}
return false;
}
for (int i = 0; i < sizeof(shortValues)/sizeof(shortValues[0]); i++) {
if (shortValues[i] != other.shortValues[i]) {
return shortValues[i] < other.shortValues[i];
}
}
return false;
}
size_t hash() const {
// Values are in canonical form after finalizeSet is called, so
// can hash them in the order they're stored.
if (useVector) {
const size_t n = intValues.size();
size_t idx = n;
for (int i = 0; i < n; i++)
boost::hash_combine(idx, intValues[i]);
return idx;
}
size_t idx = 0;
for (int i = 0; i < sizeof(shortValues)/sizeof(shortValues[0]); i++)
boost::hash_combine(idx, shortValues[i]);
return idx;
}
bool operator!=(const AttributeSet& other) const { return !(*this == other); }
bool operator==(const AttributeSet &other) const {
// Equivalent if, for every value in values, there is a value in other.values
// whose pair is the same.
//
// NB: finalizeSet ensures values are in canonical order, so we can just
// do a pairwise comparison.
if (useVector != other.useVector)
return false;
if (useVector) {
const size_t n = intValues.size();
const size_t otherN = other.intValues.size();
if (n != otherN)
return false;
for (size_t i = 0; i < n; i++)
if (intValues[i] != other.intValues[i])
return false;
return true;
}
return memcmp(shortValues, other.shortValues, sizeof(shortValues)) == 0;
}
void finalize();
// We store references to AttributePairs either in an array of shorts
// or a vector of 32-bit ints.
//
// The array of shorts is not _really_ an array of shorts. It's meant
// to be interpreted as 4 shorts, and then 4 ints.
bool useVector;
union {
short shortValues[12];
std::vector<uint32_t> intValues;
};
size_t numPairs() const {
if (useVector)
return intValues.size();
size_t rv = 0;
for (int i = 0; i < 8; i++)
if (isSet(i))
rv++;
return rv;
}
const uint32_t getPair(size_t i) const {
if (useVector)
return intValues[i];
size_t j = 0;
size_t actualIndex = 0;
// Advance actualIndex to the first non-zero entry, e.g. if
// the first thing added has a 4-byte index, our first entry
// is at location 4, not 0.
while(!isSet(actualIndex)) actualIndex++;
while (j < i) {
j++;
actualIndex++;
while(!isSet(actualIndex)) actualIndex++;
}
return getValueAtIndex(actualIndex);
}
AttributeSet(): useVector(false) {
for (int i = 0; i < sizeof(shortValues)/sizeof(shortValues[0]); i++)
shortValues[i] = 0;
}
AttributeSet(const AttributeSet &&a) = delete;
AttributeSet(const AttributeSet &a) {
useVector = a.useVector;
if (useVector) {
new (&intValues) std::vector<uint32_t>;
intValues = a.intValues;
} else {
for (int i = 0; i < sizeof(shortValues)/sizeof(shortValues[0]); i++)
shortValues[i] = a.shortValues[i];
}
}
~AttributeSet() {
if (useVector)
intValues.~vector();
}
void addPair(uint32_t index);
void removePairWithKey(const AttributePairStore& pairStore, uint32_t keyIndex);
private:
void setValueAtIndex(size_t index, uint32_t value) {
if (useVector) {
throw std::out_of_range("setValueAtIndex called for useVector=true");
}
if (index < 4 && value < (1 << 16)) {
shortValues[index] = (uint16_t)value;
} else if (index >= 4 && index < 8) {
((uint32_t*)(&shortValues[4]))[index - 4] = value;
} else {
throw std::out_of_range("setValueAtIndex out of bounds");
}
}
uint32_t getValueAtIndex(size_t index) const {
if (index < 4)
return shortValues[index];
return ((uint32_t*)(&shortValues[4]))[index - 4];
}
bool isSet(size_t index) const {
if (index < 4) return shortValues[index] != 0;
const size_t newIndex = 4 + 2 * (index - 4);
return shortValues[newIndex] != 0 || shortValues[newIndex + 1] != 0;
}
};
// AttributeStore is the store for all AttributeSets
struct AttributeStore {
AttributeIndex add(AttributeSet &attributes);
std::vector<const AttributePair*> getUnsafe(AttributeIndex index) const;
void reset(); // used for testing
size_t size() const;
void reportSize() const;
void finalize();
void addAttribute(AttributeSet& attributeSet, std::string const &key, const protozero::data_view v, char minzoom);
void addAttribute(AttributeSet& attributeSet, std::string const &key, float v, char minzoom);
void addAttribute(AttributeSet& attributeSet, std::string const &key, bool v, char minzoom);
AttributeStore():
finalized(false),
sets(ATTRIBUTE_SHARDS),
setsMutex(ATTRIBUTE_SHARDS),
lookups(0),
lookupsUncached(0) {
}
AttributeKeyStore keyStore;
AttributePairStore pairStore;
private:
bool finalized;
std::vector<DequeMap<AttributeSet>> sets;
mutable std::vector<std::mutex> setsMutex;
mutable std::mutex mutex;
std::atomic<uint64_t> lookupsUncached;
std::atomic<uint64_t> lookups;
};
#endif //_ATTRIBUTE_STORE_H
|