/*
 * A C++ header for Roaring Bitmaps.
 */
#ifndef INCLUDE_ROARING_HH_
#define INCLUDE_ROARING_HH_
#include <stdarg.h>
#include <roaring/roaring.h>
#include <algorithm>
#include <new>
#include <stdexcept>
#include <string>
class RoaringSetBitForwardIterator;
/**
 * C++ wrapper around the C roaring_bitmap_t structure.
 *
 * The wrapped C struct is stored by value in the public member `roaring`
 * and is released in the destructor via ra_clear().
 */
class Roaring {
public:
    /**
     * Create an empty bitmap.
     */
    Roaring() { ra_init(&roaring.high_low_container); }

    /**
     * Construct a bitmap from an array of n integer values.
     */
    Roaring(size_t n, const uint32_t *data) : Roaring() {
        roaring_bitmap_add_many(&roaring, n, data);
    }

    /**
     * Copy constructor.
     *
     * Throws std::runtime_error if the underlying copy reports failure.
     */
    Roaring(const Roaring &r) {
        bool is_ok =
            ra_copy(&r.roaring.high_low_container, &roaring.high_low_container,
                    roaring_bitmap_get_copy_on_write(&r.roaring));
        if (!is_ok) {
            throw std::runtime_error("failed memory alloc in constructor");
        }
        roaring_bitmap_set_copy_on_write(
            &roaring, roaring_bitmap_get_copy_on_write(&r.roaring));
    }

    /**
     * Move constructor. The moved-from object remains valid, i.e.
     * all methods can still be called on it (it is re-initialized).
     */
    Roaring(Roaring &&r) noexcept {
        roaring = std::move(r.roaring);
        ra_init(&r.roaring.high_low_container);
    }

    /**
     * Construct a Roaring object from the C struct.
     *
     * Passing a NULL pointer is unsafe.
     * The interior of the C struct is taken over by this object and the
     * struct itself is released with free(), so the pointer is invalid
     * after the call.
     */
    Roaring(roaring_bitmap_t *s) noexcept {
        // steal the interior struct
        roaring.high_low_container = s->high_low_container;
        // deallocate the old container
        free(s);
    }

    /**
     * Construct a bitmap from a variadic list of n integer values.
     * Each variadic argument is read as a uint32_t.
     */
    static Roaring bitmapOf(size_t n, ...) {
        Roaring ans;
        va_list vl;
        va_start(vl, n);
        for (size_t i = 0; i < n; i++) {
            ans.add(va_arg(vl, uint32_t));
        }
        va_end(vl);
        return ans;
    }

    /**
     * Add value x.
     */
    void add(uint32_t x) { roaring_bitmap_add(&roaring, x); }

    /**
     * Add value x.
     * Returns true if a new value was added, false if the value was
     * already present.
     */
    bool addChecked(uint32_t x) {
        return roaring_bitmap_add_checked(&roaring, x);
    }

    /**
     * Add all values from x (included) to y (excluded).
     */
    void addRange(const uint64_t x, const uint64_t y) {
        roaring_bitmap_add_range(&roaring, x, y);
    }

    /**
     * Add n_args values read from the array pointed to by vals.
     */
    void addMany(size_t n_args, const uint32_t *vals) {
        roaring_bitmap_add_many(&roaring, n_args, vals);
    }

    /**
     * Remove value x.
     */
    void remove(uint32_t x) { roaring_bitmap_remove(&roaring, x); }

    /**
     * Remove value x.
     * Returns true if a value was removed, false if the value was not
     * present.
     */
    bool removeChecked(uint32_t x) {
        return roaring_bitmap_remove_checked(&roaring, x);
    }

    /**
     * Return the largest value (if not empty).
     */
    uint32_t maximum() const { return roaring_bitmap_maximum(&roaring); }

    /**
     * Return the smallest value (if not empty).
     */
    uint32_t minimum() const { return roaring_bitmap_minimum(&roaring); }

    /**
     * Check if value x is present.
     */
    bool contains(uint32_t x) const {
        return roaring_bitmap_contains(&roaring, x);
    }

    /**
     * Check if all values from x (included) to y (excluded) are present.
     */
    bool containsRange(const uint64_t x, const uint64_t y) const {
        return roaring_bitmap_contains_range(&roaring, x, y);
    }

    /**
     * Destructor.
     */
    ~Roaring() { ra_clear(&roaring.high_low_container); }

    /**
     * Copies the content of the provided bitmap, and
     * discards the current content.
     *
     * Self-assignment is a no-op.
     * Throws std::runtime_error if the underlying copy reports failure.
     */
    Roaring &operator=(const Roaring &r) {
        // Without this guard, `a = a` would clear the bitmap and then copy
        // from the just-cleared container.
        if (this == &r) {
            return *this;
        }
        ra_clear(&roaring.high_low_container);
        bool is_ok =
            ra_copy(&r.roaring.high_low_container, &roaring.high_low_container,
                    roaring_bitmap_get_copy_on_write(&r.roaring));
        if (!is_ok) {
            throw std::runtime_error("failed memory alloc in assignment");
        }
        roaring_bitmap_set_copy_on_write(
            &roaring, roaring_bitmap_get_copy_on_write(&r.roaring));
        return *this;
    }

    /**
     * Moves the content of the provided bitmap, and
     * discards the current content.
     *
     * Self-move is a no-op; otherwise the moved-from bitmap is
     * re-initialized and stays usable.
     */
    Roaring &operator=(Roaring &&r) noexcept {
        // Without this guard, `a = std::move(a)` would clear and then
        // re-initialize the bitmap, losing its content.
        if (this != &r) {
            ra_clear(&roaring.high_low_container);
            roaring = std::move(r.roaring);
            ra_init(&r.roaring.high_low_container);
        }
        return *this;
    }

    /**
     * Compute the intersection between the current bitmap and the provided
     * bitmap, writing the result in the current bitmap. The provided bitmap
     * is not modified.
     */
    Roaring &operator&=(const Roaring &r) {
        roaring_bitmap_and_inplace(&roaring, &r.roaring);
        return *this;
    }

    /**
     * Compute the difference between the current bitmap and the provided
     * bitmap, writing the result in the current bitmap. The provided bitmap
     * is not modified.
     */
    Roaring &operator-=(const Roaring &r) {
        roaring_bitmap_andnot_inplace(&roaring, &r.roaring);
        return *this;
    }

    /**
     * Compute the union between the current bitmap and the provided bitmap,
     * writing the result in the current bitmap. The provided bitmap is not
     * modified.
     *
     * See also the fastunion function to aggregate many bitmaps more quickly.
     */
    Roaring &operator|=(const Roaring &r) {
        roaring_bitmap_or_inplace(&roaring, &r.roaring);
        return *this;
    }

    /**
     * Compute the symmetric difference (xor) between the current bitmap and
     * the provided bitmap, writing the result in the current bitmap. The
     * provided bitmap is not modified.
     */
    Roaring &operator^=(const Roaring &r) {
        roaring_bitmap_xor_inplace(&roaring, &r.roaring);
        return *this;
    }

    /**
     * Exchange the content of this bitmap with another.
     */
    void swap(Roaring &r) noexcept { std::swap(r.roaring, roaring); }

    /**
     * Get the cardinality of the bitmap (number of elements).
     */
    uint64_t cardinality() const {
        return roaring_bitmap_get_cardinality(&roaring);
    }

    /**
     * Returns true if the bitmap is empty (cardinality is zero).
     */
    bool isEmpty() const { return roaring_bitmap_is_empty(&roaring); }

    /**
     * Returns true if the bitmap is a subset of the other.
     */
    bool isSubset(const Roaring &r) const {
        return roaring_bitmap_is_subset(&roaring, &r.roaring);
    }

    /**
     * Returns true if the bitmap is a strict subset of the other.
     */
    bool isStrictSubset(const Roaring &r) const {
        return roaring_bitmap_is_strict_subset(&roaring, &r.roaring);
    }

    /**
     * Convert the bitmap to an array. Write the output to "ans";
     * the caller is responsible for ensuring that there is enough memory
     * allocated
     * (e.g., ans = new uint32_t[mybitmap.cardinality()];)
     */
    void toUint32Array(uint32_t *ans) const {
        roaring_bitmap_to_uint32_array(&roaring, ans);
    }

    /**
     * Convert to a uint32_t array with pagination: output is written to
     * "ans" using the given offset and limit.
     */
    void rangeUint32Array(uint32_t *ans, size_t offset, size_t limit) const {
        roaring_bitmap_range_uint32_array(&roaring, offset, limit, ans);
    }

    /**
     * Return true if the two bitmaps contain the same elements.
     */
    bool operator==(const Roaring &r) const {
        return roaring_bitmap_equals(&roaring, &r.roaring);
    }

    /**
     * Compute the negation of the roaring bitmap within a specified interval.
     * Areas outside the range are passed through unchanged.
     */
    void flip(uint64_t range_start, uint64_t range_end) {
        roaring_bitmap_flip_inplace(&roaring, range_start, range_end);
    }

    /**
     * Remove run-length encoding even when it is more space efficient.
     * Returns whether a change was applied.
     */
    bool removeRunCompression() {
        return roaring_bitmap_remove_run_compression(&roaring);
    }

    /**
     * Convert array and bitmap containers to run containers when it is more
     * efficient; also convert from run containers when more space efficient.
     * Returns true if the result has at least one run container.
     * Additional savings might be possible by calling shrinkToFit().
     */
    bool runOptimize() { return roaring_bitmap_run_optimize(&roaring); }

    /**
     * If needed, reallocate memory to shrink the memory usage. Returns
     * the number of bytes saved.
     */
    size_t shrinkToFit() { return roaring_bitmap_shrink_to_fit(&roaring); }

    /**
     * Iterate over the bitmap elements. The function iterator is called once
     * for each value, with ptr (can be NULL) as the second parameter of each
     * call.
     *
     * roaring_iterator is simply a pointer to a function that returns bool
     * (true means that the iteration should continue while false means that
     * it should stop), and takes (uint32_t,void*) as inputs.
     */
    void iterate(roaring_iterator iterator, void *ptr) const {
        roaring_iterate(&roaring, iterator, ptr);
    }

    /**
     * If the size of the roaring bitmap is strictly greater than rank, then
     * this function returns true and sets element to the element of given
     * rank. Otherwise, it returns false.
     */
    bool select(uint32_t rnk, uint32_t *element) const {
        return roaring_bitmap_select(&roaring, rnk, element);
    }

    /**
     * Computes the size of the intersection between two bitmaps.
     */
    uint64_t and_cardinality(const Roaring &r) const {
        return roaring_bitmap_and_cardinality(&roaring, &r.roaring);
    }

    /**
     * Check whether the two bitmaps intersect.
     */
    bool intersect(const Roaring &r) const {
        return roaring_bitmap_intersect(&roaring, &r.roaring);
    }

    /**
     * Computes the Jaccard index between two bitmaps. (Also known as the
     * Tanimoto distance, or the Jaccard similarity coefficient.)
     *
     * The Jaccard index is undefined if both bitmaps are empty.
     */
    double jaccard_index(const Roaring &r) const {
        return roaring_bitmap_jaccard_index(&roaring, &r.roaring);
    }

    /**
     * Computes the size of the union between two bitmaps.
     */
    uint64_t or_cardinality(const Roaring &r) const {
        return roaring_bitmap_or_cardinality(&roaring, &r.roaring);
    }

    /**
     * Computes the size of the difference (andnot) between two bitmaps.
     */
    uint64_t andnot_cardinality(const Roaring &r) const {
        return roaring_bitmap_andnot_cardinality(&roaring, &r.roaring);
    }

    /**
     * Computes the size of the symmetric difference (xor) between two
     * bitmaps.
     */
    uint64_t xor_cardinality(const Roaring &r) const {
        return roaring_bitmap_xor_cardinality(&roaring, &r.roaring);
    }

    /**
     * Returns the number of integers that are smaller or equal to x.
     */
    uint64_t rank(uint32_t x) const { return roaring_bitmap_rank(&roaring, x); }

    /**
     * Write a bitmap to a char buffer. This is meant to be compatible with
     * the Java and Go versions. Returns how many bytes were written, which
     * should be getSizeInBytes().
     *
     * Setting the portable flag to false enables a custom format that
     * can save space compared to the portable format (e.g., for very
     * sparse bitmaps).
     *
     * Boost users can serialize bitmaps in this manner:
     *
     * BOOST_SERIALIZATION_SPLIT_FREE(Roaring)
     * namespace boost {
     * namespace serialization {
     *
     * template <class Archive>
     * void save(Archive& ar, const Roaring& bitmask,
     *           const unsigned int version) {
     *     std::size_t expected_size_in_bytes = bitmask.getSizeInBytes();
     *     std::vector<char> buffer(expected_size_in_bytes);
     *     std::size_t size_in_bytes = bitmask.write(buffer.data());
     *
     *     ar& size_in_bytes;
     *     ar& boost::serialization::make_binary_object(buffer.data(),
     *                                                  size_in_bytes);
     * }
     * template <class Archive>
     * void load(Archive& ar, Roaring& bitmask,
     *           const unsigned int version) {
     *     std::size_t size_in_bytes = 0;
     *     ar& size_in_bytes;
     *     std::vector<char> buffer(size_in_bytes);
     *     ar& boost::serialization::make_binary_object(buffer.data(),
     *                                                  size_in_bytes);
     *     bitmask = Roaring::readSafe(buffer.data(), size_in_bytes);
     * }
     * }  // namespace serialization
     * }  // namespace boost
     */
    size_t write(char *buf, bool portable = true) const {
        if (portable) {
            return roaring_bitmap_portable_serialize(&roaring, buf);
        }
        return roaring_bitmap_serialize(&roaring, buf);
    }

    /**
     * Read a bitmap from a serialized version. This is meant to be compatible
     * with the Java and Go versions.
     *
     * Setting the portable flag to false enables a custom format that
     * can save space compared to the portable format (e.g., for very
     * sparse bitmaps).
     *
     * This function is unsafe in the sense that if you provide bad data,
     * many, many bytes could be read. See also readSafe.
     *
     * Throws std::runtime_error if deserialization returns NULL.
     */
    static Roaring read(const char *buf, bool portable = true) {
        roaring_bitmap_t *r = portable
                                  ? roaring_bitmap_portable_deserialize(buf)
                                  : roaring_bitmap_deserialize(buf);
        if (r == nullptr) {
            throw std::runtime_error("failed alloc while reading");
        }
        return Roaring(r);
    }

    /**
     * Read a bitmap from a serialized version, reading no more than maxbytes
     * bytes. This is meant to be compatible with the Java and Go versions.
     *
     * Throws std::runtime_error if deserialization returns NULL.
     */
    static Roaring readSafe(const char *buf, size_t maxbytes) {
        roaring_bitmap_t *r =
            roaring_bitmap_portable_deserialize_safe(buf, maxbytes);
        if (r == nullptr) {
            throw std::runtime_error("failed alloc while reading");
        }
        return Roaring(r);
    }

    /**
     * How many bytes are required to serialize this bitmap (meant to be
     * compatible with Java and Go versions).
     *
     * Setting the portable flag to false enables a custom format that
     * can save space compared to the portable format (e.g., for very
     * sparse bitmaps).
     */
    size_t getSizeInBytes(bool portable = true) const {
        if (portable) {
            return roaring_bitmap_portable_size_in_bytes(&roaring);
        }
        return roaring_bitmap_size_in_bytes(&roaring);
    }

    /**
     * Computes the intersection between two bitmaps and returns a new bitmap.
     * The current bitmap and the provided bitmap are unchanged.
     *
     * Throws std::runtime_error if the C call returns NULL.
     */
    Roaring operator&(const Roaring &o) const {
        roaring_bitmap_t *r = roaring_bitmap_and(&roaring, &o.roaring);
        if (r == nullptr) {
            throw std::runtime_error("failed materalization in and");
        }
        return Roaring(r);
    }

    /**
     * Computes the difference between two bitmaps and returns a new bitmap.
     * The current bitmap and the provided bitmap are unchanged.
     *
     * Throws std::runtime_error if the C call returns NULL.
     */
    Roaring operator-(const Roaring &o) const {
        roaring_bitmap_t *r = roaring_bitmap_andnot(&roaring, &o.roaring);
        if (r == nullptr) {
            throw std::runtime_error("failed materalization in andnot");
        }
        return Roaring(r);
    }

    /**
     * Computes the union between two bitmaps and returns a new bitmap.
     * The current bitmap and the provided bitmap are unchanged.
     *
     * Throws std::runtime_error if the C call returns NULL.
     */
    Roaring operator|(const Roaring &o) const {
        roaring_bitmap_t *r = roaring_bitmap_or(&roaring, &o.roaring);
        if (r == nullptr) {
            throw std::runtime_error("failed materalization in or");
        }
        return Roaring(r);
    }

    /**
     * Computes the symmetric difference (xor) between two bitmaps and returns
     * a new bitmap. The current bitmap and the provided bitmap are unchanged.
     *
     * Throws std::runtime_error if the C call returns NULL.
     */
    Roaring operator^(const Roaring &o) const {
        roaring_bitmap_t *r = roaring_bitmap_xor(&roaring, &o.roaring);
        if (r == nullptr) {
            throw std::runtime_error("failed materalization in xor");
        }
        return Roaring(r);
    }

    /**
     * Whether or not we apply copy-on-write.
     */
    void setCopyOnWrite(bool val) {
        roaring_bitmap_set_copy_on_write(&roaring, val);
    }

    /**
     * Print the content of the bitmap.
     */
    void printf() const { roaring_bitmap_printf(&roaring); }

    /**
     * Print the content of the bitmap into a string of the form
     * "{v1,v2,...}" ("{}" when the bitmap is empty).
     */
    std::string toString() const {
        // State threaded through the C-style callback: the text built so far
        // and the separator to emit before the next value ('{' for the first
        // value, ',' afterwards).
        struct iter_data {
            std::string str;
            char first_char = '{';
        } outer_iter_data;
        if (!isEmpty()) {
            iterate(
                [](uint32_t value, void *inner_iter_data) -> bool {
                    iter_data *state =
                        static_cast<iter_data *>(inner_iter_data);
                    state->str += state->first_char;
                    state->str += std::to_string(value);
                    state->first_char = ',';
                    return true;  // keep iterating
                },
                static_cast<void *>(&outer_iter_data));
        } else {
            outer_iter_data.str = '{';
        }
        outer_iter_data.str += '}';
        return outer_iter_data.str;
    }

    /**
     * Whether or not copy-on-write is active.
     */
    bool getCopyOnWrite() const {
        return roaring_bitmap_get_copy_on_write(&roaring);
    }

    /**
     * Computes the logical or (union) between "n" bitmaps (referenced by a
     * pointer).
     *
     * With n == 0 an empty bitmap is returned.
     * Throws std::runtime_error if the temporary pointer array cannot be
     * allocated or if the C call returns NULL.
     */
    static Roaring fastunion(size_t n, const Roaring **inputs) {
        // malloc(0) may legitimately return NULL, which used to be reported
        // as an allocation failure; the union of nothing is simply empty.
        if (n == 0) {
            return Roaring();
        }
        const roaring_bitmap_t **x = static_cast<const roaring_bitmap_t **>(
            malloc(n * sizeof(roaring_bitmap_t *)));
        if (x == nullptr) {
            throw std::runtime_error("failed memory alloc in fastunion");
        }
        for (size_t k = 0; k < n; ++k) {
            x[k] = &inputs[k]->roaring;
        }
        roaring_bitmap_t *c_ans = roaring_bitmap_or_many(n, x);
        // The pointer array is only needed for the call above.
        free(x);
        if (c_ans == nullptr) {
            throw std::runtime_error("failed memory alloc in fastunion");
        }
        return Roaring(c_ans);
    }

    typedef RoaringSetBitForwardIterator const_iterator;

    /**
     * Returns an iterator that can be used to access the position of the
     * set bits. The running time complexity of a full scan is proportional to
     * the number of set bits: be aware that if you have long strings of 1s,
     * this can be very inefficient.
     *
     * It can be much faster to use the toUint32Array method if you want to
     * retrieve the set bits.
     */
    const_iterator begin() const;

    /**
     * A bogus iterator that can be used together with begin()
     * for constructions such as for(auto i = b.begin();
     * i!=b.end(); ++i) {}
     */
    const_iterator &end() const;

    /** The wrapped C bitmap structure. */
    roaring_bitmap_t roaring;
};
/**
* Used to go through the set bits. Not optimally fast, but convenient.
*/
class RoaringSetBitForwardIterator final {
public:
typedef std::forward_iterator_tag iterator_category;
typedef uint32_t *pointer;
typedef uint32_t &reference_type;
typedef uint32_t value_type;
typedef int32_t difference_type;
typedef RoaringSetBitForwardIterator type_of_iterator;
/**
* Provides the location of the set bit.
*/
value_type operator*() const { return i.current_value; }
bool operator<(const type_of_iterator &o) {
if (!i.has_value) return false;
if (!o.i.has_value) return true;
return i.current_value < *o;
}
bool operator<=(const type_of_iterator &o) {
if (!o.i.has_value) return true;
if (!i.has_value) return false;
return i.current_value <= *o;
}
bool operator>(const type_of_iterator &o) {
if (!o.i.has_value) return false;
if (!i.has_value) return true;
return i.current_value > *o;
}
bool operator>=(const type_of_iterator &o) {
if (!i.has_value) return true;
if (!o.i.has_value) return false;
return i.current_value >= *o;
}
/**
* Move the iterator to the first value >= val.
*/
void equalorlarger(uint32_t val) {
roaring_move_uint32_iterator_equalorlarger(&i,val);
}
type_of_iterator &operator++() { // ++i, must returned inc. value
roaring_advance_uint32_iterator(&i);
return *this;
}
type_of_iterator operator++(int) { // i++, must return orig. value
RoaringSetBitForwardIterator orig(*this);
roaring_advance_uint32_iterator(&i);
return orig;
}
type_of_iterator& operator--() { // prefix --
roaring_previous_uint32_iterator(&i);
return *this;
}
type_of_iterator operator--(int) { // postfix --
RoaringSetBitForwardIterator orig(*this);
roaring_previous_uint32_iterator(&i);
return orig;
}
bool operator==(const RoaringSetBitForwardIterator &o) const {
return i.current_value == *o && i.has_value == o.i.has_value;
}
bool operator!=(const RoaringSetBitForwardIterator &o) const {
return i.current_value != *o || i.has_value != o.i.has_value;
}
RoaringSetBitForwardIterator(const Roaring &parent,
bool exhausted = false) {
if (exhausted) {
i.parent = &parent.roaring;
i.container_index = INT32_MAX;
i.has_value = false;
i.current_value = UINT32_MAX;
} else {
roaring_init_iterator(&parent.roaring, &i);
}
}
roaring_uint32_iterator_t i;
};
/** Returns an iterator initialized on this bitmap via roaring_init_iterator(). */
inline RoaringSetBitForwardIterator Roaring::begin() const {
    return const_iterator(*this, false);
}
/**
 * Returns a reference to a shared "exhausted" sentinel iterator.
 *
 * NOTE(review): the sentinel is a function-local static, so it is built once
 * from whichever bitmap calls end() first and keeps that bitmap's address as
 * its parent. Iterator equality only looks at current_value/has_value, so
 * comparisons against begin() of any bitmap still work.
 */
inline RoaringSetBitForwardIterator &Roaring::end() const {
    static const_iterator sentinel(*this, true);
    return sentinel;
}
#endif /* INCLUDE_ROARING_HH_ */