1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820
|
# This file is part of Invenio.
# Copyright (C) 2007, 2008, 2009, 2010, 2011, 2013, 2014, 2015, 2016 CERN.
#
# SPDX-License-Identifier: LGPL-3.0-or-later
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation; either version 3 of the
# License, or (at your option) any later version.
#
# Invenio is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Invenio; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
# cython: infer_types=True
# cython: language_level=2
"""
Defines an intbitset data object to hold unordered sets of unsigned
integers with ultra fast set operations, implemented via bit vectors
and Python C extension to optimize speed and memory usage.
Emulates the Python built-in set class interface with some additional
specific methods such as its own fast dump and load marshalling
functions. Uses real bits to optimize memory usage, so may have
issues with endianness if you transport serialized bitsets between
various machine architectures.
Please note that no bigger than __maxelem__ elements can be added to
an intbitset and, if CFG_INTBITSET_ENABLE_SANITY_CHECKS is disabled,
you will receive unpredictable results.
Note to developers: If you make modification to this file you
have to manually regenerate intbitset.c by running:
$ cython intbitset.pyx
and then commit generated intbitset.c.
"""
#cython: infer_types=True
import sys
import zlib
from array import array
from cpython.buffer cimport PyBUF_SIMPLE, Py_buffer, PyObject_GetBuffer, PyBuffer_Release
from cpython.ref cimport PyObject
CFG_INTBITSET_ENABLE_SANITY_CHECKS = False
from intbitset_helper import _
from intbitset_version import __version__
__all__ = ['intbitset', '__version__']
cdef extern from *:
## See: <http://wiki.cython.org/FAQ/#HowdoIuse.27const.27.3F>
## In order to avoid warnings with PyObject_AsReadBuffer
ctypedef void* const_void_ptr "const void*"
cdef extern from "intbitset.h":
ctypedef int Py_ssize_t
object PyBytes_FromStringAndSize(char *s, Py_ssize_t len)
object PyString_FromStringAndSize(char *s, Py_ssize_t len)
cdef extern from "intbitset.h":
ctypedef unsigned long long int word_t
#ctypedef unsigned char bool_t
ctypedef struct IntBitSet:
int size
int allocated
word_t trailing_bits
int tot
word_t *bitset
int wordbytesize
int wordbitsize
int maxelem
IntBitSet *intBitSetCreate(int size, bint trailing_bits)
IntBitSet *intBitSetCreateFromBuffer(void *buf, int bufsize)
IntBitSet *intBitSetResetFromBuffer(IntBitSet *bitset, void *buf, int bufsize)
IntBitSet *intBitSetReset(IntBitSet *bitset)
void intBitSetDestroy(IntBitSet *bitset)
IntBitSet *intBitSetClone(IntBitSet *bitset)
int intBitSetGetSize(IntBitSet *bitset)
int intBitSetGetAllocated(IntBitSet *bitset)
int intBitSetGetTot(IntBitSet * bitset)
bint intBitSetIsInElem(IntBitSet *bitset, unsigned int elem)
void intBitSetAddElem(IntBitSet *bitset, unsigned int elem)
void intBitSetDelElem(IntBitSet *bitset, unsigned int elem)
bint intBitSetEmpty(IntBitSet *bitset)
IntBitSet *intBitSetUnion(IntBitSet *x, IntBitSet *y)
IntBitSet *intBitSetIntersection(IntBitSet *x, IntBitSet *y)
IntBitSet *intBitSetSub(IntBitSet *x, IntBitSet *y)
IntBitSet *intBitSetXor(IntBitSet *x, IntBitSet *y)
IntBitSet *intBitSetIUnion(IntBitSet *dst, IntBitSet *src)
IntBitSet *intBitSetIIntersection(IntBitSet *dst, IntBitSet *src)
IntBitSet *intBitSetISub(IntBitSet *x, IntBitSet *y)
IntBitSet *intBitSetIXor(IntBitSet *x, IntBitSet *y)
bint intBitSetIsDisjoint(IntBitSet *x, IntBitSet *y)
int intBitSetGetNext(IntBitSet *x, int last)
int intBitSetGetLast(IntBitSet *x)
unsigned char intBitSetCmp(IntBitSet *x, IntBitSet *y)
__maxelem__ = maxelem
cdef class intbitset:
"""
Defines an intbitset data object to hold unordered sets of
unsigned integers with ultra fast set operations, implemented via
bit vectors and Python C extension to optimize speed and memory
usage.
Emulates the Python built-in set class interface with some
additional specific methods such as its own fast dump and load
marshalling functions. Uses real bits to optimize memory usage,
so may have issues with endianness if you transport serialized
bitsets between various machine architectures.
The constructor accept the following parameters:
rhs=0,
int preallocate=-1,
int trailing_bits=0,
bint sanity_checks=CFG_INTBITSET_ENABLE_SANITY_CHECKS,
int no_allocate=0:
where rhs can be:
* ``int/long`` for creating allocating empty intbitset that will hold at
least rhs elements, before being resized
* ``intbitset`` for cloning
* ``bytes`` for retrieving an intbitset that was dumped into a byte string
* ``array`` for retrieving an intbitset that was dumped into a string stored
in an array
* sequence made of integers for copying all the elements from the sequence.
If minsize is specified than it is initially allocated enough space to
hold up to minsize integers, otherwise the biggest element of the sequence
will be used.
* sequence made of tuples: then the first element of each tuple is
considered as an integer (as in the sequence made of integers).
The other arguments are:
* ``preallocate`` is a suggested initial upper bound on the numbers that
will be stored, by looking at rhs a sequence of number.
* ``trailing_bits`` is 1, then the set will contain "all" the positive integers
* ``no_allocate`` and ``sanity_checks`` are used internally and should never be set.
"""
cdef IntBitSet *bitset
cdef bint sanity_checks
def __cinit__(
self not None,
rhs=0,
int preallocate=-1,
int trailing_bits=0,
bint sanity_checks=CFG_INTBITSET_ENABLE_SANITY_CHECKS,
int no_allocate=0,
):
cdef Py_ssize_t size = 0
cdef const_void_ptr buf = NULL
cdef int elem
cdef int last
cdef int remelem
cdef bint tuple_of_tuples
cdef Py_buffer view
self.sanity_checks = sanity_checks
#print >> sys.stderr, "intbitset.__cinit__ is called"
msg = "Error"
self.bitset = NULL
try:
if no_allocate:
return
if type(rhs) in (int, long):
if rhs < 0:
raise ValueError("rhs can't be negative")
self.bitset = intBitSetCreate(rhs, trailing_bits)
elif type(rhs) is intbitset:
self.bitset = intBitSetClone((<intbitset>rhs).bitset)
elif type(rhs) in (bytes, array):
try:
if type(rhs) is array:
rhs = rhs.tobytes()
tmp = zlib.decompress(rhs)
if PyObject_GetBuffer(tmp, &view, PyBUF_SIMPLE) != 0:
raise ValueError("Unable to get buffer")
try:
buf = <const_void_ptr>view.buf
size = view.len
if (size % wordbytesize):
## Wrong size!
raise Exception()
self.bitset = intBitSetCreateFromBuffer(buf, size)
finally:
PyBuffer_Release(&view)
except Exception as e:
raise ValueError("rhs is corrupted: %s" % str(e))
elif hasattr(rhs, '__iter__'):
tuple_of_tuples = (
rhs
and hasattr(rhs, '__getitem__')
and hasattr(rhs[0], '__getitem__')
)
try:
if preallocate < 0:
if rhs and (not hasattr(rhs, '__getitem__') or type(rhs[0]) is int):
try:
preallocate = max(rhs)
except ValueError:
preallocate = 0
else:
preallocate = 0
if self.sanity_checks:
if not (0 <= preallocate < maxelem):
raise OverflowError("Can't store integers bigger than %s" % maxelem)
self.bitset = intBitSetCreate(preallocate, trailing_bits)
if trailing_bits:
last = 0
if self.sanity_checks:
if tuple_of_tuples:
for tmp_tuple in rhs:
elem = tmp_tuple[0]
if elem < 0:
raise ValueError("Negative numbers, not allowed")
elif elem > maxelem:
raise OverflowError("Elements must be <= %s" % maxelem)
for remelem from last <= remelem < elem:
intBitSetDelElem(self.bitset, remelem)
last = elem + 1
else:
for elem in rhs:
if elem < 0:
raise ValueError("Negative numbers, not allowed")
elif elem > maxelem:
raise OverflowError("Elements must be <= %s" % maxelem)
for remelem from last <= remelem < elem:
intBitSetDelElem(self.bitset, remelem)
last = elem + 1
else:
if tuple_of_tuples:
for tmp_tuple in rhs:
elem = tmp_tuple[0]
for remelem from last <= remelem < elem:
intBitSetDelElem(self.bitset, remelem)
last = elem + 1
else:
for elem in rhs:
for remelem from last <= remelem < elem:
intBitSetDelElem(self.bitset, remelem)
last = elem + 1
else:
if self.sanity_checks:
if tuple_of_tuples:
for tmp_tuple in rhs:
elem = tmp_tuple[0]
if elem < 0:
raise ValueError("Negative numbers, not allowed")
elif elem > maxelem:
raise OverflowError("Elements must be <= %s" % maxelem)
intBitSetAddElem(self.bitset, elem)
else:
for elem in rhs:
if elem < 0:
raise ValueError("Negative numbers, not allowed")
elif elem > maxelem:
raise OverflowError("Elements must be <= %s" % maxelem)
intBitSetAddElem(self.bitset, elem)
else:
if tuple_of_tuples:
for tmp_tuple in rhs:
elem = tmp_tuple[0]
intBitSetAddElem(self.bitset, elem)
else:
for elem in rhs:
intBitSetAddElem(self.bitset, elem)
except Exception as e:
raise ValueError("retrieving integers from rhs is impossible: %s" % str(e))
else:
raise TypeError("rhs is of unknown type %s" % type(rhs))
except:
intBitSetDestroy(self.bitset)
raise
def __dealloc__(self not None):
#print >> sys.stderr, "intbitset.__dealloc__ is called"
intBitSetDestroy(self.bitset)
def __contains__(self not None, int elem):
if self.sanity_checks:
if elem < 0:
raise ValueError("Negative numbers, not allowed")
elif elem > maxelem:
raise OverflowError("Element must be <= %s" % maxelem)
return intBitSetIsInElem(self.bitset, elem) != 0
def __cmp__(self not None, intbitset rhs not None):
raise TypeError("cannot compare intbitset using cmp()")
def __richcmp__(self not None, rhs, int op):
if not isinstance(self, intbitset) or not isinstance(rhs, intbitset):
return False
cdef short unsigned int tmp
tmp = intBitSetCmp((<intbitset>self).bitset, (<intbitset>rhs).bitset)
if op == 0: # <
return tmp == 1
if op == 1: # <=
return tmp <= 1
if op == 2: # ==
return tmp == 0
if op == 3: # !=
return tmp > 0
if op == 4: # >
return tmp == 2
if op == 5: # >=
return tmp in (0, 2)
def __len__(self not None):
return intBitSetGetTot(self.bitset)
def __hash__(self not None):
return hash(
PyBytes_FromStringAndSize(
<char *>self.bitset.bitset,
wordbytesize * (intBitSetGetTot(self.bitset) / wordbitsize + 1)
)
)
def __nonzero__(self not None):
return not intBitSetEmpty(self.bitset)
def __deepcopy__(self not None, memo):
return intbitset(self)
def __delitem__(self not None, int elem):
if self.sanity_checks:
if elem < 0:
raise ValueError("Negative numbers, not allowed")
elif elem > maxelem:
raise OverflowError("Element must be <= %s" % maxelem)
intBitSetDelElem(self.bitset, elem)
def __iadd__(self not None, rhs):
cdef int elem
if isinstance(rhs, (int, long)):
if self.sanity_checks:
if rhs < 0:
raise ValueError("Negative numbers, not allowed")
elif rhs > maxelem:
raise OverflowError("rhs must be <= %s" % maxelem)
intBitSetAddElem(self.bitset, rhs)
elif isinstance(rhs, intbitset):
intBitSetIUnion(self.bitset, (<intbitset> rhs).bitset)
else:
if self.sanity_checks:
for elem in rhs:
if elem < 0:
raise ValueError("Negative numbers, not allowed")
elif elem > maxelem:
raise OverflowError("Elements must be <= %s" % maxelem)
intBitSetAddElem(self.bitset, elem)
else:
for elem in rhs:
if elem < 0:
raise ValueError("Negative numbers, not allowed")
elif elem > maxelem:
raise OverflowError("Elements must be <= %s" % maxelem)
intBitSetAddElem(self.bitset, elem)
return self
def __isub__(self not None, rhs not None):
"""Remove all elements of another set from this set."""
cdef int elem
if isinstance(rhs, (int, long)):
if self.sanity_checks:
if rhs < 0:
raise ValueError("Negative numbers, not allowed")
elif rhs > maxelem:
raise OverflowError("rhs must be <= %s" % maxelem)
intBitSetDelElem(self.bitset, rhs)
elif isinstance(rhs, intbitset):
intBitSetISub(self.bitset, (<intbitset> rhs).bitset)
else:
if self.sanity_checks:
for elem in rhs:
if elem < 0:
raise ValueError("Negative numbers, not allowed")
elif elem > maxelem:
raise OverflowError("Elements must be <= %s" % maxelem)
intBitSetDelElem(self.bitset, elem)
else:
for elem in rhs:
intBitSetDelElem(self.bitset, elem)
return self
def __sub__(self not None, intbitset rhs not None):
"""Return the difference of two intbitsets as a new set.
(i.e. all elements that are in this intbitset but not the other.)
"""
cdef intbitset ret = intbitset(no_allocate=1)
(<intbitset>ret).bitset = intBitSetSub((<intbitset> self).bitset, rhs.bitset)
return ret
def __and__(self not None, intbitset rhs not None):
"""Return the intersection of two intbitsets as a new set.
(i.e. all elements that are in both intbitsets.)
"""
cdef intbitset ret = intbitset(no_allocate=1)
(<intbitset>ret).bitset = intBitSetIntersection((<intbitset> self).bitset, rhs.bitset)
return ret
def __iand__(self not None, intbitset rhs not None):
"""Update a intbitset with the intersection of itself and another."""
intBitSetIIntersection(self.bitset, rhs.bitset)
return self
def __or__(self not None, intbitset rhs not None):
"""Return the union of two intbitsets as a new set.
(i.e. all elements that are in either intbitsets.)
"""
cdef intbitset ret = intbitset(no_allocate=1)
(<intbitset>ret).bitset = intBitSetUnion((<intbitset> self).bitset, rhs.bitset)
return ret
def __ior__(self not None, intbitset rhs not None):
"""Update a intbitset with the union of itself and another."""
intBitSetIUnion(self.bitset, rhs.bitset)
return self
def __xor__(self not None, intbitset rhs not None):
"""Return the symmetric difference of two sets as a new set.
(i.e. all elements that are in exactly one of the sets.)
"""
cdef intbitset ret = intbitset(no_allocate=1)
(<intbitset>ret).bitset = intBitSetXor((<intbitset> self).bitset, rhs.bitset)
return ret
def __ixor__(self not None, intbitset rhs not None):
"""Update an intbitset with the symmetric difference of itself and another.
"""
intBitSetIXor(self.bitset, rhs.bitset)
return self
def __repr__(self not None):
finite_list = self.extract_finite_list()
if self.bitset.trailing_bits:
return "intbitset(%s, trailing_bits=True)" % repr(finite_list)
else:
return "intbitset(%s)" % repr(finite_list)
def __str__(self not None):
cdef int tot
tot = intBitSetGetTot(self.bitset)
if tot < 0:
return "intbitset([...], trailing_bits=True)"
elif tot > 10:
begin_list = self[0:5]
end_list = self[tot - 5:tot]
ret = "intbitset(["
for n in begin_list:
ret += '%i, ' % n
ret += "..., "
for n in end_list:
ret += '%i, ' % n
ret = ret[:-2]
ret += '])'
return ret
else:
return self.__repr__()
def __getitem__(self not None, object key):
cdef Py_ssize_t i
cdef int elem = -1
cdef int start
cdef int end
cdef int step
if hasattr(key, 'indices'):
## This is a slice object!
if self.bitset.trailing_bits and (key.start < 0 or key.stop < 0):
raise IndexError("negative indexes are not allowed on infinite intbitset")
retset = intbitset()
start, end, step = key.indices(intBitSetGetTot(self.bitset))
if step < 0:
raise ValueError("negative steps are not yet supported")
for i in range(start):
elem = intBitSetGetNext(self.bitset, elem)
if elem < 0:
return retset
for i in range(end - start):
elem = intBitSetGetNext(self.bitset, elem)
if elem < 0:
return retset
if i % step == 0:
retset.add(elem)
return retset
else:
end = key
if end < 0:
if self.bitset.trailing_bits:
raise IndexError("negative indexes are not allowed on infinite intbitset")
end += intBitSetGetTot(self.bitset)
if end < 0:
raise IndexError("intbitset index out of range")
if end >= intBitSetGetTot(self.bitset):
raise IndexError("intbitset index out of range")
for i in range(end + 1):
elem = intBitSetGetNext(self.bitset, elem)
return elem
# pickle interface
def __reduce__(self not None):
return _, (self.fastdump(),)
__safe_for_unpickling__ = True
# Iterator interface
def __iter__(self not None):
if self.bitset.trailing_bits:
raise OverflowError("It's impossible to iterate over an infinite set.")
return intbitset_iterator(self)
# Customized interface
cpdef add(intbitset self, int elem):
"""Add an element to a set.
This has no effect if the element is already present."""
if self.sanity_checks:
if elem < 0:
raise ValueError("Negative numbers, not allowed")
elif elem > maxelem:
raise OverflowError("Element must be <= %s" % maxelem)
intBitSetAddElem(self.bitset, elem)
cpdef clear(intbitset self):
intBitSetReset(self.bitset)
cpdef discard(intbitset self, int elem):
"""Remove an element from a intbitset if it is a member.
If the element is not a member, do nothing."""
if self.sanity_checks:
if elem < 0:
raise ValueError("Negative numbers, not allowed")
elif elem > maxelem:
raise OverflowError("Element must be <= %s" % maxelem)
intBitSetDelElem(self.bitset, elem)
symmetric_difference = __xor__
symmetric_difference_update = __ixor__
cpdef issubset(intbitset self, rhs):
"""Report whether another set contains this set."""
return self.__le__(rhs)
cpdef issuperset(intbitset self, rhs):
"""Report whether this set contains another set."""
return self.__ge__(rhs)
# Dumping & Loading
cpdef fastdump(intbitset self):
"""Return a compressed string representation suitable to be saved
somewhere."""
cdef Py_ssize_t size
size = intBitSetGetSize((<intbitset> self).bitset)
tmp = PyBytes_FromStringAndSize(<char *>self.bitset.bitset, ( size + 1) * wordbytesize)
return zlib.compress(tmp)
cpdef fastload(intbitset self, strdump):
"""Load a compressed string representation produced by a previous call
to the fastdump method into the current intbitset. The previous content
will be replaced."""
cdef Py_ssize_t size
cdef const_void_ptr buf
cdef Py_buffer view
buf = NULL
size = 0
try:
if type(strdump) is array:
strdump = strdump.tostring()
# tmp needed to not be garbage collected
tmp = zlib.decompress(strdump)
if PyObject_GetBuffer(tmp, &view, PyBUF_SIMPLE) != 0:
raise ValueError("Unable to get buffer")
try:
buf = <const_void_ptr>view.buf
size = view.len
if (size % wordbytesize):
## Wrong size!
raise Exception()
intBitSetResetFromBuffer((<intbitset> self).bitset, buf, size)
finally:
PyBuffer_Release(&view)
except:
raise ValueError("strdump is corrupted")
cpdef copy(intbitset self):
"""Return a shallow copy of a set."""
return intbitset(self)
cpdef pop(intbitset self):
"""Remove and return an arbitrary set element.
Note: intbitset implementation of .pop() differs from the native ``set``
implementation by guaranteeing returning always the largest element.
"""
cdef int ret
ret = intBitSetGetLast(self.bitset)
if ret < 0:
raise KeyError("pop from an empty or infinite intbitset")
intBitSetDelElem(self.bitset, ret)
return ret
cpdef remove(intbitset self, int elem):
"""Remove an element from a set; it must be a member.
If the element is not a member, raise a KeyError.
"""
if self.sanity_checks:
if elem < 0:
raise ValueError("Negative numbers, not allowed")
elif elem > maxelem:
raise OverflowError("Elements must be <= %s" % maxelem)
if intBitSetIsInElem(self.bitset, elem):
intBitSetDelElem(self.bitset, elem)
else:
raise KeyError(elem)
cpdef strbits(intbitset self):
"""Return a string of 0s and 1s representing the content in memory
of the intbitset.
"""
cdef int i
cdef int last
if (<intbitset> self).bitset.trailing_bits:
raise OverflowError("It's impossible to print an infinite set.")
last = 0
ret = []
for i in self:
ret.append('0'*(i-last)+'1')
last = i+1
return ''.join(ret)
def update(self not None, *args):
"""Update the intbitset, adding elements from all others."""
cdef intbitset iarg
for arg in args:
iarg = arg if hasattr(arg, "bitset") else intbitset(arg)
intBitSetIUnion(self.bitset, iarg.bitset)
union_update = update
def intersection_update(self not None, *args):
"""Update the intbitset, keeping only elements found in it and all others."""
cdef intbitset iarg
for arg in args:
iarg = arg if hasattr(arg, "bitset") else intbitset(arg)
intBitSetIIntersection(self.bitset, iarg.bitset)
def difference_update(self not None, *args):
"""Update the intbitset, removing elements found in others."""
cdef intbitset iarg
for arg in args:
iarg = arg if hasattr(arg, "bitset") else intbitset(arg)
intBitSetISub(self.bitset, iarg.bitset)
def union(self not None, *args):
"""Return a new intbitset with elements from the intbitset and all others."""
cdef intbitset ret = intbitset(self)
cdef intbitset iarg
for arg in args:
iarg = arg if hasattr(arg, "bitset") else intbitset(arg)
intBitSetIUnion(ret.bitset, iarg.bitset)
return ret
def intersection(self not None, *args):
"""Return a new intbitset with elements common to the intbitset and all others."""
cdef intbitset ret = intbitset(self)
cdef intbitset iarg
for arg in args:
iarg = arg if hasattr(arg, "bitset") else intbitset(arg)
intBitSetIIntersection(ret.bitset, iarg.bitset)
return ret
def difference(self not None, *args):
"""Return a new intbitset with elements from the intbitset that are not in the others."""
cdef intbitset ret = intbitset(self)
cdef intbitset iarg
for arg in args:
iarg = arg if hasattr(arg, "bitset") else intbitset(arg)
intBitSetISub(ret.bitset, iarg.bitset)
return ret
def isdisjoint(self not None, intbitset rhs not None):
"""Return True if two intbitsets have a null intersection."""
return intBitSetIsDisjoint(self.bitset, rhs.bitset)
cpdef update_with_signs(intbitset self, rhs):
"""Given a dictionary rhs whose keys are integers, remove all the integers
whose value are less than 0 and add every integer whose value is 0 or more"""
cdef int value
try:
if self.sanity_checks:
for value, sign in rhs.iteritems():
if value < 0:
raise ValueError("Negative numbers, not allowed")
elif value > maxelem:
raise OverflowError("Elements must <= %s" % maxelem)
if sign < 0:
intBitSetDelElem(self.bitset, value)
else:
intBitSetAddElem(self.bitset, value)
else:
for value, sign in rhs.iteritems():
if sign < 0:
intBitSetDelElem(self.bitset, value)
else:
intBitSetAddElem(self.bitset, value)
except AttributeError:
raise TypeError("rhs should be a valid dictionary with integers keys and integer values")
cpdef get_size(intbitset self):
return intBitSetGetSize(self.bitset)
cpdef get_allocated(intbitset self):
return intBitSetGetAllocated(self.bitset)
cpdef is_infinite(intbitset self):
"""Return True if the intbitset is infinite. (i.e. trailing_bits=True
was used in the constructor.)"""
return self.bitset.trailing_bits != 0
cpdef extract_finite_list(intbitset self, int up_to=-1):
"""Return a finite list of elements sufficient to be passed to intbitset
constructor toghether with the proper value of trailing_bits in order
to reproduce this intbitset. At least up_to integer are looked for when
they are inside the intbitset but not necessarily needed to build the
intbitset"""
cdef int true_up_to
cdef int last
if self.sanity_checks and up_to > maxelem:
raise OverflowError("up_to must be <= %s" % maxelem)
ret = []
true_up_to = max(up_to, (intBitSetGetSize(self.bitset)) * wordbitsize)
last = -1
while last < true_up_to:
last = intBitSetGetNext(self.bitset, last)
if last == -2:
break
ret.append(last)
return ret
cpdef get_wordbitsize(intbitset self):
return wordbitsize
cpdef get_wordbytsize(intbitset self):
return wordbytesize
cpdef tolist(intbitset self):
"""Legacy method to retrieve a list of all the elements inside an
intbitset.
"""
if self.bitset.trailing_bits:
raise OverflowError("It's impossible to retrieve a list of an infinite set")
return self.extract_finite_list()
cdef object __weakref__
cdef class intbitset_iterator:
cdef int last
cdef IntBitSet *bitset
cdef bint sanity_checks
def __cinit__(intbitset_iterator self, intbitset bitset not None):
#print >> sys.stderr, "intbitset_iterator.__cinit__ is called"
self.last = -1
## A copy should be performed, in case the original bitset disappears
## as in "for x in intbitset([1,2,3])"!
self.bitset = intBitSetClone(bitset.bitset)
self.sanity_checks = CFG_INTBITSET_ENABLE_SANITY_CHECKS
def __dealloc__(intbitset_iterator self):
#print >> sys.stderr, "intbitset_iterator.__dealloc__ is called"
intBitSetDestroy(self.bitset)
def __next__(intbitset_iterator self):
if self.last == -2:
raise StopIteration()
self.last = intBitSetGetNext(self.bitset, self.last)
if self.sanity_checks and (self.bitset.allocated < self.bitset.size):
raise MemoryError(
"intbitset corrupted: allocated: %s, size: %s"
% (self.bitset.allocated, self.bitset.size)
)
if self.last < 0:
self.last = -2
raise StopIteration()
return self.last
def __iter__(intbitset_iterator self):
return self
cdef object __weakref__
|