1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
|
from bx.misc.binary_file import BinaryFileReader
DEF bpt_sig = 0x78CA8C91
# bptFileHeaderSize = 32
# bptBlockHeaderSize = 4
cdef class BPTFile:
"""
On disk B+ tree compatible with Jim Kent's bPlusTree.c
"""
def __init__(self, file=None):
if file is not None:
self.attach(file)
def attach(self, file):
"""
Attach to an open file
"""
self.file = file
self.reader = reader = BinaryFileReader(file, bpt_sig)
self.is_byteswapped = self.reader.byteswap_needed
# Read header stuff
self.block_size = reader.read_uint32()
self.key_size = reader.read_uint32()
self.value_size = reader.read_uint32()
self.item_count = reader.read_uint64()
reader.skip(8)
self.root_offset = reader.tell()
def r_find(self, bits64 block_start, key):
"""
Recursively seek the value matching key under the subtree starting
at file offset `block_start`
"""
cdef UBYTE is_leaf
cdef bits16 child_count
cdef bits64 offset
self.reader.seek(block_start)
# Block header
is_leaf = self.reader.read_uint8()
self.reader.read_uint8()
child_count = self.reader.read_uint16()
if is_leaf:
for i from 0 <= i < child_count:
node_key = self.reader.read(self.key_size)
node_value = self.reader.read(self.value_size)
if node_key == key:
return node_value
return None
else:
# Read and discard first key, store offset
self.reader.read(self.key_size)
offset = self.reader.read_uint64()
# Loop until correct subtree is found
for i from 0 <= i < child_count - 1:
node_key = self.reader.read(self.key_size)
if node_key > key:
break
offset = self.reader.read_uint64()
return self.r_find(offset, key)
def find(self, key):
"""
Find the value matching `key` (a bytestring). Returns the matching
value as a bytestring if found, or None
"""
# Key is greater than key_size, must not be a match
if len(key) > self.key_size:
return None
# Key is less than key_size, right pad with 0 bytes
if len(key) < self.key_size:
key += b'\0' * (self.key_size - len(key))
# Call the recursive finder
return self.r_find(self.root_offset, key)
|