File: bpt_file.pyx

package info (click to toggle)
python-bx 0.13.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,000 kB
  • sloc: python: 17,136; ansic: 2,326; makefile: 24; sh: 8
file content (76 lines) | stat: -rw-r--r-- 2,570 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
from bx.misc.binary_file import BinaryFileReader

DEF bpt_sig = 0x78CA8C91

# bptFileHeaderSize = 32
# bptBlockHeaderSize = 4

cdef class BPTFile:
    """
    On disk B+ tree compatible with Jim Kent's bPlusTree.c
    """

    def __init__(self, file=None):
        if file is not None:
            self.attach(file)

    def attach(self, file):
        """
        Attach to an open file
        """
        self.file = file
        self.reader = reader = BinaryFileReader(file, bpt_sig)
        self.is_byteswapped = self.reader.byteswap_needed
        # Read header stuff
        self.block_size = reader.read_uint32()
        self.key_size = reader.read_uint32()
        self.value_size = reader.read_uint32()
        self.item_count = reader.read_uint64()
        reader.skip(8)
        self.root_offset = reader.tell()

    def r_find(self, bits64 block_start, key):
        """
        Recursively seek the value matching key under the subtree starting
        at file offset `block_start`
        """
        cdef UBYTE is_leaf
        cdef bits16 child_count
        cdef bits64 offset
        self.reader.seek(block_start)
        # Block header
        is_leaf = self.reader.read_uint8()
        self.reader.read_uint8()
        child_count = self.reader.read_uint16()
        if is_leaf:
            for i from 0 <= i < child_count:
                node_key = self.reader.read(self.key_size)
                node_value = self.reader.read(self.value_size)
                if node_key == key:
                    return node_value
            return None
        else:
            # Read and discard first key, store offset
            self.reader.read(self.key_size)
            offset = self.reader.read_uint64()
            # Loop until correct subtree is found
            for i from 0 <= i < child_count - 1:
                node_key = self.reader.read(self.key_size)
                if node_key > key:
                    break
                offset = self.reader.read_uint64()
            return self.r_find(offset, key)

    def find(self, key):
        """
        Find the value matching `key` (a bytestring). Returns the matching
        value as a bytestring if found, or None
        """
        # Key is greater than key_size, must not be a match
        if len(key) > self.key_size:
            return None
        # Key is less than key_size, right pad with 0 bytes
        if len(key) < self.key_size:
            key += b'\0' * (self.key_size - len(key))
        # Call the recursive finder
        return self.r_find(self.root_offset, key)