File: unbcf_fast.pyx

package info (click to toggle)
python-rosettasciio 0.7.1-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 144,644 kB
  • sloc: python: 36,638; xml: 2,582; makefile: 20; ansic: 4
file content (331 lines) | stat: -rw-r--r-- 12,210 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
import cython
import numpy as np
import sys

cdef int byte_order
if sys.byteorder == 'little':
    byte_order = 0
else:
    byte_order = 1

from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t

# fused unsigned integer type for generalised programing:

ctypedef fused channel_t:
    uint8_t
    uint16_t
    uint32_t


# instructivelly packed array structs:

cdef packed struct Bunch_head:  # size 2bytes
    uint8_t size
    uint8_t channels

# endianess agnostic reading functions... probably very slow:

@cython.boundscheck(False)
cdef uint16_t read_16(unsigned char *pointer):

    return ((<uint16_t>pointer[1]<<8) & 0xff00) | <uint16_t>pointer[0]

@cython.boundscheck(False)
cdef uint32_t read_32(unsigned char *pointer):

    return ((<uint32_t>pointer[3]<<24) & <uint32_t>0xff000000) |\
           ((<uint32_t>pointer[2]<<16) & <uint32_t>0xff0000) |\
           ((<uint32_t>pointer[1]<<8) & <uint32_t>0xff00) |\
             <uint32_t>pointer[0]

@cython.boundscheck(False)
cdef uint64_t read_64(unsigned char *pointer):
    # skiping the most high bits, as such a huge values is impossible
    # for present bruker technology. If it would change - uncomment bellow and recompile.
    #return ((<uint64_t>pointer[7]<<56) & <uint64_t>0xff00000000000000) |\
    #       ((<uint64_t>pointer[6]<<48) & <uint64_t>0xff000000000000) |\
    #       ((<uint64_t>pointer[5]<<40) & <uint64_t>0xff0000000000) |\
    return ((<uint64_t>pointer[4]<<32) & <uint64_t>0xff00000000) |\
           ((<uint64_t>pointer[3]<<24) & <uint64_t>0xff000000) |\
           ((<uint64_t>pointer[2]<<16) & <uint64_t>0xff0000) |\
           ((<uint64_t>pointer[1]<<8) & <uint64_t>0xff00) |\
             <uint64_t>pointer[0]


# datastream class:

@cython.boundscheck(False)
cdef class DataStream:

    cdef unsigned char *buffer2
    cdef int size, size_chnk
    cdef int offset
    cdef bytes raw_bytes
    cdef public object blocks  # public - because it is python object

    def __cinit__(self, blocks, int size_chnk):
        self.size_chnk = size_chnk
        self.size = size_chnk
        self.offset = 0

    def __init__(self, blocks, int size_chnk):
        self.blocks = blocks
        self.raw_bytes = next(self.blocks)  # python bytes buffer
        self.buffer2 = <bytes>self.raw_bytes  # C unsigned char buffer

    cdef void seek(self, int value):
        """move offset to given value.
        NOTE: it do not check if value is in bounds of buffer!"""
        self.offset = value

    cdef void skip(self, int length):
        """increase offset by given value,
        check if new offset is in bounds of buffer length
        else load up next block"""
        if (self.offset + length) > self.size:
            self.load_next_block()
        self.offset = self.offset + length

    cdef uint8_t read_8(self):
        if (self.offset + 1) > self.size:
            self.load_next_block()
        self.offset += 1
        return <uint8_t>self.buffer2[self.offset-1]

    cdef uint16_t read_16(self):
        if (self.offset + 2) > self.size:
            self.load_next_block()
        self.offset += 2
        # endianess agnostic way... probably very slow:
        return read_16(&self.buffer2[self.offset-2])

    cdef uint32_t read_32(self):
        if (self.offset + 4) > self.size:
            self.load_next_block()
        self.offset += 4
        # endianess agnostic way... probably very slow:
        return read_32(&self.buffer2[self.offset-4])

    cdef uint64_t read_64(self):
        if (self.offset + 8) > self.size:
            self.load_next_block()
        self.offset += 8
        return read_64(&self.buffer2[self.offset-8])

    cdef unsigned char *ptr_to(self, int length):
        """get the pointer to the raw buffer,
        making sure the array have the required length
        counting from the offset, increase the internal offset
        by given length"""
        if (self.offset + length) > self.size:
            self.load_next_block()
        self.offset += length
        return &self.buffer2[self.offset-length]

    cdef void load_next_block(self):
        """take the reminder of buffer (offset:end) and
        append new block of raw data, and overwrite old buffer
        handle with new, set offset to 0"""
        self.size = self.size_chnk + self.size - self.offset
        self.buffer2 = b''
        self.raw_bytes = self.raw_bytes[self.offset:] + next(self.blocks)
        self.offset = 0
        self.buffer2 = <bytes>self.raw_bytes


# function for looping throught the bcf pixels:

@cython.cdivision(True)
@cython.boundscheck(False)
cdef bin_to_numpy(DataStream data_stream,
                  channel_t[:, :, :] hypermap,
                  int max_chan,
                  int downsample):

    cdef uint32_t height, width, pix_in_line, pixel_x, add_pulse_size
    cdef uint32_t dummy1, line_cnt, data_size2
    cdef uint16_t chan1, chan2, flag, data_size1, n_of_pulses
    cdef uint16_t add_val, j

    height = data_stream.read_32()
    width = data_stream.read_32()
    data_stream.seek(<int>0x1A0) #the begining of the array
    for line_cnt in range(height):
        pix_in_line = data_stream.read_32()
        for dummy1 in range(pix_in_line):
            pixel_x = data_stream.read_32()
            chan1 = data_stream.read_16()
            chan2 = data_stream.read_16()
            data_stream.skip(4)  # unknown static value
            flag = data_stream.read_16()
            data_size1 = data_stream.read_16()
            n_of_pulses = data_stream.read_16()
            data_size2 = data_stream.read_32()
            if flag == 0:
                unpack16bit(hypermap,
                            pixel_x // downsample,
                            line_cnt // downsample,
                            data_stream.ptr_to(data_size2),
                            n_of_pulses,
                            max_chan)
            elif flag == 1:
                unpack12bit(hypermap,
                            pixel_x // downsample,
                            line_cnt // downsample,
                            data_stream.ptr_to(data_size2),
                            n_of_pulses,
                            max_chan)
            else:
                unpack_instructed(hypermap,
                                  pixel_x // downsample,
                                  line_cnt // downsample,
                                  data_stream.ptr_to(data_size2 - 4),
                                  data_size2 - 4,
                                  max_chan)
                if n_of_pulses > 0:
                    add_pulse_size = data_stream.read_32()
                    for j in range(n_of_pulses):
                        add_val = data_stream.read_16()
                        if add_val < max_chan:
                            hypermap[line_cnt // downsample,
                                     pixel_x // downsample,
                                     add_val] += 1
                else:
                    data_stream.skip(4)


#functions to extract pixel spectrum:

@cython.cdivision(True)
@cython.boundscheck(False)
cdef void unpack_instructed(channel_t[:, :, :] dest, int x, int y,
                            unsigned char * src, uint16_t data_size,
                            int cutoff):
    """
    unpack instructivelly packed delphi array into selection
    of memoryview
    """
    cdef int offset = 0
    cdef int channel = 0
    cdef int i, j, length
    cdef int gain = 0
    cdef Bunch_head* head
    cdef uint16_t val16
    cdef uint32_t val32
    while (offset < data_size):
        head =<Bunch_head*>&src[offset]
        offset +=2
        if head.size == 0:  # empty channels (zero counts)
            channel += head.channels
        else:
            if head.size == 1:
                gain = <int>(src[offset])
            elif head.size == 2:
                gain = <int>read_16(&src[offset])
            elif head.size == 4:
                gain = <int>read_32(&src[offset])
            else:
                gain = <int>read_64(&src[offset])
            offset += head.size
            if head.size == 1:  # special nibble switching case
                for i in range(head.channels):
                    if (i+channel) < cutoff:
                        #reverse the nibbles:
                        if i % 2 == 0:
                            dest[y, x, i+channel] += <channel_t>((src[offset +(i//2)] & 15) + gain)
                        else:
                            dest[y, x, i+channel] += <channel_t>((src[offset +(i//2)] >> 4) + gain)
                if head.channels % 2 == 0:
                    length = <int>(head.channels // 2)
                else:
                    length = <int>((head.channels // 2) +1)
            elif head.size == 2:
                for i in range(head.channels):
                    if (i+channel) < cutoff:
                        dest[y, x, i+channel] += <channel_t>(src[offset + i] + gain)
                length = <int>(head.channels * head.size // 2)
            elif head.size == 4:
                for i in range(head.channels):
                    if (i+channel) < cutoff:
                        val16 = read_16(&src[offset + i*2])
                        dest[y, x, i+channel] += <channel_t>(val16 + gain)
                length = <int>(head.channels * head.size // 2)
            else:
                for i in range(head.channels):
                    if (i+channel) < cutoff:
                        val32 = read_32(&src[offset + i*2])
                        dest[y, x, i+channel] += <channel_t>(val32 + gain)
                length = <int>(head.channels * head.size // 2)
            offset += length
            channel += head.channels


@cython.cdivision(True)
@cython.boundscheck(False)
cdef void unpack12bit(channel_t[:, :, :] dest, int x, int y,
                      unsigned char * src,
                      uint16_t no_of_pulses,
                      int cutoff):
    """unpack 12bit packed array into selection of memoryview"""
    cdef int i, channel
    for i in range(no_of_pulses):
        if i % 4 == 0:
            channel = <int>((src[6*(i//4)] >> 4)+(src[6*(i//4)+1] << 4))
        elif i % 4 == 1:
            channel = <int>(((src[6*(i//4)] << 8 ) + (src[6*(i//4)+3])) & 0xFFF)
        elif i % 4 == 2:
            channel = <int>((src[6*(i//4)+2] << 4) + (src[6*(i//4)+5] >> 4))
        else:
            channel = <int>(((src[6*(i//4)+5] << 8) + src[6*(i//4)+4]) & 0xFFF)
        if channel < cutoff:
            dest[y, x, channel] += 1


@cython.cdivision(True)
@cython.boundscheck(False)
cdef void unpack16bit(channel_t[:, :, :] dest, int x, int y,
                      unsigned char * src,
                      uint16_t no_of_pulses,
                      int cutoff):
    """unpack 16bit packed array into selection of memoryview"""
    cdef int i, channel
    for i in range(no_of_pulses):
        channel = <int>(src[2*i] + ((src[2*i+1] << 8) & 0xff00))
        if channel < cutoff:
            dest[y, x, channel] += 1


#the main function:

def parse_to_numpy(virtual_file, shape, dtype, downsample=1):
    """Parse the hyperspectral cube from brukers bcf binary file
    and return it as numpy array
    
    Parameters
    ----------
    virtual_file : SFSTreeItem
        Virtual file handle returned by SFS_reader instance.
    shape : tuple
        Shape of the dataset.
    dtype : numpy.dtype
        Data type of the dataset.
    downsample : int, optional
        Value for downsampling in navigation space. Default is 1.
        
    """
    blocks, block_size = virtual_file.get_iter_and_properties()[:2]
    map_depth = shape[2]
    hypermap = np.zeros(shape, dtype=dtype)
    cdef DataStream data_stream = DataStream(blocks, block_size)
    if dtype == np.uint8:
        bin_to_numpy[uint8_t](data_stream, hypermap, map_depth, downsample)
        return hypermap
    elif dtype == np.uint16:
        bin_to_numpy[uint16_t](data_stream, hypermap, map_depth, downsample)
        return hypermap
    elif dtype == np.uint32:
        bin_to_numpy[uint32_t](data_stream, hypermap, map_depth, downsample)
        return hypermap
    else:
        raise NotImplementedError('64bit array not implemented!')