File: ext.pyx

package info (click to toggle)
bitshuffle 0.3.5-3.1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 612 kB
  • sloc: ansic: 3,477; python: 836; makefile: 15; sh: 2
file content (449 lines) | stat: -rw-r--r-- 14,223 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
"""
Wrappers for public and private bitshuffle routines

"""

from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np

cimport numpy as np
cimport cython


# Initialise the NumPy C-API; this must run before any cimported NumPy
# functionality is used by the extension.
np.import_array()


# Number of times every wrapper in this module repeats its C call.
# Keep at 1 for normal use; a larger value (e.g. the commented-out 32 below)
# is only useful when timing the routines.
cdef int REPEATC = 1
#cdef int REPEATC = 32

# Python-level copy of the C-level repeat count.
REPEAT = REPEATC

# Public routines and version constants declared in the bitshuffle C header.
cdef extern from b"bitshuffle.h":
    # Feature probes: a non-zero return is reported as True by the
    # using_*() wrappers in this module.
    int bshuf_using_NEON()
    int bshuf_using_SSE2()
    int bshuf_using_AVX2()
    # Whole-buffer shuffle / unshuffle.  Callers in this module pass the
    # element count as *size*, the bytes per element as *elem_size*, and
    # treat a negative return value as an error code.
    int bshuf_bitshuffle(void *A, void *B, int size, int elem_size,
            int block_size)
    int bshuf_bitunshuffle(void *A, void *B, int size, int elem_size,
            int block_size)
    # Used by compress_lz4() to size the output buffer before compressing.
    int bshuf_compress_lz4_bound(int size, int elem_size, int block_size)
    # LZ4 (de)compression; a negative return value is treated as an error
    # code, otherwise it is used as a byte count by the callers below.
    int bshuf_compress_lz4(void *A, void *B, int size, int elem_size,
            int block_size)
    int bshuf_decompress_lz4(void *A, void *B, int size, int elem_size,
            int block_size)
    # Version components used to build ``__version__``.
    int BSHUF_VERSION_MAJOR
    int BSHUF_VERSION_MINOR
    int BSHUF_VERSION_POINT


# The template uses printf-style ``%d`` fields, so it has to be filled with
# the ``%`` operator.  ``str.format`` only expands ``{}`` fields and would
# leave the version as the literal text "%d.%d.%d".
__version__ = str("%d.%d.%d") % (BSHUF_VERSION_MAJOR, BSHUF_VERSION_MINOR,
        BSHUF_VERSION_POINT)


# Prototypes from bitshuffle.c
# These routines are declared directly rather than through the header block.
# They all share the (A, B, size, elem_size) -> int signature, so each one can
# be handed to ``_wrap_C_fun`` by the same-named Python wrapper below.
cdef extern int bshuf_copy(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_byte_elem_scal(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_byte_elem_SSE(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_byte_elem_NEON(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_bit_byte_scal(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_bit_byte_SSE(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_bit_byte_NEON(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_bit_byte_AVX(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_bitrow_eight(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_bit_elem_AVX(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_bit_elem_SSE(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_bit_elem_NEON(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_bit_elem_scal(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_byte_bitrow_SSE(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_byte_bitrow_NEON(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_byte_bitrow_AVX(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_byte_bitrow_scal(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_shuffle_bit_eightelem_scal(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_shuffle_bit_eightelem_SSE(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_shuffle_bit_eightelem_NEON(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_shuffle_bit_eightelem_AVX(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_untrans_bit_elem_SSE(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_untrans_bit_elem_NEON(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_untrans_bit_elem_AVX(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_untrans_bit_elem_scal(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_bit_elem(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_untrans_bit_elem(void *A, void *B, int size, int elem_size)


# Function-pointer type for routines taking (A, B, size, elem_size) and
# returning an int; this is the type of the *fun* argument of ``_wrap_C_fun``.
ctypedef int (*Cfptr) (void *A, void *B, int size, int elem_size)


def using_NEON():
    """Return True if the C library reports being compiled with Arm NEON."""
    return bool(bshuf_using_NEON())


def using_SSE2():
    """Return True if the C library reports being compiled with SSE2."""
    return bool(bshuf_using_SSE2())


def using_AVX2():
    """Return True if the C library reports being compiled with AVX2."""
    return bool(bshuf_using_AVX2())


def _setup_arr(arr):
    """Validate *arr* and allocate a matching output array.

    Parameters
    ----------
    arr : numpy array
        Input array; must be C-contiguous.

    Returns
    -------
    out : numpy array
        Uninitialised array with the same shape and dtype as *arr*.
    size : int
        Number of elements in *arr*.
    itemsize : int
        Size in bytes of one element of *arr*.

    Raises
    ------
    ValueError
        If *arr* is not C-contiguous.

    """
    dims = tuple(arr.shape)
    if not arr.flags['C_CONTIGUOUS']:
        raise ValueError("Input array must be C-contiguous.")
    elem_type = arr.dtype
    n_elem, elem_bytes = arr.size, elem_type.itemsize
    return np.empty(dims, dtype=elem_type), n_elem, elem_bytes


@cython.boundscheck(False)
@cython.wraparound(False)
cdef _wrap_C_fun(Cfptr fun, np.ndarray arr):
    """Call *fun* on the bytes of *arr* and return the filled output array.

    The output array comes from ``_setup_arr`` (same shape and dtype as
    *arr*).  *fun* is invoked ``REPEATC`` times; the return value of the last
    call is checked and a negative value raises ``RuntimeError``.

    """

    cdef int rep, n_elem, elem_bytes, status=0
    cdef np.ndarray result
    result, n_elem, elem_bytes = _setup_arr(arr)

    # Flat uint8 views give typed access to the raw input and output bytes.
    cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] src_bytes
    cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] dst_bytes
    src_bytes = arr.view(np.uint8).ravel()
    dst_bytes = result.view(np.uint8).ravel()
    cdef void* src_ptr = <void*> &src_bytes[0]
    cdef void* dst_ptr = <void*> &dst_bytes[0]

    for rep in range(REPEATC):
        status = fun(src_ptr, dst_ptr, n_elem, elem_bytes)
    if status < 0:
        raise RuntimeError("Failed. Error code %d." % status, status)
    return result


def copy(np.ndarray arr not None):
    """Copy the data of *arr* via the C routine ``bshuf_copy``.

    Intended for testing and profiling.  Returns a new array with the shape
    and dtype of *arr*.

    """
    cdef Cfptr routine = &bshuf_copy
    return _wrap_C_fun(routine, arr)


def trans_byte_elem_scal(np.ndarray arr not None):
    """Transpose bytes within words, but not bits.

    Runs the C routine ``bshuf_trans_byte_elem_scal`` and returns a new array
    with the shape and dtype of *arr*.

    """
    cdef Cfptr routine = &bshuf_trans_byte_elem_scal
    return _wrap_C_fun(routine, arr)


def trans_byte_elem_SSE(np.ndarray arr not None):
    """Transpose bytes within array elements.

    Runs the C routine ``bshuf_trans_byte_elem_SSE`` and returns a new array
    with the shape and dtype of *arr*.

    """
    cdef Cfptr routine = &bshuf_trans_byte_elem_SSE
    return _wrap_C_fun(routine, arr)


def trans_byte_elem_NEON(np.ndarray arr not None):
    """Apply the C routine ``bshuf_trans_byte_elem_NEON`` to *arr*.

    Returns a new array with the shape and dtype of *arr*.

    """
    cdef Cfptr routine = &bshuf_trans_byte_elem_NEON
    return _wrap_C_fun(routine, arr)


def trans_bit_byte_scal(np.ndarray arr not None):
    """Apply the C routine ``bshuf_trans_bit_byte_scal`` to *arr*.

    Returns a new array with the shape and dtype of *arr*.

    """
    cdef Cfptr routine = &bshuf_trans_bit_byte_scal
    return _wrap_C_fun(routine, arr)


def trans_bit_byte_SSE(np.ndarray arr not None):
    """Apply the C routine ``bshuf_trans_bit_byte_SSE`` to *arr*.

    Returns a new array with the shape and dtype of *arr*.

    """
    cdef Cfptr routine = &bshuf_trans_bit_byte_SSE
    return _wrap_C_fun(routine, arr)


def trans_bit_byte_NEON(np.ndarray arr not None):
    """Apply the C routine ``bshuf_trans_bit_byte_NEON`` to *arr*.

    Returns a new array with the shape and dtype of *arr*.

    """
    cdef Cfptr routine = &bshuf_trans_bit_byte_NEON
    return _wrap_C_fun(routine, arr)


def trans_bit_byte_AVX(np.ndarray arr not None):
    """Apply the C routine ``bshuf_trans_bit_byte_AVX`` to *arr*.

    Returns a new array with the shape and dtype of *arr*.

    """
    cdef Cfptr routine = &bshuf_trans_bit_byte_AVX
    return _wrap_C_fun(routine, arr)


def trans_bitrow_eight(np.ndarray arr not None):
    """Apply the C routine ``bshuf_trans_bitrow_eight`` to *arr*.

    Returns a new array with the shape and dtype of *arr*.

    """
    cdef Cfptr routine = &bshuf_trans_bitrow_eight
    return _wrap_C_fun(routine, arr)


def trans_bit_elem_AVX(np.ndarray arr not None):
    """Apply the C routine ``bshuf_trans_bit_elem_AVX`` to *arr*.

    Returns a new array with the shape and dtype of *arr*.

    """
    cdef Cfptr routine = &bshuf_trans_bit_elem_AVX
    return _wrap_C_fun(routine, arr)


def trans_bit_elem_scal(np.ndarray arr not None):
    """Apply the C routine ``bshuf_trans_bit_elem_scal`` to *arr*.

    Returns a new array with the shape and dtype of *arr*.

    """
    cdef Cfptr routine = &bshuf_trans_bit_elem_scal
    return _wrap_C_fun(routine, arr)


def trans_bit_elem_SSE(np.ndarray arr not None):
    """Apply the C routine ``bshuf_trans_bit_elem_SSE`` to *arr*.

    Returns a new array with the shape and dtype of *arr*.

    """
    cdef Cfptr routine = &bshuf_trans_bit_elem_SSE
    return _wrap_C_fun(routine, arr)


def trans_bit_elem_NEON(np.ndarray arr not None):
    """Apply the C routine ``bshuf_trans_bit_elem_NEON`` to *arr*.

    Returns a new array with the shape and dtype of *arr*.

    """
    cdef Cfptr routine = &bshuf_trans_bit_elem_NEON
    return _wrap_C_fun(routine, arr)


def trans_byte_bitrow_SSE(np.ndarray arr not None):
    """Apply the C routine ``bshuf_trans_byte_bitrow_SSE`` to *arr*.

    Returns a new array with the shape and dtype of *arr*.

    """
    cdef Cfptr routine = &bshuf_trans_byte_bitrow_SSE
    return _wrap_C_fun(routine, arr)


def trans_byte_bitrow_NEON(np.ndarray arr not None):
    """Apply the C routine ``bshuf_trans_byte_bitrow_NEON`` to *arr*.

    Returns a new array with the shape and dtype of *arr*.

    """
    cdef Cfptr routine = &bshuf_trans_byte_bitrow_NEON
    return _wrap_C_fun(routine, arr)


def trans_byte_bitrow_AVX(np.ndarray arr not None):
    """Apply the C routine ``bshuf_trans_byte_bitrow_AVX`` to *arr*.

    Returns a new array with the shape and dtype of *arr*.

    """
    cdef Cfptr routine = &bshuf_trans_byte_bitrow_AVX
    return _wrap_C_fun(routine, arr)


def trans_byte_bitrow_scal(np.ndarray arr not None):
    """Apply the C routine ``bshuf_trans_byte_bitrow_scal`` to *arr*.

    Returns a new array with the shape and dtype of *arr*.

    """
    cdef Cfptr routine = &bshuf_trans_byte_bitrow_scal
    return _wrap_C_fun(routine, arr)


def shuffle_bit_eightelem_scal(np.ndarray arr not None):
    """Apply the C routine ``bshuf_shuffle_bit_eightelem_scal`` to *arr*.

    Returns a new array with the shape and dtype of *arr*.

    """
    cdef Cfptr routine = &bshuf_shuffle_bit_eightelem_scal
    return _wrap_C_fun(routine, arr)


def shuffle_bit_eightelem_SSE(np.ndarray arr not None):
    """Apply the C routine ``bshuf_shuffle_bit_eightelem_SSE`` to *arr*.

    Returns a new array with the shape and dtype of *arr*.

    """
    cdef Cfptr routine = &bshuf_shuffle_bit_eightelem_SSE
    return _wrap_C_fun(routine, arr)


def shuffle_bit_eightelem_NEON(np.ndarray arr not None):
    """Apply the C routine ``bshuf_shuffle_bit_eightelem_NEON`` to *arr*.

    Returns a new array with the shape and dtype of *arr*.

    """
    cdef Cfptr routine = &bshuf_shuffle_bit_eightelem_NEON
    return _wrap_C_fun(routine, arr)


def shuffle_bit_eightelem_AVX(np.ndarray arr not None):
    """Apply the C routine ``bshuf_shuffle_bit_eightelem_AVX`` to *arr*.

    Returns a new array with the shape and dtype of *arr*.

    """
    cdef Cfptr routine = &bshuf_shuffle_bit_eightelem_AVX
    return _wrap_C_fun(routine, arr)


def untrans_bit_elem_SSE(np.ndarray arr not None):
    """Apply the C routine ``bshuf_untrans_bit_elem_SSE`` to *arr*.

    Returns a new array with the shape and dtype of *arr*.

    """
    cdef Cfptr routine = &bshuf_untrans_bit_elem_SSE
    return _wrap_C_fun(routine, arr)


def untrans_bit_elem_NEON(np.ndarray arr not None):
    """Apply the C routine ``bshuf_untrans_bit_elem_NEON`` to *arr*.

    Returns a new array with the shape and dtype of *arr*.

    """
    cdef Cfptr routine = &bshuf_untrans_bit_elem_NEON
    return _wrap_C_fun(routine, arr)


def untrans_bit_elem_AVX(np.ndarray arr not None):
    """Apply the C routine ``bshuf_untrans_bit_elem_AVX`` to *arr*.

    Returns a new array with the shape and dtype of *arr*.

    """
    cdef Cfptr routine = &bshuf_untrans_bit_elem_AVX
    return _wrap_C_fun(routine, arr)


def untrans_bit_elem_scal(np.ndarray arr not None):
    """Apply the C routine ``bshuf_untrans_bit_elem_scal`` to *arr*.

    Returns a new array with the shape and dtype of *arr*.

    """
    cdef Cfptr routine = &bshuf_untrans_bit_elem_scal
    return _wrap_C_fun(routine, arr)


def trans_bit_elem(np.ndarray arr not None):
    """Apply the C routine ``bshuf_trans_bit_elem`` to *arr*.

    Returns a new array with the shape and dtype of *arr*.

    """
    cdef Cfptr routine = &bshuf_trans_bit_elem
    return _wrap_C_fun(routine, arr)


def untrans_bit_elem(np.ndarray arr not None):
    """Apply the C routine ``bshuf_untrans_bit_elem`` to *arr*.

    Returns a new array with the shape and dtype of *arr*.

    """
    cdef Cfptr routine = &bshuf_untrans_bit_elem
    return _wrap_C_fun(routine, arr)


@cython.boundscheck(False)
@cython.wraparound(False)
def bitshuffle(np.ndarray arr not None, int block_size=0):
    """Bitshuffle an array.

    The result has the same shape and data type as the input, but its
    underlying buffer holds the bitshuffled data.

    Parameters
    ----------
    arr : numpy array
        Data to be processed. Must be C-contiguous.
    block_size : positive integer
        Block size in number of elements. By default, block size is chosen
        automatically.

    Returns
    -------
    out : numpy array
        Array with the same shape as input but underlying data has been
        bitshuffled.

    Raises
    ------
    ValueError
        If *arr* is not C-contiguous.
    RuntimeError
        If the C routine returns a negative error code.

    """

    cdef int rep, n_elem, elem_bytes, status=0
    cdef np.ndarray result
    result, n_elem, elem_bytes = _setup_arr(arr)

    # Flat uint8 views give typed access to the raw input and output bytes.
    cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] src_bytes
    cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] dst_bytes
    src_bytes = arr.view(np.uint8).ravel()
    dst_bytes = result.view(np.uint8).ravel()
    cdef void* src_ptr = <void*> &src_bytes[0]
    cdef void* dst_ptr = <void*> &dst_bytes[0]

    for rep in range(REPEATC):
        status = bshuf_bitshuffle(src_ptr, dst_ptr, n_elem, elem_bytes,
                                  block_size)
    if status < 0:
        raise RuntimeError("Failed. Error code %d." % status, status)
    return result


@cython.boundscheck(False)
@cython.wraparound(False)
def bitunshuffle(np.ndarray arr not None, int block_size=0):
    """Un-bitshuffle an array.

    The result has the same shape and data type as the input, but its
    underlying buffer holds the un-bitshuffled data.

    Parameters
    ----------
    arr : numpy array
        Data to be processed. Must be C-contiguous.
    block_size : positive integer
        Block size in number of elements. Must match value used for shuffling.

    Returns
    -------
    out : numpy array
        Array with the same shape as input but underlying data has been
        un-bitshuffled.

    Raises
    ------
    ValueError
        If *arr* is not C-contiguous.
    RuntimeError
        If the C routine returns a negative error code.

    """

    cdef int rep, n_elem, elem_bytes, status=0
    cdef np.ndarray result
    result, n_elem, elem_bytes = _setup_arr(arr)

    # Flat uint8 views give typed access to the raw input and output bytes.
    cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] src_bytes
    cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] dst_bytes
    src_bytes = arr.view(np.uint8).ravel()
    dst_bytes = result.view(np.uint8).ravel()
    cdef void* src_ptr = <void*> &src_bytes[0]
    cdef void* dst_ptr = <void*> &dst_bytes[0]

    for rep in range(REPEATC):
        status = bshuf_bitunshuffle(src_ptr, dst_ptr, n_elem, elem_bytes,
                                    block_size)
    if status < 0:
        raise RuntimeError("Failed. Error code %d." % status, status)
    return result


@cython.boundscheck(False)
@cython.wraparound(False)
def compress_lz4(np.ndarray arr not None, int block_size=0):
    """Bitshuffle then compress an array using LZ4.

    Parameters
    ----------
    arr : numpy array
        Data to be processed. Must be C-contiguous.
    block_size : positive integer
        Block size in number of elements. By default, block size is chosen
        automatically.

    Returns
    -------
    out : array with np.uint8 data type
        Buffer holding compressed data. This is a slice of the buffer that
        was allocated at the size given by ``bshuf_compress_lz4_bound``.

    Raises
    ------
    ValueError
        If *arr* is not C-contiguous.
    RuntimeError
        If the C routine returns a negative error code.

    """

    cdef int ii, size, itemsize, count=0
    if not arr.flags['C_CONTIGUOUS']:
        msg = "Input array must be C-contiguous."
        raise ValueError(msg)
    size = arr.size
    dtype = arr.dtype
    itemsize = dtype.itemsize

    # Allocate the output at the size reported by the bound routine.
    max_out_size = bshuf_compress_lz4_bound(size, itemsize, block_size)

    cdef np.ndarray out
    out = np.empty(max_out_size, dtype=np.uint8)

    # Flat uint8 views give typed access to the raw input and output bytes.
    cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat
    arr_flat = arr.view(np.uint8).ravel()
    cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat
    out_flat = out.view(np.uint8).ravel()
    cdef void* arr_ptr = <void*> &arr_flat[0]
    cdef void* out_ptr = <void*> &out_flat[0]
    for ii in range(REPEATC):
        count = bshuf_compress_lz4(arr_ptr, out_ptr, size, itemsize, block_size)
    if count < 0:
        msg = "Failed. Error code %d."
        excp = RuntimeError(msg % count, count)
        raise excp
    # A non-negative return value is used as the compressed length in bytes.
    return out[:count]


@cython.boundscheck(False)
@cython.wraparound(False)
def decompress_lz4(np.ndarray arr not None, shape, dtype, int block_size=0):
    """Decompress a buffer using LZ4 then bitunshuffle it yielding an array.

    Parameters
    ----------
    arr : numpy array
        Input data to be decompressed. Must be C-contiguous.
    shape : tuple of integers
        Shape of the output (decompressed array). Must match the shape of the
        original data array before compression.
    dtype : numpy dtype, or anything accepted by ``np.dtype``
        Datatype of the output array. Must match the data type of the original
        data array before compression.
    block_size : positive integer
        Block size in number of elements. Must match value used for
        compression.

    Returns
    -------
    out : numpy array with shape *shape* and data type *dtype*
        Decompressed data.

    Raises
    ------
    ValueError
        If *arr* is not C-contiguous.
    RuntimeError
        If the C routine returns a negative error code, or if the byte count
        it returns differs from the size in bytes of the input buffer.

    """

    cdef int ii, size, itemsize, count=0
    if not arr.flags['C_CONTIGUOUS']:
        msg = "Input array must be C-contiguous."
        raise ValueError(msg)
    # Normalise so that scalar types (np.float64) and strings ("<u4") work as
    # well as dtype instances; only instances expose an integer ``itemsize``.
    dtype = np.dtype(dtype)
    size = np.prod(shape)
    itemsize = dtype.itemsize

    cdef np.ndarray out
    out = np.empty(tuple(shape), dtype=dtype)

    # Flat uint8 views give typed access to the raw input and output bytes.
    cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat
    arr_flat = arr.view(np.uint8).ravel()
    cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat
    out_flat = out.view(np.uint8).ravel()
    cdef void* arr_ptr = <void*> &arr_flat[0]
    cdef void* out_ptr = <void*> &out_flat[0]
    # Length of the input in bytes.  The check below compares a byte count,
    # so use the uint8 view rather than ``arr.size`` (an element count that
    # only equals the byte length for one-byte dtypes).
    cdef Py_ssize_t in_bytes = arr_flat.shape[0]
    for ii in range(REPEATC):
        count = bshuf_decompress_lz4(arr_ptr, out_ptr, size, itemsize,
                                     block_size)
    if count < 0:
        msg = "Failed. Error code %d."
        excp = RuntimeError(msg % count, count)
        raise excp
    if count != in_bytes:
        msg = "Decompressed different number of bytes than input buffer size."
        msg += " Input buffer %d, decompressed %d." % (in_bytes, count)
        raise RuntimeError(msg, count)
    return out