File: libbz3.h

package info (click to toggle)
bzip3 1.5.3-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 7,652 kB
  • sloc: ansic: 6,893; sh: 218; makefile: 82
file content (242 lines) | stat: -rw-r--r-- 9,525 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242

/*
 * BZip3 - A spiritual successor to BZip2.
 * Copyright (C) 2022-2024 Kamila Szewczyk
 *
 * This program is free software: you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation, either version 3 of the License, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of  MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU Lesser General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef LIBBZ3_H
#define LIBBZ3_H

#include <stddef.h>
#include <stdint.h>

/*
 * Symbol visibility control.
 *
 * BZIP3_VISIBLE may be predefined by the build to override the default chosen here.
 * Otherwise it expands to the "default" ELF visibility attribute on GCC-compatible
 * compilers (version 4 and up, MinGW excluded) and to nothing everywhere else.
 */
#ifndef BZIP3_VISIBLE
    #if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
        #define BZIP3_VISIBLE __attribute__((visibility("default")))
    #else
        #define BZIP3_VISIBLE
    #endif
#endif

/*
 * BZIP3_API decorates every public declaration in this header:
 *   - BZIP3_DLL_EXPORT defined to 1: the symbols are marked __declspec(dllexport);
 *   - BZIP3_DLL_IMPORT defined to 1: the symbols are marked __declspec(dllimport);
 *   - otherwise: only BZIP3_VISIBLE is applied.
 */
#if defined(BZIP3_DLL_EXPORT) && (BZIP3_DLL_EXPORT == 1)
    #define BZIP3_API __declspec(dllexport) BZIP3_VISIBLE
#elif defined(BZIP3_DLL_IMPORT) && (BZIP3_DLL_IMPORT == 1)
    #define BZIP3_API __declspec(dllimport) BZIP3_VISIBLE
#else
    #define BZIP3_API BZIP3_VISIBLE
#endif

#ifdef __cplusplus
extern "C" {
#endif

/*
 * bzip3 status codes. BZ3_OK (zero) signals success; every error code is negative.
 * BZ3_ERR_DATA_SIZE_TOO_SMALL is the code documented for `bz3_decode_block` when the
 * supplied buffer is too small.
 */
#define BZ3_OK 0
#define BZ3_ERR_OUT_OF_BOUNDS -1
#define BZ3_ERR_BWT -2
#define BZ3_ERR_CRC -3
#define BZ3_ERR_MALFORMED_HEADER -4
#define BZ3_ERR_TRUNCATED_DATA -5
#define BZ3_ERR_DATA_TOO_BIG -6
#define BZ3_ERR_INIT -7
#define BZ3_ERR_DATA_SIZE_TOO_SMALL -8

/*
 * Block encoder/decoder state. It is only forward-declared in this header, so callers
 * handle it exclusively through pointers obtained from `bz3_new()` and released with
 * `bz3_free()`.
 */
struct bz3_state;

/**
 * @brief Get bzip3 version.
 *
 * @return The version as a C string. NOTE(review): ownership is not stated in this
 *         header; presumably a static string that must not be freed - confirm.
 */
BZIP3_API const char * bz3_version(void);

/**
 * @brief Get the last error number associated with a given state.
 *
 * @param state The state to query.
 * @return The last error number recorded for `state`; presumably one of the
 *         `BZ3_*` codes (TODO confirm against the implementation).
 */
BZIP3_API int8_t bz3_last_error(struct bz3_state * state);

/**
 * @brief Return a user-readable message explaining the cause of the last error.
 *
 * @param state The state whose last error should be described.
 * @return The message as a C string. NOTE(review): the lifetime and ownership of the
 *         returned string are not documented here - confirm before storing or freeing it.
 */
BZIP3_API const char * bz3_strerror(struct bz3_state * state);

/**
 * @brief Construct a new block encoder state, which will encode blocks as big as the given block size.
 * The decoder will be able to decode blocks at most as big as the given block size.
 *
 * @param block_size The maximum block size; must be between 65K and 511M.
 * @return The new state, or NULL in case allocation fails or the block size is not
 *         between 65K and 511M. Release the state with `bz3_free()`.
 */
BZIP3_API struct bz3_state * bz3_new(int32_t block_size);

/**
 * @brief Free the memory occupied by a block encoder state.
 *
 * @param state The state to release, as returned by `bz3_new()`.
 */
BZIP3_API void bz3_free(struct bz3_state * state);

/**
 * @brief Return the recommended size of the output buffer for the compression functions.
 *
 * @param input_size The size of the input that is going to be compressed.
 * @return The recommended output buffer size for an input of `input_size` bytes.
 */
BZIP3_API size_t bz3_bound(size_t input_size);

/* ** HIGH LEVEL APIs ** */

/**
 * @brief Compress a frame. This function does not support parallelism
 * by itself, consider using the low level `bz3_encode_blocks()` function instead.
 * Using the low level API might provide better performance.
 *
 * @param block_size The block size to compress with.
 * @param in The data to compress.
 * @param out The output buffer that receives the compressed frame.
 * @param in_size The size of the data at `in`.
 * @param out_size Must be set to the size of the output buffer before the operation;
 *                 it must be at least equal to `bz3_bound(in_size)`.
 *                 NOTE(review): presumably updated to the number of bytes written
 *                 on success - confirm.
 * @return A bzip3 error code; BZ3_OK when the operation is successful.
 */
BZIP3_API int bz3_compress(uint32_t block_size, const uint8_t * in, uint8_t * out, size_t in_size, size_t * out_size);

/**
 * @brief Decompress a frame. This function does not support parallelism
 * by itself, consider using the low level `bz3_decode_blocks()` function instead.
 * Using the low level API might provide better performance.
 *
 * @param in The compressed frame.
 * @param out The output buffer that receives the decompressed data.
 * @param in_size The size of the compressed frame at `in`.
 * @param out_size Must be set to the size of the output buffer before the operation.
 *                 NOTE(review): presumably updated to the number of bytes written
 *                 on success - confirm.
 * @return A bzip3 error code; BZ3_OK when the operation is successful.
 */
BZIP3_API int bz3_decompress(const uint8_t * in, uint8_t * out, size_t in_size, size_t * out_size);

/**
 * @brief Calculate the minimal memory required for compression with the given block size.
 * This includes all internal buffers and state structures. This calculates the amount of bytes
 * that will be allocated by a call to `bz3_new()`.
 *
 * @details Memory allocation and usage patterns:
 *
 * bz3_new():
 *    - Allocates all memory upfront:
 *      - Core state structure (sizeof(struct bz3_state))
 *      - Swap buffer (bz3_bound(block_size) bytes)
 *      - SAIS array (BWT_BOUND(block_size) * sizeof(int32_t) bytes)
 *      - LZP lookup table ((1 << LZP_DICTIONARY) * sizeof(int32_t) bytes)
 *      - Compression state (sizeof(state))
 *    - All memory remains allocated until bz3_free()
 *
 * Additional memory may be used depending on which API is used from here.
 *
 * # Low Level APIs
 *
 * 1. bz3_encode_block() / bz3_decode_block():
 *    - Uses pre-allocated memory from bz3_new()
 *    - No additional memory allocation except for libsais (usually ~16KiB)
 *    - Peak memory usage of physical RAM varies with compression stages:
 *      - LZP: Uses LZP lookup table + swap buffer
 *      - BWT: Uses SAIS array + swap buffer
 *      - Entropy coding: Uses compression state (cm_state) + swap buffer
 *
 * Using the higher level API, `bz3_compress`, expect an additional allocation
 * of `bz3_bound(block_size)`.
 *
 * In the parallel version `bz3_encode_blocks`, each thread gets its own state,
 * so memory usage is `n_threads * bz3_min_memory_needed()`.
 *
 * # High Level APIs
 *
 * 1. bz3_compress():
 *    - Allocates an additional temporary compression buffer (bz3_bound(block_size) bytes)
 *      in addition to the memory amount returned by this function and libsais.
 *    - Everything is freed after compression completes
 *
 * 2. bz3_decompress():
 *    - Allocates an additional temporary buffer (bz3_bound(block_size) bytes)
 *      in addition to the memory amount returned by this function and libsais.
 *    - Everything is freed after decompression completes
 *
 * Memory remains constant during operation, with the exception of some small allocations from
 * libsais during the BWT stage. Those are not accounted for by this function, though they usually
 * amount to ~16KiB, which is negligible.
 * The worst case of BWT is 2*block_size technically speaking.
 *
 * No dynamic (re)allocation occurs outside of that.
 *
 * @param block_size The block size to be used for compression
 * @return The total number of bytes required for compression, or 0 if block_size is invalid
 */
BZIP3_API size_t bz3_min_memory_needed(int32_t block_size);

/* ** LOW LEVEL APIs ** */

/**
 * @brief Encode a single block in place.
 *
 * `buffer` must be able to hold at least `bz3_bound(size)` bytes. The size must not
 * exceed the block size associated with the state.
 *
 * @param state The encoder state, created with `bz3_new()`.
 * @param buffer The data to encode; it is overwritten with the encoded block.
 * @param size The size of the data to encode in `buffer`.
 * @return The amount of bytes written to `buffer`.
 *         NOTE(review): how failures are reported is not documented here; presumably
 *         via a negative return together with `bz3_last_error()` - confirm.
 */
BZIP3_API int32_t bz3_encode_block(struct bz3_state * state, uint8_t * buffer, int32_t size);

/**
 * @brief Decode a single block.
 *
 * `buffer` must be able to hold at least `bz3_bound(orig_size)` bytes
 * in order to ensure decompression will succeed for all possible bzip3 blocks.
 *
 * In most (but not all) cases, a buffer of `orig_size` bytes is sufficient.
 * If it is not sufficient, you must allocate a buffer of size `bz3_bound(orig_size)` temporarily.
 *
 * If `buffer_size` is too small, `BZ3_ERR_DATA_SIZE_TOO_SMALL` will be returned.
 * The size must not exceed the block size associated with the state.
 *
 * @param state The decoder state, created with `bz3_new()`.
 * @param buffer The buffer holding the compressed block; the decoded data is written to it.
 * @param buffer_size The size of the buffer at `buffer`
 * @param compressed_size The size of the compressed data in `buffer`
 * @param orig_size The original size of the data before compression.
 */
BZIP3_API int32_t bz3_decode_block(struct bz3_state * state, uint8_t * buffer, size_t buffer_size, int32_t compressed_size, int32_t orig_size);

/**
 * @brief Encode `n` blocks, all in parallel.
 * All specifics of `bz3_encode_block` still hold. The function will launch a thread for each block.
 * The compressed sizes are written to the `sizes` array. Every buffer is overwritten and none of them can overlap.
 * Precisely `n` states, buffers and sizes must be supplied.
 *
 * Expects `n` between 2 and 16.
 *
 * Present in the shared library only if -lpthread was present during building.
 *
 * @param states The `n` encoder states, one per block.
 * @param buffers The `n` buffers holding the blocks to encode; each one is overwritten.
 * @param sizes The `n` input sizes; overwritten with the compressed sizes.
 * @param n The number of blocks to encode.
 */
BZIP3_API void bz3_encode_blocks(struct bz3_state * states[], uint8_t * buffers[], int32_t sizes[], int32_t n);

/**
 * @brief Decode `n` blocks, all in parallel.
 * Same specifics as `bz3_encode_blocks`, but doesn't overwrite `sizes`.
 *
 * @param states The `n` decoder states, one per block.
 * @param buffers The `n` buffers holding the blocks to decode.
 * @param buffer_sizes The sizes of the buffers in `buffers`; presumably the per-block
 *                     counterpart of `buffer_size` in `bz3_decode_block` (TODO confirm).
 * @param sizes The sizes of the blocks to decode; presumably the per-block counterpart
 *              of `compressed_size` in `bz3_decode_block` (TODO confirm). Not overwritten.
 * @param orig_sizes The original sizes of the blocks; presumably the per-block counterpart
 *                   of `orig_size` in `bz3_decode_block` (TODO confirm).
 * @param n The number of blocks to decode.
 */
BZIP3_API void bz3_decode_blocks(struct bz3_state * states[], uint8_t * buffers[], size_t buffer_sizes[], int32_t sizes[],
                                 int32_t orig_sizes[], int32_t n);

/**
 * @brief Check if using original file size as buffer size is sufficient for decompressing
 * a block at `block` pointer.
 *
 * @param block Pointer to the compressed block data
 * @param block_size Size of the block buffer in bytes (must be at least 13 bytes for header)
 * @param orig_size Size of the original uncompressed data
 * @return 1 if original size is sufficient, 0 if insufficient, -1 on header error (insufficient buffer size)
 *
 * @remarks
 *
 *      This function is useful for external APIs using the low level block encoding API,
 *      `bz3_encode_block`. You would normally call this directly after `bz3_encode_block`
 *      on the block that has been output.
 *
 *      The purpose of this function is to prevent encoding blocks that would require an additional
 *      malloc at decompress time.
 *      The goal is to prevent erroring with `BZ3_ERR_DATA_SIZE_TOO_SMALL`, thus
 *      in turn avoiding the need to temporarily allocate a larger buffer of
 *      `bz3_bound(orig_size)` bytes when decoding.
 */
BZIP3_API int bz3_orig_size_sufficient_for_decode(const uint8_t * block, size_t block_size, int32_t orig_size);


#ifdef __cplusplus
} /* extern "C" */
#endif

#endif