/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Block split point selection utilities. */
#include "block_splitter.h"
#include <string.h> /* memcpy, memset */
#include "../common/platform.h"
#include "bit_cost.h"
#include "cluster.h"
#include "command.h"
#include "fast_log.h"
#include "histogram.h"
#include "memory.h"
#include "quality.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
/* Tuning constants for block splitting. The ones passed explicitly by
   BrotliSplitBlock go to the SplitByteVector{Literal,Command,Distance} calls;
   the rest are not referenced in the code visible in this file and are
   presumably consumed by block_splitter_inc.h -- TODO confirm. */
/* Histogram caps (going by the names). kMaxCommandHistograms is passed for
   both the command split and the distance split; there is no separate
   distance constant. */
static const size_t kMaxLiteralHistograms = 100;
static const size_t kMaxCommandHistograms = 50;
/* Per-category block switch costs handed to the splitters.
   NOTE(review): presumably a penalty, in bits, for starting a new block --
   confirm units against block_splitter_inc.h. */
static const double kLiteralBlockSwitchCost = 28.1;
static const double kCommandBlockSwitchCost = 13.5;
static const double kDistanceBlockSwitchCost = 14.6;
/* Per-category stride lengths handed to the splitters. */
static const size_t kLiteralStrideLength = 70;
static const size_t kCommandStrideLength = 40;
static const size_t kDistanceStrideLength = 40;
/* Per-category "symbols per histogram" values handed to the splitters. */
static const size_t kSymbolsPerLiteralHistogram = 544;
static const size_t kSymbolsPerCommandHistogram = 530;
static const size_t kSymbolsPerDistanceHistogram = 544;
/* Not used directly by any code visible in this file. */
static const size_t kMinLengthForBlockSplitting = 128;
static const size_t kIterMulForRefining = 2;
static const size_t kMinItersForRefining = 100;
/* Returns the sum of insert_len_ over the first num_commands entries of cmds,
   i.e. the number of literal bytes CopyLiteralsToByteArray will emit for the
   same commands. */
static size_t CountLiterals(const Command* cmds, const size_t num_commands) {
  size_t sum = 0;
  size_t idx = 0;
  while (idx < num_commands) {
    sum += cmds[idx].insert_len_;
    ++idx;
  }
  return sum;
}
/* Gathers the literal bytes of every command into one contiguous array.

   data is addressed modulo (mask + 1): reading starts at (offset & mask) and a
   run of literals that crosses the end of the buffer continues at index 0.
   After each command the read position also skips that command's copy length.

   literals must have room for the total of all insert_len_ values (see
   CountLiterals). */
static void CopyLiteralsToByteArray(const Command* cmds,
                                    const size_t num_commands,
                                    const uint8_t* data,
                                    const size_t offset,
                                    const size_t mask,
                                    uint8_t* literals) {
  uint8_t* out = literals;
  size_t src = offset & mask;
  size_t k;
  for (k = 0; k < num_commands; ++k) {
    const Command* cmd = &cmds[k];
    size_t remaining = cmd->insert_len_;
    if (src + remaining > mask) {
      /* The run wraps: copy the part up to the end of the buffer first. */
      const size_t until_wrap = mask + 1 - src;
      memcpy(out, data + src, until_wrap);
      out += until_wrap;
      remaining -= until_wrap;
      src = 0;
    }
    if (remaining != 0) {
      memcpy(out, data + src, remaining);
      out += remaining;
    }
    /* Advance past the literals just copied and past the copied segment. */
    src = (src + remaining + CommandCopyLen(cmd)) & mask;
  }
}
/* Advances *seed by one step of a multiplicative generator (multiplier 16807,
   arithmetic modulo 2^32) and returns the new state.
   The initial seed should be 7; the sequence then repeats after (1 << 29)
   steps. */
static BROTLI_INLINE uint32_t MyRand(uint32_t* seed) {
  const uint32_t next = *seed * 16807U;
  *seed = next;
  return next;
}
/* Returns FastLog2(count) for a non-zero count, and the fixed value -2.0 for a
   zero count (where the logarithm would not be defined). */
static BROTLI_INLINE double BitCost(size_t count) {
  if (count == 0) {
    return -2.0;
  }
  return FastLog2(count);
}
/* Batch sizes; not referenced by any code visible in this file, presumably
   consumed by block_splitter_inc.h below -- TODO confirm. */
#define HISTOGRAMS_PER_BATCH 64
#define CLUSTERS_PER_BATCH 16
/* block_splitter_inc.h is included three times, template-style. Before each
   inclusion FN(X) is defined to paste a category suffix onto X and DataType
   names the element type. BrotliSplitBlock calls SplitByteVectorLiteral,
   SplitByteVectorCommand and SplitByteVectorDistance, which are expected to
   come from these expansions. */
/* Instantiation 1: Literal, elements are uint8_t. */
#define FN(X) X ## Literal
#define DataType uint8_t
/* NOLINTNEXTLINE(build/include) */
#include "block_splitter_inc.h"
#undef DataType
#undef FN
/* Instantiation 2: Command, elements are uint16_t. */
#define FN(X) X ## Command
#define DataType uint16_t
/* NOLINTNEXTLINE(build/include) */
#include "block_splitter_inc.h"
/* Only FN is undefined here: DataType stays uint16_t and is reused by the
   Distance instantiation. */
#undef FN
/* Instantiation 3: Distance, elements are uint16_t (carried over). */
#define FN(X) X ## Distance
/* NOLINTNEXTLINE(build/include) */
#include "block_splitter_inc.h"
#undef DataType
#undef FN
/* Puts *self into the empty state: zero block types, zero blocks, no backing
   arrays and zero recorded capacity. Nothing is freed; any previous contents
   of the fields are simply overwritten. */
void BrotliInitBlockSplit(BlockSplit* self) {
  /* Counters. */
  self->num_blocks = 0;
  self->num_types = 0;
  /* Block type array and its capacity. */
  self->types = NULL;
  self->types_alloc_size = 0;
  /* Block length array and its capacity. */
  self->lengths = NULL;
  self->lengths_alloc_size = 0;
}
/* Releases the `types` and `lengths` arrays of *self via BROTLI_FREE on
   memory manager m. The counters and *_alloc_size fields are not touched
   here.
   NOTE(review): whether BROTLI_FREE also nulls the pointers depends on its
   definition in memory.h -- confirm before reusing self without calling
   BrotliInitBlockSplit again. */
void BrotliDestroyBlockSplit(MemoryManager* m, BlockSplit* self) {
BROTLI_FREE(m, self->types);
BROTLI_FREE(m, self->lengths);
}
/* Computes the block partitioning of one command sequence for each of the
   three symbol categories: literal bytes, insert-and-copy command prefixes and
   distance prefixes. Each category is flattened into a temporary array, handed
   to its SplitByteVector* variant, and the temporary is released before the
   next category starts.

   m                       memory manager used for the temporaries and
                           forwarded to the splitters.
   cmds, num_commands      commands to analyse.
   data, pos, mask         input bytes, addressed modulo (mask + 1), and the
                           position of the first literal.
   params                  encoder parameters, forwarded unchanged.
   literal_split,
   insert_and_copy_split,
   dist_split              receive the resulting partitionings.

   Returns early as soon as the memory manager reports OOM; splits not yet
   computed are left as they were.
   NOTE(review): on those early returns the current temporary is not released
   here; presumably the memory manager reclaims it -- confirm in memory.h. */
void BrotliSplitBlock(MemoryManager* m,
                      const Command* cmds,
                      const size_t num_commands,
                      const uint8_t* data,
                      const size_t pos,
                      const size_t mask,
                      const BrotliEncoderParams* params,
                      BlockSplit* literal_split,
                      BlockSplit* insert_and_copy_split,
                      BlockSplit* dist_split) {
  /* Stage 1: literals. */
  {
    const size_t num_literals = CountLiterals(cmds, num_commands);
    uint8_t* literal_bytes = BROTLI_ALLOC(m, uint8_t, num_literals);
    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(literal_bytes)) return;
    /* Gather all literals into one contiguous array. */
    CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask,
                            literal_bytes);
    /* Split the literal array into blocks.
     * Literal histograms can have alphabet size up to 256.
     * Though, to accommodate context modeling, less than half of maximum size
     * is allowed. */
    SplitByteVectorLiteral(
        m, literal_bytes, num_literals,
        kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
        kLiteralStrideLength, kLiteralBlockSwitchCost, params,
        literal_split);
    if (BROTLI_IS_OOM(m)) return;
    BROTLI_FREE(m, literal_bytes);
    /* NB: this might be a good place for injecting extra splitting without
     * increasing encoder complexity; however, the output partition would be
     * less optimal than one produced with forced splitting inside
     * SplitByteVector (FindBlocks / ClusterBlocks). */
  }

  /* Stage 2: insert-and-copy command prefixes, one per command. */
  {
    uint16_t* cmd_codes = BROTLI_ALLOC(m, uint16_t, num_commands);
    size_t k = 0;
    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(cmd_codes)) return;
    while (k < num_commands) {
      cmd_codes[k] = cmds[k].cmd_prefix_;
      ++k;
    }
    /* Split the command prefix array into blocks. */
    SplitByteVectorCommand(
        m, cmd_codes, num_commands,
        kSymbolsPerCommandHistogram, kMaxCommandHistograms,
        kCommandStrideLength, kCommandBlockSwitchCost, params,
        insert_and_copy_split);
    if (BROTLI_IS_OOM(m)) return;
    /* TODO(eustas): reuse for distances? */
    BROTLI_FREE(m, cmd_codes);
  }

  /* Stage 3: distance prefixes, only for commands that contribute one. */
  {
    uint16_t* dist_codes = BROTLI_ALLOC(m, uint16_t, num_commands);
    size_t num_dist = 0;
    size_t k;
    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(dist_codes)) return;
    for (k = 0; k < num_commands; ++k) {
      const Command* cmd = &cmds[k];
      /* Skip commands with a zero copy length or a command prefix below 128;
         no distance prefix is recorded for them. */
      if (!CommandCopyLen(cmd)) continue;
      if (cmd->cmd_prefix_ < 128) continue;
      /* Keep only the low 10 bits of dist_prefix_. */
      dist_codes[num_dist] = cmd->dist_prefix_ & 0x3FF;
      ++num_dist;
    }
    /* Split the distance prefix array into blocks. */
    SplitByteVectorDistance(
        m, dist_codes, num_dist,
        kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
        kDistanceStrideLength, kDistanceBlockSwitchCost, params,
        dist_split);
    if (BROTLI_IS_OOM(m)) return;
    BROTLI_FREE(m, dist_codes);
  }
}
/* Test-only entry points, compiled only when BROTLI_TEST is defined. They give
   external linkage to the file-local (static) helpers above so that tests can
   call them. Each wrapper is preceded by its own prototype because no header
   visible here declares it. */
#if defined(BROTLI_TEST)
/* Forwards to CountLiterals with the same arguments. */
size_t CountLiteralsForTest(const Command*, const size_t);
size_t CountLiteralsForTest(const Command* cmds, const size_t num_commands) {
return CountLiterals(cmds, num_commands);
}
/* Forwards to CopyLiteralsToByteArray with the same arguments. */
void CopyLiteralsToByteArrayForTest(const Command*,
const size_t, const uint8_t*, const size_t, const size_t, uint8_t*);
void CopyLiteralsToByteArrayForTest(const Command* cmds,
const size_t num_commands, const uint8_t* data, const size_t offset,
const size_t mask, uint8_t* literals) {
CopyLiteralsToByteArray(cmds, num_commands, data, offset, mask, literals);
}
#endif
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif