1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461
|
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2011-2022 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
// ----------------------------------------------------------------------------
/**
* @brief Functions for generating partition tables on demand.
*/
#include "astcenc_internal.h"
/**
* @brief Generate a canonical representation of a partition pattern.
*
* The returned value stores two bits per texel, for up to 6x6x6 texels, where the two bits store
* the remapped texel index. Remapping ensures that we only match on the partition pattern,
* independent of the partition order generated by the hash.
*
* @param texel_count The number of texels in the block.
* @param partition_of_texel The partition assignments, in hash order.
* @param[out] bit_pattern The output bit pattern representation.
*/
static void generate_canonical_partitioning(
unsigned int texel_count,
const uint8_t* partition_of_texel,
uint64_t bit_pattern[7]
) {
// Clear the pattern
for (unsigned int i = 0; i < 7; i++)
{
bit_pattern[i] = 0;
}
// Store a mapping to reorder the raw partitions so that the the partitions are ordered such
// that the lowest texel index in partition N is smaller than the lowest texel index in
// partition N + 1.
int mapped_index[BLOCK_MAX_PARTITIONS];
int map_weight_count = 0;
for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++)
{
mapped_index[i] = -1;
}
for (unsigned int i = 0; i < texel_count; i++)
{
int index = partition_of_texel[i];
if (mapped_index[index] < 0)
{
mapped_index[index] = map_weight_count++;
}
uint64_t xlat_index = mapped_index[index];
bit_pattern[i >> 5] |= xlat_index << (2 * (i & 0x1F));
}
}
/**
* @brief Compare two canonical patterns to see if they are the same.
*
* @param part1 The first canonical bit pattern to check.
* @param part2 The second canonical bit pattern to check.
*
* @return @c true if the patterns are the same, @c false otherwise.
*/
static bool compare_canonical_partitionings(
const uint64_t part1[7],
const uint64_t part2[7]
) {
return (part1[0] == part2[0]) && (part1[1] == part2[1]) &&
(part1[2] == part2[2]) && (part1[3] == part2[3]) &&
(part1[4] == part2[4]) && (part1[5] == part2[5]) &&
(part1[6] == part2[6]);
}
/**
* @brief Hash function used for procedural partition assignment.
*
* @param inp The hash seed.
*
* @return The hashed value.
*/
static uint32_t hash52(
uint32_t inp
) {
inp ^= inp >> 15;
// (2^4 + 1) * (2^7 + 1) * (2^17 - 1)
inp *= 0xEEDE0891;
inp ^= inp >> 5;
inp += inp << 16;
inp ^= inp >> 7;
inp ^= inp >> 3;
inp ^= inp << 6;
inp ^= inp >> 17;
return inp;
}
/**
* @brief Select texel assignment for a single coordinate.
*
* @param seed The seed - the partition index from the block.
* @param x The texel X coordinate in the block.
* @param y The texel Y coordinate in the block.
* @param z The texel Z coordinate in the block.
* @param partition_count The total partition count of this encoding.
* @param small_block @c true if the blockhas fewer than 32 texels.
*
* @return The assigned partition index for this texel.
*/
static uint8_t select_partition(
int seed,
int x,
int y,
int z,
int partition_count,
bool small_block
) {
// For small blocks bias the coordinates to get better distribution
if (small_block)
{
x <<= 1;
y <<= 1;
z <<= 1;
}
seed += (partition_count - 1) * 1024;
uint32_t rnum = hash52(seed);
uint8_t seed1 = rnum & 0xF;
uint8_t seed2 = (rnum >> 4) & 0xF;
uint8_t seed3 = (rnum >> 8) & 0xF;
uint8_t seed4 = (rnum >> 12) & 0xF;
uint8_t seed5 = (rnum >> 16) & 0xF;
uint8_t seed6 = (rnum >> 20) & 0xF;
uint8_t seed7 = (rnum >> 24) & 0xF;
uint8_t seed8 = (rnum >> 28) & 0xF;
uint8_t seed9 = (rnum >> 18) & 0xF;
uint8_t seed10 = (rnum >> 22) & 0xF;
uint8_t seed11 = (rnum >> 26) & 0xF;
uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;
// Squaring all the seeds in order to bias their distribution towards lower values.
seed1 *= seed1;
seed2 *= seed2;
seed3 *= seed3;
seed4 *= seed4;
seed5 *= seed5;
seed6 *= seed6;
seed7 *= seed7;
seed8 *= seed8;
seed9 *= seed9;
seed10 *= seed10;
seed11 *= seed11;
seed12 *= seed12;
int sh1, sh2;
if (seed & 1)
{
sh1 = (seed & 2 ? 4 : 5);
sh2 = (partition_count == 3 ? 6 : 5);
}
else
{
sh1 = (partition_count == 3 ? 6 : 5);
sh2 = (seed & 2 ? 4 : 5);
}
int sh3 = (seed & 0x10) ? sh1 : sh2;
seed1 >>= sh1;
seed2 >>= sh2;
seed3 >>= sh1;
seed4 >>= sh2;
seed5 >>= sh1;
seed6 >>= sh2;
seed7 >>= sh1;
seed8 >>= sh2;
seed9 >>= sh3;
seed10 >>= sh3;
seed11 >>= sh3;
seed12 >>= sh3;
int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
// Apply the saw
a &= 0x3F;
b &= 0x3F;
c &= 0x3F;
d &= 0x3F;
// Remove some of the components if we are to output < 4 partitions.
if (partition_count <= 3)
{
d = 0;
}
if (partition_count <= 2)
{
c = 0;
}
if (partition_count <= 1)
{
b = 0;
}
uint8_t partition;
if (a >= b && a >= c && a >= d)
{
partition = 0;
}
else if (b >= c && b >= d)
{
partition = 1;
}
else if (c >= d)
{
partition = 2;
}
else
{
partition = 3;
}
return partition;
}
/**
* @brief Generate a single partition info structure.
*
* @param[out] bsd The block size information.
* @param partition_count The partition count of this partitioning.
* @param partition_index The partition index / seed of this partitioning.
* @param partition_remap_index The remapped partition index of this partitioning.
* @param[out] pi The partition info structure to populate.
*
* @return True if this is a useful partition index, False if we can skip it.
*/
static bool generate_one_partition_info_entry(
block_size_descriptor& bsd,
unsigned int partition_count,
unsigned int partition_index,
unsigned int partition_remap_index,
partition_info& pi
) {
int texels_per_block = bsd.texel_count;
bool small_block = texels_per_block < 32;
uint8_t *partition_of_texel = pi.partition_of_texel;
// Assign texels to partitions
int texel_idx = 0;
int counts[BLOCK_MAX_PARTITIONS] { 0 };
for (unsigned int z = 0; z < bsd.zdim; z++)
{
for (unsigned int y = 0; y < bsd.ydim; y++)
{
for (unsigned int x = 0; x < bsd.xdim; x++)
{
uint8_t part = select_partition(partition_index, x, y, z, partition_count, small_block);
pi.texels_of_partition[part][counts[part]++] = static_cast<uint8_t>(texel_idx++);
*partition_of_texel++ = part;
}
}
}
// Fill loop tail so we can overfetch later
for (unsigned int i = 0; i < partition_count; i++)
{
int ptex_count = counts[i];
int ptex_count_simd = round_up_to_simd_multiple_vla(ptex_count);
for (int j = ptex_count; j < ptex_count_simd; j++)
{
pi.texels_of_partition[i][j] = pi.texels_of_partition[i][ptex_count - 1];
}
}
// Populate the actual procedural partition count
if (counts[0] == 0)
{
pi.partition_count = 0;
}
else if (counts[1] == 0)
{
pi.partition_count = 1;
}
else if (counts[2] == 0)
{
pi.partition_count = 2;
}
else if (counts[3] == 0)
{
pi.partition_count = 3;
}
else
{
pi.partition_count = 4;
}
// Populate the partition index
pi.partition_index = static_cast<uint16_t>(partition_index);
// Populate the coverage bitmaps for 2/3/4 partitions
uint64_t* bitmaps { nullptr };
if (partition_count == 2)
{
bitmaps = bsd.coverage_bitmaps_2[partition_remap_index];
}
else if (partition_count == 3)
{
bitmaps = bsd.coverage_bitmaps_3[partition_remap_index];
}
else if (partition_count == 4)
{
bitmaps = bsd.coverage_bitmaps_4[partition_remap_index];
}
for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++)
{
pi.partition_texel_count[i] = static_cast<uint8_t>(counts[i]);
}
// Valid partitionings have texels in all of the requested partitions
bool valid = pi.partition_count == partition_count;
if (bitmaps)
{
// Populate the partition coverage bitmap
for (unsigned int i = 0; i < partition_count; i++)
{
bitmaps[i] = 0ULL;
}
unsigned int texels_to_process = astc::min(bsd.texel_count, BLOCK_MAX_KMEANS_TEXELS);
for (unsigned int i = 0; i < texels_to_process; i++)
{
unsigned int idx = bsd.kmeans_texels[i];
bitmaps[pi.partition_of_texel[idx]] |= 1ULL << i;
}
}
return valid;
}
static void build_partition_table_for_one_partition_count(
block_size_descriptor& bsd,
bool can_omit_partitionings,
unsigned int partition_count_cutoff,
unsigned int partition_count,
partition_info* ptab,
uint64_t* canonical_patterns
) {
unsigned int next_index = 0;
bsd.partitioning_count_selected[partition_count - 1] = 0;
bsd.partitioning_count_all[partition_count - 1] = 0;
// Skip tables larger than config max partition count if we can omit modes
if (can_omit_partitionings && (partition_count > partition_count_cutoff))
{
return;
}
// Iterate through twice
// - Pass 0: Keep selected partitionings
// - Pass 1: Keep non-selected partitionings (skip if in omit mode)
unsigned int max_iter = can_omit_partitionings ? 1 : 2;
// Tracker for things we built in the first iteration
uint8_t build[BLOCK_MAX_PARTITIONINGS] { 0 };
for (unsigned int x = 0; x < max_iter; x++)
{
for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONINGS; i++)
{
// Don't include things we built in the first pass
if ((x == 1) && build[i])
{
continue;
}
bool keep_useful = generate_one_partition_info_entry(bsd, partition_count, i, next_index, ptab[next_index]);
if ((x == 0) && !keep_useful)
{
continue;
}
generate_canonical_partitioning(bsd.texel_count, ptab[next_index].partition_of_texel, canonical_patterns + next_index * 7);
bool keep_canonical = true;
for (unsigned int j = 0; j < next_index; j++)
{
bool match = compare_canonical_partitionings(canonical_patterns + 7 * next_index, canonical_patterns + 7 * j);
if (match)
{
keep_canonical = false;
break;
}
}
if (keep_useful && keep_canonical)
{
if (x == 0)
{
bsd.partitioning_packed_index[partition_count - 2][i] = static_cast<uint16_t>(next_index);
bsd.partitioning_count_selected[partition_count - 1]++;
bsd.partitioning_count_all[partition_count - 1]++;
build[i] = 1;
next_index++;
}
}
else
{
if (x == 1)
{
bsd.partitioning_packed_index[partition_count - 2][i] = static_cast<uint16_t>(next_index);
bsd.partitioning_count_all[partition_count - 1]++;
next_index++;
}
}
}
}
}
/* See header for documentation. */
void init_partition_tables(
block_size_descriptor& bsd,
bool can_omit_partitionings,
unsigned int partition_count_cutoff
) {
partition_info* par_tab2 = bsd.partitionings;
partition_info* par_tab3 = par_tab2 + BLOCK_MAX_PARTITIONINGS;
partition_info* par_tab4 = par_tab3 + BLOCK_MAX_PARTITIONINGS;
partition_info* par_tab1 = par_tab4 + BLOCK_MAX_PARTITIONINGS;
generate_one_partition_info_entry(bsd, 1, 0, 0, *par_tab1);
bsd.partitioning_count_selected[0] = 1;
bsd.partitioning_count_all[0] = 1;
uint64_t* canonical_patterns = new uint64_t[BLOCK_MAX_PARTITIONINGS * 7];
build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 2, par_tab2, canonical_patterns);
build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 3, par_tab3, canonical_patterns);
build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 4, par_tab4, canonical_patterns);
delete[] canonical_patterns;
}
|