1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
|
// Copyright 2025 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Note: This file refers to the "modifiers" of the ETC1 spec as "selectors".
// The jargon was inherited from etcpak.
use std::simd::prelude::*;
use std::simd::{Mask, Simd};
use crate::{Reg, Reg32, UReg, SIMD_WIDTH};
/// Selector tables from the ETC1 spec. The negative part is omitted due to
/// symmetry.
///
/// Each row is one table, stored as `[small, large]` magnitudes. The row index
/// is the table index reported in [`Fit::table_idx`].
pub const TABLES: [[i16; 2]; 8] =
[[2, 8], [5, 17], [9, 29], [13, 42], [18, 60], [24, 80], [33, 106], [47, 183]];
/// Swap the two off-diagonal 2x2 quadrants of a 4x4 block in every lane where
/// `flip` is set; lanes where `flip` is clear are returned unchanged.
///
/// `d` is indexed as `d[y][x][ch]`. The top-left and bottom-right quadrants are
/// always left in place. The top-right quadrant (`y` in 0..2, `x` in 2..4) and
/// the bottom-left quadrant (`y` in 2..4, `x` in 0..2) trade places.
///
/// In other words, a flipped lane goes from:
/// ```text
/// aeim
/// bfjn
/// cgko
/// dhlp
/// ```
/// to:
/// ```text
/// aecg
/// bfdh
/// imko
/// jnlp
/// ```
#[inline]
pub fn flip_pixels(d: &[[[Reg; 3]; 4]; 4], flip: Mask<i16, SIMD_WIDTH>) -> [[[Reg; 3]; 4]; 4] {
    // Start from a plain copy: this already settles the two diagonal quadrants.
    let mut out = *d;
    for dy in 0..2 {
        for dx in 0..2 {
            for ch in 0..3 {
                let top_right = d[dy][2 + dx][ch];
                let bottom_left = d[2 + dy][dx][ch];
                // Each off-diagonal pixel takes its partner's value only in
                // the lanes selected by `flip`.
                out[dy][2 + dx][ch] = flip.select(bottom_left, top_right);
                out[2 + dy][dx][ch] = flip.select(top_right, bottom_left);
            }
        }
    }
    out
}
/// Apply the [`flip_pixels`] permutation to a 16-bit selector codeword, in
/// every lane where `flip` is set.
///
/// The bit for pixel `(x, y)` sits at position `y + 4 * x`, so each quadrant
/// occupies a fixed set of bits:
///
/// * `0xCC33`: top-left and bottom-right quadrants, which stay in place;
/// * `0x00CC`: bottom-left quadrant, which moves up by 6 bits;
/// * `0x3300`: top-right quadrant, which moves down by 6 bits.
#[inline]
pub fn flip_selectors(x: UReg, flip: Mask<i16, SIMD_WIDTH>) -> UReg {
    let moved_up = (x & Simd::splat(0x00CC)) << 6;
    let moved_down = (x & Simd::splat(0x3300)) >> 6;
    let in_place = x & Simd::splat(0xCC33);
    let swapped = in_place | moved_up | moved_down;
    flip.select(swapped, x)
}
/// Outcome of the table/selector search for one subblock, as produced by
/// [`search_table_and_selectors_subblock`]. Every field is a SIMD register.
pub struct Fit {
/// Sum over the subblock's pixels of the squared per-pixel error for the
/// winning table.
pub err: Reg32,
/// Index into [`TABLES`] of the winning table.
pub table_idx: UReg,
/// Per-pixel bit at position `y + x * 4`: set when the large magnitude of the
/// table was preferred over the small one.
pub selector_lo: UReg,
/// Per-pixel bit at position `y + x * 4`: set when the negative sign was
/// preferred for the selector.
pub selector_hi: UReg,
}
/// Search for the optimal table and selectors for a subblock.
///
/// `data` should be in flipped layout, i.e. 4x2: two rows of four pixels,
/// indexed as `data[y][x][ch]`. `base_color` is the subblock's base color,
/// one register per channel.
///
/// Returns the [`Fit`] with the smallest accumulated squared error among all
/// entries of [`TABLES`]; on ties the earlier table wins. Selector bits are
/// placed at position `y + x * 4`.
///
/// The error function used here is a bit quirky, see code comment for details.
///
/// # Panics
///
/// Panics if `data` does not contain exactly two rows.
#[inline]
pub fn search_table_and_selectors_subblock(data: &[[[Reg; 3]; 4]], base_color: [Reg; 3]) -> Fit {
    assert_eq!(data.len(), 2);

    // Below, we search for the optimal selector among [-lg, -sm, sm, lg] (sm
    // and lg are from the selector table).
    //
    // We use the error metric:
    //   abs(gray(q + s - x))
    // where q = quantized average, s = selector, x = pixel before compression
    //       gray(p) = 19*p.r + 38*p.g + 7*p.b (cf. rec601)
    //
    // Note that this is abs(gray(..)) not gray(abs(..)), i.e. the absolute
    // value is taken after converting to grayscale. This allows precomputing
    // gray(q - x), then exploiting the fact that the selector is the same for
    // all three channels to calculate the final error with a single addition.
    let rgb_weight = [19, 38, 7];
    // The selector is the same for all three channels, so it is simply scaled
    // by the total weight.
    let weight_sum = 64;

    // Table-independent part, computed once rather than once per table:
    // abs(gray(q - x)) for every pixel, and the sign bit of each selector.
    let mut base_err_abs = [[Reg::splat(0); 4]; 2];
    let mut selector_hi = UReg::splat(0);
    for y in 0..2 {
        for x in 0..4 {
            let mut base_err = Reg::splat(0);
            for ch in 0..3 {
                base_err += (base_color[ch] - data[y][x][ch]) * Simd::splat(rgb_weight[ch]);
            }
            // To minimize the absolute value, the selector should have the
            // opposite sign of gray(q - x). This does not depend on the table.
            let prefer_neg = base_err.simd_gt(Simd::splat(0));
            let pixel_idx = (y + x * 4) as u16;
            selector_hi |= prefer_neg.select(UReg::splat(1 << pixel_idx), UReg::splat(0));
            base_err_abs[y][x] = base_err.abs();
        }
    }

    // Table-dependent part: a vector version of min_by_key over the tables.
    let mut best: Option<Fit> = None;
    for (table_idx, sel_table) in TABLES.iter().enumerate() {
        let sm = Reg::splat(sel_table[0] * weight_sum);
        let lg = Reg::splat(sel_table[1] * weight_sum);
        let mut outer_err = Reg32::splat(0);
        let mut selector_lo = UReg::splat(0);
        for y in 0..2 {
            for x in 0..4 {
                // Subtract in the direction that makes the final error metric
                // smaller, for both sm and lg, and decide the winner.
                let err_sm = (base_err_abs[y][x] - sm).abs();
                let err_lg = (base_err_abs[y][x] - lg).abs();
                let prefer_lg = err_lg.simd_lt(err_sm);
                // The error can be fairly large (a crude upper bound is
                // 255*64). To avoid overflow after squaring, we use widening
                // multiply and accumulate. This is somewhat expensive.
                let best_err = prefer_lg.select(err_lg, err_sm).cast::<i32>();
                outer_err += best_err * best_err;
                let pixel_idx = (y + x * 4) as u16;
                selector_lo |= prefer_lg.select(UReg::splat(1 << pixel_idx), UReg::splat(0));
            }
        }
        let table_idx = UReg::splat(table_idx as u16);
        best = Some(match best {
            None => Fit { err: outer_err, table_idx, selector_lo, selector_hi },
            Some(prev) => {
                // Strict comparison: the earlier table is kept on ties.
                let lt_32 = outer_err.simd_lt(prev.err);
                let lt = lt_32.cast::<i16>();
                Fit {
                    err: lt_32.select(outer_err, prev.err),
                    table_idx: lt.select(table_idx, prev.table_idx),
                    selector_lo: lt.select(selector_lo, prev.selector_lo),
                    // Identical for every table, so there is nothing to pick.
                    selector_hi,
                }
            }
        });
    }
    best.expect("TABLES is non-empty")
}
/// Pick the best selector table and per-pixel selectors for both subblocks of
/// a block.
///
/// Bit 0 of `hdr0` is read as the flip flag. The winning table index of
/// subblock 0 is OR-ed into `hdr0` at bit offset 5, and that of subblock 1 at
/// bit offset 2. `hdr1` is passed through untouched.
///
/// Returns: Four 16-bit codewords coding the optimal coefficients, in the
/// order `[selector_lo, selector_hi, hdr0, hdr1]`.
#[inline]
pub fn search_table_and_selectors(
    mut hdr0: UReg,
    hdr1: UReg,
    data: &[[[Reg; 3]; 4]; 4],
    base_color: [[Reg; 3]; 2],
) -> [UReg; 4] {
    // Bit offset inside `hdr0` of the table index, per subblock.
    const TABLE_IDX_SHIFT: [u16; 2] = [5, 2];

    // The per-subblock search wants the first subblock in the top two rows and
    // the second in the bottom two. Lanes whose flip bit is clear are not laid
    // out that way, so the flip functions take care of permuting them; the
    // same permutation is applied to the selector codewords at the end to undo
    // the shuffle.
    let needs_permute = (hdr0 & UReg::splat(1)).simd_eq(UReg::splat(0));
    let rows = flip_pixels(data, needs_permute);

    let mut lo_bits = UReg::splat(0);
    let mut hi_bits = UReg::splat(0);
    for (subblock, &table_shift) in TABLE_IDX_SHIFT.iter().enumerate() {
        let first_row = 2 * subblock;
        let fit = search_table_and_selectors_subblock(
            &rows[first_row..first_row + 2],
            base_color[subblock],
        );
        hdr0 |= fit.table_idx << table_shift;
        // The subblock search emits bits for rows 0 and 1; shifting by the
        // first row index moves them onto this subblock's rows.
        let row_shift = first_row as u16;
        lo_bits |= fit.selector_lo << row_shift;
        hi_bits |= fit.selector_hi << row_shift;
    }

    [
        flip_selectors(lo_bits, needs_permute),
        flip_selectors(hi_bits, needs_permute),
        hdr0,
        hdr1,
    ]
}
|