File: selectors.rs

package info (click to toggle)
chromium 138.0.7204.183-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 6,071,908 kB
  • sloc: cpp: 34,937,088; ansic: 7,176,967; javascript: 4,110,704; python: 1,419,953; asm: 946,768; xml: 739,971; pascal: 187,324; sh: 89,623; perl: 88,663; objc: 79,944; sql: 50,304; cs: 41,786; fortran: 24,137; makefile: 21,806; php: 13,980; tcl: 13,166; yacc: 8,925; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (198 lines) | stat: -rw-r--r-- 7,850 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
// Copyright 2025 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Note: This file refers to modifiers in ETC1 spec as "selectors". The jargon
//       was inherited from etcpak.

use std::simd::prelude::*;
use std::simd::{Mask, Simd};

use crate::{Reg, Reg32, UReg, SIMD_WIDTH};

// Selector tables from ETC1 spec. The negative part is omitted due to symmetry.
//
// Each row is one table holding `[small, large]` selector magnitudes; the
// search code in this file tries both magnitudes with either sign. The row
// index is the table index that ends up encoded in the block header.
pub const TABLES: [[i16; 2]; 8] =
    [[2, 8], [5, 17], [9, 29], [13, 42], [18, 60], [24, 80], [33, 106], [47, 183]];

/// Swaps the two off-diagonal 2x2 quadrants of a 4x4 block (bottom left and
/// top right) in every lane where `flip` is set; other lanes are returned
/// unchanged.
///
/// `d` is indexed as `d[row][column][channel]`. Writing each pixel as a letter,
/// a flipped lane goes from:
/// ```text
/// aeim
/// bfjn
/// cgko
/// dhlp
/// ```
/// to:
/// ```text
/// aecg
/// bfdh
/// imko
/// jnlp
/// ```
#[inline]
pub fn flip_pixels(d: &[[[Reg; 3]; 4]; 4], flip: Mask<i16, SIMD_WIDTH>) -> [[[Reg; 3]; 4]; 4] {
    // The quadrants on the diagonal never move, so begin with a plain copy and
    // only rewrite the two quadrants that may trade places.
    let mut o = *d;
    for (row0, col0) in [(0, 2), (2, 0)] {
        for dy in 0..2 {
            for dx in 0..2 {
                let (row, col) = (row0 + dy, col0 + dx);
                // The partner pixel sits at the same offset inside the
                // opposite quadrant, whose origin is the transposed origin.
                let (partner_row, partner_col) = (col0 + dy, row0 + dx);
                for ch in 0..3 {
                    o[row][col][ch] = flip.select(d[partner_row][partner_col][ch], d[row][col][ch]);
                }
            }
        }
    }
    o
}

/// Flip the selector codeword if `flip` for that lane is true.
///
/// See [`flip_pixels`] for a description of the flip operation.
#[inline]
pub fn flip_selectors(x: UReg, flip: Mask<i16, SIMD_WIDTH>) -> UReg {
    let keep = x & Simd::splat(0xCC33);
    let bottom_left = x & Simd::splat(0x00CC);
    let top_right = x & Simd::splat(0x3300);

    let flipped = keep | (bottom_left << 6) | (top_right >> 6);
    flip.select(flipped, x)
}

/// Per-lane result of a selector-table search over one subblock.
pub struct Fit {
    /// Sum over the subblock's pixels of the squared per-pixel error.
    pub err: Reg32,
    /// Index into [`TABLES`] of the chosen selector table.
    pub table_idx: UReg,
    /// One bit per pixel, set when the larger selector magnitude was chosen.
    pub selector_lo: UReg,
    /// One bit per pixel, set when the negative selector sign was chosen.
    pub selector_hi: UReg,
}

/// Finds, independently for every lane, the selector table and per-pixel
/// selectors that best approximate one subblock.
///
/// `data` must be in flipped layout, i.e. exactly two rows of four pixels.
/// `base_color` is the color the selectors are added to.
///
/// The error metric is slightly unusual; the comments in the body explain it.
///
/// # Panics
///
/// Panics if `data` does not contain exactly two rows.
#[inline]
pub fn search_table_and_selectors_subblock(data: &[[[Reg; 3]; 4]], base_color: [Reg; 3]) -> Fit {
    assert_eq!(data.len(), 2);

    // Channel weights of gray(p) = 19*p.r + 38*p.g + 7*p.b (cf. rec601).
    const RGB_WEIGHT: [i16; 3] = [19, 38, 7];
    // A selector is added to all three channels alike, so its contribution to
    // gray(..) is the selector times the sum of the weights.
    const WEIGHT_SUM: i16 = 64;

    // Running per-lane minimum over all tables: a vector flavour of min_by_key.
    let mut best: Option<Fit> = None;

    for (idx, &[small, large]) in TABLES.iter().enumerate() {
        let small_step = Reg::splat(small * WEIGHT_SUM);
        let large_step = Reg::splat(large * WEIGHT_SUM);

        let mut total_err = Reg32::splat(0);
        let mut lo_bits = UReg::splat(0);
        let mut hi_bits = UReg::splat(0);

        for (y, row) in data.iter().enumerate() {
            for (x, pixel) in row.iter().enumerate() {
                // For each pixel we pick the best selector out of
                // [-large, -small, small, large], judged by
                //   abs(gray(q + s - x))
                // with q = base color, s = selector, x = original pixel.
                //
                // The absolute value is taken *after* the grayscale
                // reduction, not per channel. That lets us compute
                // gray(q - x) once and then account for any selector with a
                // single subtraction.
                let mut gray_diff = Reg::splat(0);
                for ((&q, &p), &w) in base_color.iter().zip(pixel).zip(&RGB_WEIGHT) {
                    gray_diff += (q - p) * Reg::splat(w);
                }

                // The selector should pull the error toward zero, so its sign
                // is the opposite of the sign of gray(q - x).
                let wants_negative = gray_diff.simd_gt(Reg::splat(0));

                // With the sign settled, work on magnitudes: subtract each
                // candidate step and keep whichever leaves the smaller rest.
                let magnitude = gray_diff.abs();
                let err_small = (magnitude - small_step).abs();
                let err_large = (magnitude - large_step).abs();
                let wants_large = err_large.simd_lt(err_small);

                // A crude bound on the error is 255*64, whose square does not
                // fit in 16 bits; widen to 32 bits before squaring and
                // accumulating. This is somewhat expensive.
                let pixel_err = wants_large.select(err_large, err_small).cast::<i32>();
                total_err += pixel_err * pixel_err;

                // Pixel (y, x) owns bit y + 4*x of each selector word.
                let bit = UReg::splat(1u16 << (y + 4 * x));
                let no_bit = UReg::splat(0);
                lo_bits |= wants_large.select(bit, no_bit);
                hi_bits |= wants_negative.select(bit, no_bit);
            }
        }

        let candidate = Fit {
            err: total_err,
            table_idx: UReg::splat(idx as u16),
            selector_lo: lo_bits,
            selector_hi: hi_bits,
        };

        best = Some(match best.take() {
            None => candidate,
            Some(prev) => {
                // Strict comparison: on a tie the earlier table is kept.
                let improved_32 = candidate.err.simd_lt(prev.err);
                let improved = improved_32.cast::<i16>();
                Fit {
                    err: improved_32.select(candidate.err, prev.err),
                    table_idx: improved.select(candidate.table_idx, prev.table_idx),
                    selector_lo: improved.select(candidate.selector_lo, prev.selector_lo),
                    selector_hi: improved.select(candidate.selector_hi, prev.selector_hi),
                }
            }
        });
    }

    // TABLES is a non-empty constant, so at least one candidate was recorded.
    best.unwrap()
}

/// Runs the selector-table search on both subblocks of a block and packs the
/// outcome into codewords.
///
/// Bit 0 of `hdr0` is read as the flip flag. The table index chosen for the
/// first subblock is OR-ed into `hdr0` starting at bit 5 and the one for the
/// second subblock starting at bit 2 (presumably those bits are zero on
/// entry; confirm against the caller). `hdr1` is passed through unchanged.
///
/// Returns: Four 16-bit codewords coding the optimal coefficients, in the
/// order `[selector_lo, selector_hi, hdr0, hdr1]`.
#[inline]
pub fn search_table_and_selectors(
    mut hdr0: UReg,
    hdr1: UReg,
    data: &[[[Reg; 3]; 4]; 4],
    base_color: [[Reg; 3]; 2],
) -> [UReg; 4] {
    // The per-subblock search wants the first subblock in the top two rows and
    // the second in the bottom two. Lanes whose flip flag is clear are
    // rearranged into that shape here, and the resulting selector bits are
    // moved back to their true pixel positions at the very end.
    let flip = (hdr0 & UReg::splat(1)).simd_ne(UReg::splat(0));
    let needs_swap = !flip;
    let permuted = flip_pixels(data, needs_swap);

    let mut selector_lo = UReg::splat(0);
    let mut selector_hi = UReg::splat(0);

    // (subblock index, bit position of that subblock's table index in hdr0)
    for (subblock, table_shift) in [(0usize, 5u16), (1, 2)] {
        let first_row = 2 * subblock;
        let fit = search_table_and_selectors_subblock(
            &permuted[first_row..first_row + 2],
            base_color[subblock],
        );

        // The subblock search numbers its rows 0 and 1; shifting by the first
        // row moves its bits onto the rows this subblock really occupies.
        let row_shift = first_row as u16;
        hdr0 |= fit.table_idx << table_shift;
        selector_lo |= fit.selector_lo << row_shift;
        selector_hi |= fit.selector_hi << row_shift;
    }

    [
        flip_selectors(selector_lo, needs_swap),
        flip_selectors(selector_hi, needs_swap),
        hdr0,
        hdr1,
    ]
}