File: ffv1_rct_search.comp

package info (click to toggle)
ffmpeg 7%3A8.0.1-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 111,180 kB
  • sloc: ansic: 1,348,518; asm: 145,582; sh: 9,273; makefile: 5,323; cpp: 3,451; lisp: 1,771; perl: 1,303; objc: 1,058; python: 120; awk: 56; ruby: 51
file content (139 lines) | stat: -rw-r--r-- 4,220 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
/*
 * FFv1 codec
 *
 * Copyright (c) 2024 Lynne <dev@lynne.ee>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/*
 * Fetches the three colour components of the pixel at `pos`.
 *
 * The first three channels are read from src[0]. When planar_rgb is non-zero,
 * components 1 and 2 are replaced by the first channel of src[1] and src[2].
 * The result is then reordered through fmt_lut[] before being returned.
 */
ivec3 load_components(ivec2 pos)
{
    ivec3 raw = ivec3(imageLoad(src[0], pos));

    if (planar_rgb != 0) {
        /* One image per plane: pick up the remaining two components. */
        raw[1] = int(imageLoad(src[1], pos)[0]);
        raw[2] = int(imageLoad(src[2], pos)[0]);
    }

    ivec3 ordered;
    ordered[0] = raw[fmt_lut[0]];
    ordered[1] = raw[fmt_lut[1]];
    ordered[2] = raw[fmt_lut[2]];
    return ordered;
}

/*
 * Candidate coefficient pairs tried by the search, and their count.
 *
 * For a pair (x, y), transform_sample() computes
 *     g + (((r - g)*x + (b - g)*y) >> 2)
 * as the new g component, i.e. R is weighted by x, B by y and G by
 * (4 - x - y), out of a total of 4. The per-entry comments list those weights.
 *
 * The position of an entry in this table is also its index into score_mode[],
 * so the order of the entries matters. Index 3, (1, 1), is the value
 * coeff_search() starts from before comparing scores.
 */
#define NUM_CHECKS 15
const ivec2 rct_y_coeff[NUM_CHECKS] = {
    ivec2(0, 0), //      4G

    ivec2(0, 1), //      3G +  B
    ivec2(1, 0), //  R + 3G
    ivec2(1, 1), //  R + 2G + B

    ivec2(0, 2), //      2G + 2B
    ivec2(2, 0), // 2R + 2G
    ivec2(2, 2), // 2R      + 2B

    ivec2(0, 3), //      1G + 3B
    ivec2(3, 0), // 3R + 1G

    ivec2(0, 4), //           4B
    ivec2(4, 0), // 4R

    ivec2(1, 2), //  R +  G + 2B
    ivec2(2, 1), // 2R +  G +  B

    ivec2(3, 1), // 3R      +  B
    ivec2(1, 3), //  R      + 3B
};

/*
 * Workgroup-shared tile of transformed samples, one entry larger than the
 * workgroup in each dimension. process() stores the sample of invocation
 * (x, y) at [x + 1][y + 1]; get_dist() reads [x][y + 1], [x][y] and
 * [x + 1][y]. Row 0 and column 0 are never written in this file, so they keep
 * whatever the empty initializer gives them (presumably zero -- confirm
 * against the extension that permits initializers on shared variables).
 */
shared ivec3 pix_buf[gl_WorkGroupSize.x + 1][gl_WorkGroupSize.y + 1] = { };

/*
 * Applies the colour transform with the given coefficient pair to one sample.
 *
 * pix      - input sample, addressed through its .r/.g/.b components
 * rct_coef - weights applied to the (r - g) and (b - g) differences
 *
 * Returns a sample whose .r and .b hold the differences against the input g,
 * each biased by rct_offset, and whose .g holds the input g plus the weighted
 * sum of the two (unbiased) differences shifted right by 2.
 */
ivec3 transform_sample(ivec3 pix, ivec2 rct_coef)
{
    int diff_b = pix.b - pix.g;
    int diff_r = pix.r - pix.g;

    /* The weighted sum uses the differences before the offset is applied. */
    int weighted = diff_r*rct_coef.x + diff_b*rct_coef.y;
    int luma = pix.g + (weighted >> 2);

    return ivec3(diff_r + rct_offset, luma, diff_b + rct_offset);
}

/*
 * Measures how far `cur` is from the value predicted out of three entries of
 * pix_buf adjacent to this invocation's own slot.
 *
 * With this invocation's slot at [x + 1][y + 1], the entries read are
 * [x][y + 1], [x][y] and [x + 1][y]. Each component is predicted separately
 * with predict(); the per-component absolute errors are then combined with
 * mid_pred() and returned.
 */
uint get_dist(ivec3 cur)
{
    const uint lx = gl_LocalInvocationID.x;
    const uint ly = gl_LocalInvocationID.y;

    ivec3 left     = pix_buf[lx + 0][ly + 1];
    ivec3 top_left = pix_buf[lx + 0][ly + 0];
    ivec3 top      = pix_buf[lx + 1][ly + 0];

    ivec3 guess;
    guess.r = predict(left.r, ivec2(top_left.r, top.r));
    guess.g = predict(left.g, ivec2(top_left.g, top.g));
    guess.b = predict(left.b, ivec2(top_left.b, top.b));

    uvec3 err = abs(guess - cur);
    return mid_pred(err.r, err.g, err.b);
}

/* NOTE(review): score_cols is not referenced anywhere in this file as shown;
 * confirm whether it is still needed. */
shared uint score_cols[gl_WorkGroupSize.y] = { };
/* Per-candidate accumulated distance for the workgroup: process() adds to
 * entry i for rct_y_coeff[i], coeff_search() picks the smallest. Only the
 * first NUM_CHECKS (15) of the 16 entries are accessed in this file. */
shared uint score_mode[16] = { };

/*
 * Scores every candidate coefficient pair for the pixel at `pos`.
 *
 * The pixel is loaded once; for each entry of rct_y_coeff[] it is transformed,
 * stored into this invocation's slot of pix_buf, compared against the
 * prediction built from adjacent pix_buf entries, and the resulting distance
 * is atomically added to score_mode[i].
 */
void process(ivec2 pos)
{
    ivec3 pix = load_components(pos);

    for (int i = 0; i < NUM_CHECKS; i++) {
        ivec3 tx_pix = transform_sample(pix, rct_y_coeff[i]);
        /* Publish this invocation's sample at [x + 1][y + 1]; get_dist()
         * reads the slots at [x][y + 1], [x][y] and [x + 1][y]. */
        pix_buf[gl_LocalInvocationID.x + 1][gl_LocalInvocationID.y + 1] = tx_pix;
        /* NOTE(review): this is a memory barrier only; there is no barrier()
         * here, so it looks like the other invocations are not guaranteed to
         * have stored their sample for the same `i` by the time get_dist()
         * reads their slots -- confirm this is acceptable for the search. */
        memoryBarrierShared();

        uint dist = get_dist(tx_pix);
        atomicAdd(score_mode[i], dist);
    }
}

/*
 * Chooses the coefficient pair for the slice handled by this workgroup.
 *
 * The slice rectangle is derived from the workgroup ID through slice_coord().
 * Each invocation starts at its local ID inside that rectangle and advances
 * by the workgroup size in x and y, passing every pixel it visits to
 * process(), which accumulates one score per candidate in score_mode[].
 * Once all invocations are done, invocation (0, 0) stores the candidate with
 * the lowest score into sc.slice_rct_coef.
 *
 * sc - slice context to update; only slice_rct_coef is written, and only by
 *      invocation (0, 0).
 *
 * Must be called from workgroup-uniform control flow, since it contains a
 * barrier() (assumes the force_pcm test in main() is uniform -- TODO confirm).
 */
void coeff_search(inout SliceContext sc)
{
    uvec2 img_size = imageSize(src[0]);
    uint sxs = slice_coord(img_size.x, gl_WorkGroupID.x + 0,
                           gl_NumWorkGroups.x, 0);
    uint sxe = slice_coord(img_size.x, gl_WorkGroupID.x + 1,
                           gl_NumWorkGroups.x, 0);
    uint sys = slice_coord(img_size.y, gl_WorkGroupID.y + 0,
                           gl_NumWorkGroups.y, 0);
    uint sye = slice_coord(img_size.y, gl_WorkGroupID.y + 1,
                           gl_NumWorkGroups.y, 0);

    for (uint y = sys + gl_LocalInvocationID.y; y < sye; y += gl_WorkGroupSize.y) {
        for (uint x = sxs + gl_LocalInvocationID.x; x < sxe; x += gl_WorkGroupSize.x) {
            process(ivec2(x, y));
        }
    }

    /* The totals in score_mode[] are only complete once every invocation of
     * the workgroup has left the loops above. Without this barrier,
     * invocation (0, 0) could pick the minimum from partial sums while other
     * invocations are still issuing their atomicAdd()s. All invocations reach
     * this point regardless of how many pixels they processed. */
    memoryBarrierShared();
    barrier();

    if (gl_LocalInvocationID.x == 0 && gl_LocalInvocationID.y == 0) {
        uint min_score = 0xFFFFFFFF;
        /* Start from index 3, i.e. (1, 1); it is kept if no candidate scores
         * strictly below the initial min_score. */
        uint min_idx = 3;
        for (int i = 0; i < NUM_CHECKS; i++) {
            /* Strict comparison: on ties the earliest table entry wins. */
            if (score_mode[i] < min_score) {
                min_score = score_mode[i];
                min_idx = uint(i);
            }
        }
        sc.slice_rct_coef = rct_y_coeff[min_idx];
    }
}

/*
 * Shader entry point: one workgroup per slice.
 *
 * Does nothing when force_pcm equals 1. Otherwise the workgroup's 2D ID is
 * flattened row by row into a slice index and the coefficient search is run
 * on the matching slice context.
 */
void main(void)
{
    if (force_pcm != 1) {
        const uint slice_idx = gl_WorkGroupID.x + gl_NumWorkGroups.x*gl_WorkGroupID.y;
        coeff_search(slice_ctx[slice_idx]);
    }
}