File: ffv1_rct_search.comp

package info (click to toggle)
ffmpeg 7%3A8.0.1-3
links: PTS, VCS
area: main
in suites: forky, sid
size: 111,180 kB
sloc: ansic: 1,348,518; asm: 145,582; sh: 9,273; makefile: 5,323; cpp: 3,451; lisp: 1,771; perl: 1,303; objc: 1,058; python: 120; awk: 56; ruby: 51
file content (139 lines) | stat: -rw-r--r-- 4,220 bytes
parent folder | download | duplicates (6)
/*
 * FFv1 codec
 *
 * Copyright (c) 2024 Lynne <dev@lynne.ee>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/*
 * Fetch the sample at `pos` and return its three components reordered
 * through fmt_lut.
 *
 * The first three channels are always read from src[0]. When planar_rgb is
 * non-zero, components 1 and 2 are replaced by channel 0 of src[1] and
 * src[2] respectively.
 */
ivec3 load_components(ivec2 pos)
{
    ivec3 raw = ivec3(imageLoad(src[0], pos));

    if (planar_rgb != 0) {
        raw[1] = int(imageLoad(src[1], pos)[0]);
        raw[2] = int(imageLoad(src[2], pos)[0]);
    }

    /* Permute into the component order described by fmt_lut. */
    ivec3 ordered;
    ordered[0] = raw[fmt_lut[0]];
    ordered[1] = raw[fmt_lut[1]];
    ordered[2] = raw[fmt_lut[2]];
    return ordered;
}

/*
 * Candidate coefficient pairs tried by the search, in evaluation order.
 *
 * transform_sample() computes the middle component as
 *     g + (((r - g)*x + (b - g)*y) >> 2)
 * so a pair (x, y) weights R by x/4, B by y/4 and G by (4 - x - y)/4.
 * The trailing comments give those weights as R:G:B (in quarters).
 *
 * The order matters: coeff_search() keeps the first entry with the lowest
 * score, so ties resolve towards the lower index.
 */
#define NUM_CHECKS 15
const ivec2 rct_y_coeff[NUM_CHECKS] = ivec2[NUM_CHECKS](
    ivec2(0, 0), // 0:4:0

    ivec2(0, 1), // 0:3:1
    ivec2(1, 0), // 1:3:0
    ivec2(1, 1), // 1:2:1

    ivec2(0, 2), // 0:2:2
    ivec2(2, 0), // 2:2:0
    ivec2(2, 2), // 2:0:2

    ivec2(0, 3), // 0:1:3
    ivec2(3, 0), // 3:1:0

    ivec2(0, 4), // 0:0:4
    ivec2(4, 0), // 4:0:0

    ivec2(1, 2), // 1:1:2
    ivec2(2, 1), // 2:1:1

    ivec2(3, 1), // 3:0:1
    ivec2(1, 3)  // 1:0:3
);

/*
 * Workgroup-shared tile of transformed samples. process() stores each
 * invocation's sample at [local.x + 1][local.y + 1]; get_dist() reads the
 * entries at [x][y + 1], [x][y] and [x + 1][y] as prediction neighbours.
 * Row/column 0 is never written in this file, so it keeps whatever the empty
 * initializer yields (presumably zero via a null-initializer extension --
 * TODO confirm against the shader's enabled extensions).
 */
shared ivec3 pix_buf[gl_WorkGroupSize.x + 1][gl_WorkGroupSize.y + 1] = { };

/*
 * Apply the candidate colour transform to one sample.
 *
 * r and b become differences against g, each biased by rct_offset; g is
 * adjusted by the coefficient-weighted sum of those (unbiased) differences,
 * arithmetically shifted right by 2.
 *
 * pix:      input sample, accessed as (r, g, b)
 * rct_coef: (x, y) weights applied to the r and b differences
 * returns:  (r - g + rct_offset, adjusted g, b - g + rct_offset)
 */
ivec3 transform_sample(ivec3 pix, ivec2 rct_coef)
{
    int diff_r = pix.r - pix.g;
    int diff_b = pix.b - pix.g;
    int mixed  = pix.g + ((diff_r*rct_coef.x + diff_b*rct_coef.y) >> 2);

    return ivec3(diff_r + rct_offset, mixed, diff_b + rct_offset);
}

/*
 * Score a transformed sample against a prediction built from three
 * neighbouring entries of pix_buf.
 *
 * For this invocation's local coordinate (x, y) the entries read are
 * [x][y + 1], [x][y] and [x + 1][y]; the invocation's own sample lives at
 * [x + 1][y + 1]. Each channel is predicted independently with predict(),
 * and the result is mid_pred() of the three per-channel absolute errors.
 */
uint get_dist(ivec3 cur)
{
    uvec2 lid = gl_LocalInvocationID.xy;

    ivec3 left     = pix_buf[lid.x + 0][lid.y + 1];
    ivec3 top_left = pix_buf[lid.x + 0][lid.y + 0];
    ivec3 top      = pix_buf[lid.x + 1][lid.y + 0];

    ivec3 guess;
    guess.r = predict(left.r, ivec2(top_left.r, top.r));
    guess.g = predict(left.g, ivec2(top_left.g, top.g));
    guess.b = predict(left.b, ivec2(top_left.b, top.b));

    uvec3 err = abs(guess - cur);
    return mid_pred(err.r, err.g, err.b);
}

/* Not referenced anywhere in the visible part of this file. */
shared uint score_cols[gl_WorkGroupSize.y] = { };
/*
 * Per-candidate accumulated score for the whole workgroup: process() adds
 * into entry i with atomicAdd() and coeff_search() picks the smallest.
 * Only the first NUM_CHECKS (15) of the 16 entries are used here.
 */
shared uint score_mode[16] = { };

/*
 * Evaluate every candidate in rct_y_coeff for the pixel at `pos` and add the
 * resulting distances to the shared per-candidate totals in score_mode.
 *
 * For each candidate the transformed sample is published into this
 * invocation's pix_buf slot before the neighbouring slots are read back by
 * get_dist().
 *
 * NOTE(review): memoryBarrierShared() orders this invocation's shared-memory
 * accesses but does not wait for other invocations, so the neighbour slots
 * read by get_dist() may hold samples from a different candidate or pixel.
 * Presumably tolerated because the score is only a heuristic -- confirm.
 */
void process(ivec2 pos)
{
    ivec3 pix = load_components(pos);
    uvec2 slot = gl_LocalInvocationID.xy + uvec2(1);

    for (int mode = 0; mode < NUM_CHECKS; mode++) {
        ivec3 candidate = transform_sample(pix, rct_y_coeff[mode]);

        pix_buf[slot.x][slot.y] = candidate;
        memoryBarrierShared();

        atomicAdd(score_mode[mode], get_dist(candidate));
    }
}

/*
 * Score every candidate coefficient pair over this workgroup's slice and
 * store the best one in sc.slice_rct_coef.
 *
 * The slice rectangle is derived with slice_coord() from the workgroup ID
 * and the workgroup count along each axis. Invocations stride over the
 * rectangle in steps of the workgroup size, each calling process() for its
 * pixels. Local invocation (0, 0) then selects the candidate with the lowest
 * accumulated score; ties keep the lowest index, and index 3 is the fallback
 * if no score is below 0xFFFFFFFF.
 *
 * sc: slice context to update; only written by local invocation (0, 0).
 */
void coeff_search(inout SliceContext sc)
{
    uvec2 img_size = imageSize(src[0]);
    uint sxs = slice_coord(img_size.x, gl_WorkGroupID.x + 0,
                           gl_NumWorkGroups.x, 0);
    uint sxe = slice_coord(img_size.x, gl_WorkGroupID.x + 1,
                           gl_NumWorkGroups.x, 0);
    uint sys = slice_coord(img_size.y, gl_WorkGroupID.y + 0,
                           gl_NumWorkGroups.y, 0);
    uint sye = slice_coord(img_size.y, gl_WorkGroupID.y + 1,
                           gl_NumWorkGroups.y, 0);

    for (uint y = sys + gl_LocalInvocationID.y; y < sye; y += gl_WorkGroupSize.y) {
        for (uint x = sxs + gl_LocalInvocationID.x; x < sxe; x += gl_WorkGroupSize.x) {
            process(ivec2(x, y));
        }
    }

    /*
     * Wait for every invocation in the workgroup to finish accumulating into
     * score_mode[] before it is read. Without this, invocation (0, 0) could
     * pick the minimum from partial sums while others are still adding.
     * Control flow has reconverged here, so all invocations that entered
     * this function reach the barrier.
     */
    memoryBarrierShared();
    barrier();

    if (gl_LocalInvocationID.x == 0 && gl_LocalInvocationID.y == 0) {
        uint min_score = 0xFFFFFFFF;
        uint min_idx = 3;
        for (int i = 0; i < NUM_CHECKS; i++) {
            if (score_mode[i] < min_score) {
                min_score = score_mode[i];
                min_idx = i;
            }
        }
        sc.slice_rct_coef = rct_y_coeff[min_idx];
    }
}

/*
 * Shader entry point: one workgroup handles one slice. Nothing is done when
 * force_pcm equals 1; otherwise the search runs on the slice context
 * selected by the row-major workgroup index.
 */
void main(void)
{
    if (force_pcm == 1) {
        return;
    }

    /* Row-major slice index from the 2D workgroup coordinate. */
    const uint slice = gl_WorkGroupID.x + gl_NumWorkGroups.x*gl_WorkGroupID.y;

    coeff_search(slice_ctx[slice]);
}