File: dct.comp

package info (click to toggle)
chromium 145.0.7632.159-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 5,976,224 kB
  • sloc: cpp: 36,198,469; ansic: 7,634,080; javascript: 3,564,060; python: 1,649,622; xml: 838,470; asm: 717,087; pascal: 185,708; sh: 88,786; perl: 88,718; objc: 79,984; sql: 59,811; cs: 42,452; fortran: 24,101; makefile: 21,144; tcl: 15,277; php: 14,022; yacc: 9,066; ruby: 7,553; awk: 3,720; lisp: 3,233; lex: 1,328; ada: 727; jsp: 228; sed: 36
file content (119 lines) | stat: -rw-r--r-- 4,665 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
/*
 * Copyright (c) 2025 Lynne <dev@lynne.ee>
 * Copyright (c) 2016 Nathan Egge <unlord@xiph.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * Orthonormal inverse 8-point Type-II DCT based on the Chen factorization[1].
 * 1D, with the scale factors moved up front.
 * The inverse n-point Type-II DCT is computed by first computing an inverse
 * n/2-point Type-II DCT of the even-indexed inputs and an n/2-point Type-IV DST
 * of the odd-indexed inputs, and then combining them using a "butterfly" operation.
 *
 * [1] W.H. Chen, C. Smith, and S. Fralick,
 * "A Fast Computational Algorithm for the Discrete Cosine Transform",
 * IEEE Transactions on Communications, Vol. 25, No. 9, pp 1004-1009, Sept. 1977
 */

/* Multiplier for the per-slot size of blocks[]; defaults to 1 unless it has
 * already been defined before this point. */
#ifndef NB_COMPONENTS
#define NB_COMPONENTS 1
#endif

/*
 * Shared scratch storage operated on in place by idct8(): one float array per
 * block slot, each holding NB_COMPONENTS*8*(8 + 1) floats, i.e. 72 floats per
 * component instead of the 64 of a plain 8x8 block.
 * Padded by 1 row to avoid bank conflicts.
 * NOTE(review): how the 8 extra floats are used (trailing row vs. a row pitch
 * of 9) is decided by the offset/stride the callers pass to idct8() - not
 * visible here. NB_BLOCKS is not defined in this part of the file and must be
 * supplied before this declaration.
 */
shared float blocks[NB_BLOCKS][NB_COMPONENTS*8*(8 + 1)];

/**
 * Scale factors for the 8x8 inverse DCT, written out as 8 rows of 8 entries.
 * The table is symmetric, and entry [8*r + c] equals s[r]*s[c] (to the printed
 * precision) for
 *   s = { 0.3535534, 0.4903926, 0.4619398, 0.4157348,
 *         0.3535534, 0.2777851, 0.1913417, 0.0975452 },
 * i.e. s[k] = cos(k*pi/16)/2 for k = 1..7 and s[0] = 1/(2*sqrt(2)).
 * NOTE(review): the table is not referenced in this part of the file; it is
 * presumably multiplied into the coefficients before idct8() runs - confirm
 * against the caller.
 */
const float idct_scale[64] = {
    /* row 0 */
    0.1250000000000000, 0.1733799806652684, 0.1633203706095471, 0.1469844503024199, 0.1250000000000000, 0.0982118697983878, 0.0676495125182746, 0.0344874224103679,
    /* row 1 */
    0.1733799806652684, 0.2404849415639108, 0.2265318615882219, 0.2038732892122293, 0.1733799806652684, 0.1362237766939547, 0.0938325693794663, 0.0478354290456362,
    /* row 2 */
    0.1633203706095471, 0.2265318615882219, 0.2133883476483184, 0.1920444391778541, 0.1633203706095471, 0.1283199917898342, 0.0883883476483185, 0.0450599888754343,
    /* row 3 */
    0.1469844503024199, 0.2038732892122293, 0.1920444391778541, 0.1728354290456362, 0.1469844503024199, 0.1154849415639109, 0.0795474112858021, 0.0405529186026822,
    /* row 4 (identical to row 0) */
    0.1250000000000000, 0.1733799806652684, 0.1633203706095471, 0.1469844503024199, 0.1250000000000000, 0.0982118697983878, 0.0676495125182746, 0.0344874224103679,
    /* row 5 */
    0.0982118697983878, 0.1362237766939547, 0.1283199917898342, 0.1154849415639109, 0.0982118697983878, 0.0771645709543638, 0.0531518809229535, 0.0270965939155924,
    /* row 6 */
    0.0676495125182746, 0.0938325693794663, 0.0883883476483185, 0.0795474112858021, 0.0676495125182746, 0.0531518809229535, 0.0366116523516816, 0.0186644585125857,
    /* row 7 */
    0.0344874224103679, 0.0478354290456362, 0.0450599888754343, 0.0405529186026822, 0.0344874224103679, 0.0270965939155924, 0.0186644585125857, 0.0095150584360892,
};

/**
 * In-place scaled inverse 8-point DCT of one strided line held in blocks[].
 *
 * The eight samples are read from blocks[block][offset + k*stride], k = 0..7,
 * transformed, and the eight results are stored back to the same positions.
 * Every load is issued before the first store, so the line can be transformed
 * in place. No barrier is issued inside this function.
 *
 * block:  first index into blocks[]
 * offset: index of sample 0 within blocks[block]
 * stride: distance, in floats, between consecutive samples
 *
 * NOTE(review): no per-coefficient scaling happens here; the file header says
 * the scale factors are "moved up front", so the inputs are presumably
 * pre-scaled by the caller - confirm.
 */
void idct8(uint block, uint offset, uint stride)
{
    /* Rotation constants (identities hold to the printed precision) */
    const float k_sqrt2   = 1.4142135623730950488016887242097f; /* sqrt(2) */
    const float k_rot_mid = 1.8477590650225735122563663787936f; /* 2*cos(pi/8) */
    const float k_rot_lo  = 1.0823922002923939687994464107328f; /* 2*(cos(pi/8) - sin(pi/8)) */
    const float k_rot_hi  = 2.6131259297527530557132863468544f; /* 2*(cos(pi/8) + sin(pi/8)) */

    /* Fetch the whole line before anything is written back */
    float c0 = blocks[block][offset + 0*stride];
    float c1 = blocks[block][offset + 1*stride];
    float c2 = blocks[block][offset + 2*stride];
    float c3 = blocks[block][offset + 3*stride];
    float c4 = blocks[block][offset + 4*stride];
    float c5 = blocks[block][offset + 5*stride];
    float c6 = blocks[block][offset + 6*stride];
    float c7 = blocks[block][offset + 7*stride];

    /* Even-indexed samples: embedded scaled inverse 4-point Type-II DCT */
    float sum04 = c0 + c4;
    float dif04 = c0 - c4;
    float sum26 = c2 + c6;
    float rot26 = (c2 - c6)*k_sqrt2 - sum26;
    float even0 = sum04 + sum26;
    float even1 = dif04 + rot26;
    float even2 = dif04 - rot26;
    float even3 = sum04 - sum26;

    /* Odd-indexed samples: embedded scaled inverse 4-point Type-IV DST */
    float sum53 = c5 + c3;
    float dif53 = c5 - c3;
    float sum17 = c1 + c7;
    float dif17 = c1 - c7;
    float pivot = (dif17 + dif53)*k_rot_mid; /* term common to both rotations */
    float tail4 = pivot - dif17*k_rot_lo;
    float tail5 = (sum17 - sum53)*k_sqrt2;
    float tail6 = pivot - dif53*k_rot_hi;
    /* Each odd output is built from the previous one, highest index first */
    float odd7 = sum17 + sum53;
    float odd6 = odd7 - tail6;
    float odd5 = odd6 + tail5;
    float odd4 = odd5 - tail4;

    /* Final butterflies: output k pairs with output 7 - k */
    blocks[block][offset + 0*stride] = even0 + odd7;
    blocks[block][offset + 1*stride] = even1 - odd6;
    blocks[block][offset + 2*stride] = even2 + odd5;
    blocks[block][offset + 3*stride] = even3 - odd4;
    blocks[block][offset + 4*stride] = even3 + odd4;
    blocks[block][offset + 5*stride] = even2 - odd5;
    blocks[block][offset + 6*stride] = even1 + odd6;
    blocks[block][offset + 7*stride] = even0 - odd7;
}