File: normalize.h

package info (click to toggle)
lsp-plugins 1.2.5-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 91,856 kB
  • sloc: cpp: 427,831; xml: 57,779; makefile: 9,961; php: 1,005; sh: 18
file content (114 lines) | stat: -rw-r--r-- 5,228 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
/*
 * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
 *           (C) 2020 Vladimir Sadovnikov <sadko4u@gmail.com>
 *
 * This file is part of lsp-dsp-lib
 * Created on: 31 мар. 2020 г.
 *
 * lsp-dsp-lib is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * any later version.
 *
 * lsp-dsp-lib is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with lsp-dsp-lib. If not, see <https://www.gnu.org/licenses/>.
 */

#ifndef PRIVATE_DSP_ARCH_X86_SSE_FFT_NORMALIZE_H_
#define PRIVATE_DSP_ARCH_X86_SSE_FFT_NORMALIZE_H_

#ifndef PRIVATE_DSP_ARCH_X86_SSE_IMPL
    #error "This header should not be included directly"
#endif /* PRIVATE_DSP_ARCH_X86_SSE_IMPL */

namespace lsp
{
    namespace sse
    {
        void normalize_fft3(float *dre, float *dim, const float *re, const float *im, size_t rank)
        {
            IF_ARCH_X86(
                float k = 1.0f/(1 << rank);
                size_t count = 1 << rank, off = 0;
            );
            ARCH_X86_ASM(
                // x8 blocks
                __ASM_EMIT  ("shufps        $0x00, %%xmm0, %%xmm0")                 // xmm0   = k
                __ASM_EMIT32("subl          $8, %[count]")
                __ASM_EMIT64("sub           $8, %[count]")
                __ASM_EMIT  ("movaps        %%xmm0, %%xmm1")
                __ASM_EMIT  ("jb            2f")
                __ASM_EMIT  ("1:")
                __ASM_EMIT  ("movups        0x00(%[s_re], %[off]), %%xmm4")
                __ASM_EMIT  ("movups        0x10(%[s_re], %[off]), %%xmm5")
                __ASM_EMIT  ("movups        0x00(%[s_im], %[off]), %%xmm6")
                __ASM_EMIT  ("movups        0x10(%[s_im], %[off]), %%xmm7")
                __ASM_EMIT  ("mulps         %%xmm0, %%xmm4")
                __ASM_EMIT  ("mulps         %%xmm1, %%xmm5")
                __ASM_EMIT  ("mulps         %%xmm0, %%xmm6")
                __ASM_EMIT  ("mulps         %%xmm1, %%xmm7")
                __ASM_EMIT  ("movups        %%xmm4, 0x00(%[d_re], %[off])")
                __ASM_EMIT  ("movups        %%xmm5, 0x10(%[d_re], %[off])")
                __ASM_EMIT  ("movups        %%xmm6, 0x00(%[d_im], %[off])")
                __ASM_EMIT  ("movups        %%xmm7, 0x10(%[d_im], %[off])")
                __ASM_EMIT  ("add           $0x20, %[off]")
                __ASM_EMIT32("subl          $8, %[count]")
                __ASM_EMIT64("sub           $8, %[count]")
                __ASM_EMIT  ("jae           1b")
                __ASM_EMIT  ("2:")
                : [off] "+r" (off), [count] __ASM_ARG_RW(count),
                  [k] "+Yz" (k)
                : [s_re] "r" (re), [s_im] "r" (im),
                  [d_re] "r" (dre), [d_im] "r" (dim)
                : "cc", "memory",
                  "%xmm1",
                  "%xmm4", "%xmm5", "%xmm6", "%xmm7"
            );
        }

        void normalize_fft2(float *re, float *im, size_t rank)
        {
            IF_ARCH_X86(
                float k = 1.0f/(1 << rank);
                size_t count = 1 << rank, off = 0;
            );
            ARCH_X86_ASM(
                // x8 blocks
                __ASM_EMIT  ("shufps        $0x00, %%xmm0, %%xmm0")                 // xmm0   = k
                __ASM_EMIT  ("sub           $8, %[count]")
                __ASM_EMIT  ("movaps        %%xmm0, %%xmm1")
                __ASM_EMIT  ("jb            2f")
                __ASM_EMIT  ("1:")
                __ASM_EMIT  ("movups        0x00(%[d_re], %[off]), %%xmm4")
                __ASM_EMIT  ("movups        0x10(%[d_re], %[off]), %%xmm5")
                __ASM_EMIT  ("movups        0x00(%[d_im], %[off]), %%xmm6")
                __ASM_EMIT  ("movups        0x10(%[d_im], %[off]), %%xmm7")
                __ASM_EMIT  ("mulps         %%xmm0, %%xmm4")
                __ASM_EMIT  ("mulps         %%xmm1, %%xmm5")
                __ASM_EMIT  ("mulps         %%xmm0, %%xmm6")
                __ASM_EMIT  ("mulps         %%xmm1, %%xmm7")
                __ASM_EMIT  ("movups        %%xmm4, 0x00(%[d_re], %[off])")
                __ASM_EMIT  ("movups        %%xmm5, 0x10(%[d_re], %[off])")
                __ASM_EMIT  ("movups        %%xmm6, 0x00(%[d_im], %[off])")
                __ASM_EMIT  ("movups        %%xmm7, 0x10(%[d_im], %[off])")
                __ASM_EMIT  ("add           $0x20, %[off]")
                __ASM_EMIT  ("sub           $8, %[count]")
                __ASM_EMIT  ("jae           1b")
                __ASM_EMIT  ("2:")
                : [off] "+r" (off), [count] "+r" (count),
                  [k] "+Yz" (k)
                : [d_re] "r" (re), [d_im] "r" (im)
                : "cc", "memory",
                  "%xmm1",
                  "%xmm4", "%xmm5", "%xmm6", "%xmm7"
            );
        }
    }
}

#endif /* PRIVATE_DSP_ARCH_X86_SSE_FFT_NORMALIZE_H_ */