/*
* FAAC - Freeware Advanced Audio Coder
* Copyright (C) 2026 Nils Schimmelmann
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <immintrin.h>
#include "faac_real.h"
#include "quantize.h"
/*
 * Quantize n input values from xr into integers in xi.
 *
 * For every element the result is
 *     sign(xr[i]) * (int)((|xr[i]| * sfacfix)^0.75 + MAGIC_NUMBER)
 * where the conversion to int truncates toward zero and the 0.75 power is
 * evaluated as sqrt(v * sqrt(v)).
 *
 * xr      - input values (read only)
 * xi      - output integers, one per input value
 * n       - number of elements to process; nothing is written when n <= 0
 * sfacfix - scale factor applied to each magnitude before the power step
 *
 * Groups of four elements go through the SSE path, which always works in
 * single precision. Any leftover elements (n not a multiple of 4) go through
 * a scalar path that works in faac_real precision.
 * NOTE(review): when faac_real is wider than float the two paths could round
 * differently for values right at an integer boundary - confirm whether
 * callers care.
 */
void quantize_sse2(const faac_real * __restrict xr, int * __restrict xi, int n, faac_real sfacfix)
{
    // All bits set except the IEEE-754 sign bit: AND-ing with it yields |x|.
    const __m128i magnitude_bits = _mm_set1_epi32(0x7FFFFFFF);
    const __m128 scale = _mm_set1_ps(sfacfix);
    const __m128 rounding = _mm_set1_ps(MAGIC_NUMBER);
    const __m128 zeros = _mm_setzero_ps();
    int i = 0;

    // Vector path: consume the input four elements at a time.
    while (n - i >= 4)
    {
#ifdef FAAC_PRECISION_SINGLE
        const __m128 in = _mm_loadu_ps((const float*)&xr[i]);
#else
        // Two unaligned 2-double loads, each narrowed to floats in the low
        // half of a register, then packed into one 4-float vector.
        const __m128 front = _mm_cvtpd_ps(_mm_loadu_pd(&xr[i]));
        const __m128 back = _mm_cvtpd_ps(_mm_loadu_pd(&xr[i + 2]));
        const __m128 in = _mm_movelh_ps(front, back);
#endif
        // Lanes holding a negative input become all-ones, the rest all-zeros.
        const __m128i negate = _mm_castps_si128(_mm_cmplt_ps(in, zeros));

        // |x| * sfac, then raise to the 0.75 power via sqrt(v * sqrt(v)).
        const __m128 scaled = _mm_mul_ps(_mm_and_ps(in, _mm_castsi128_ps(magnitude_bits)), scale);
        const __m128 pow075 = _mm_sqrt_ps(_mm_mul_ps(scaled, _mm_sqrt_ps(scaled)));

        // Add the rounding offset and truncate to 32-bit integers.
        const __m128i magnitude = _mm_cvttps_epi32(_mm_add_ps(pow075, rounding));

        // Conditional two's-complement negate: (v ^ m) - m is -v where m is
        // all-ones and v where m is zero.
        const __m128i result = _mm_sub_epi32(_mm_xor_si128(magnitude, negate), negate);
        _mm_storeu_si128((__m128i*)&xi[i], result);

        i += 4;
    }

    // Scalar path for the final 0-3 elements.
    while (i < n)
    {
        const faac_real sample = xr[i];
        faac_real v = FAAC_FABS(sample) * sfacfix;
        int quantized;

        v = FAAC_SQRT(v * FAAC_SQRT(v));
        quantized = (int)(v + (faac_real)MAGIC_NUMBER);

        if (sample < 0)
        {
            quantized = -quantized;
        }
        xi[i] = quantized;
        i++;
    }
}
|