File: float_to_s16.c

package info (click to toggle)
retroarch 1.7.3+dfsg1-1
links: PTS
area: main
in suites: buster
size: 49,188 kB
sloc: ansic: 600,492; cpp: 23,670; objc: 8,299; asm: 6,404; sh: 2,203; xml: 2,144; makefile: 1,867; python: 1,582; java: 941; perl: 393
file content (160 lines) | stat: -rw-r--r-- 5,220 bytes
parent folder | download | duplicates (3)
/* Copyright  (C) 2010-2018 The RetroArch team
 *
 * ---------------------------------------------------------------------------------------
 * The following license statement only applies to this file (float_to_s16.c).
 * ---------------------------------------------------------------------------------------
 *
 * Permission is hereby granted, free of charge,
 * to any person obtaining a copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include <stdint.h>
#include <stddef.h>

#if defined(__SSE2__)
#include <emmintrin.h>
#elif defined(__ALTIVEC__)
#include <altivec.h>
#endif

#include <features/features_cpu.h>
#include <audio/conversion/float_to_s16.h>

#if defined(__ARM_NEON__) && !defined(DONT_WANT_ARM_OPTIMIZATIONS)
/* Gates the NEON path inside convert_float_to_s16(). Starts out false and
 * is switched on by convert_float_to_s16_init_simd() once
 * cpu_features_get() reports RETRO_SIMD_NEON. */
static bool float_to_s16_neon_enabled = false;
/* Conversion routine defined outside this file (presumably in assembly,
 * going by the name -- confirm against the build). This file only calls
 * it with a sample count that is a non-zero multiple of 8. */
void convert_float_s16_asm(int16_t *out, const float *in, size_t samples);
#endif

/**
 * convert_float_to_s16:
 * @out               : output buffer, receives @samples 16-bit values
 * @in                : input buffer, holds @samples floats
 * @samples           : number of samples to be converted
 *
 * Converts floating point samples to signed integer 16-bit.
 * Every sample is scaled by 0x8000 and saturated to
 * [-0x8000, 0x7FFF], so out-of-range input clips instead
 * of wrapping around.
 *
 * Depending on the build target, blocks of 8 samples are
 * converted by a SIMD path first; whatever is left over is
 * handled by the plain C loop at the end. The C loop truncates
 * toward zero and writes 0 for NaN input.
 *
 * C implementation callback function.
 **/
void convert_float_to_s16(int16_t *out,
      const float *in, size_t samples)
{
   size_t i      = 0;
#if defined(__SSE2__)
   __m128 factor = _mm_set1_ps((float)0x8000);
   __m128 limit  = _mm_set1_ps((float)0x7FFF);

   for (i = 0; i + 8 <= samples; i += 8, in += 8, out += 8)
   {
      __m128 input_l = _mm_loadu_ps(in + 0);
      __m128 input_r = _mm_loadu_ps(in + 4);
      /* Cap the scaled value before converting: _mm_cvtps_epi32
       * returns 0x80000000 for anything that does not fit in an
       * int32, which the saturating pack below would turn into
       * -0x8000 even for large positive input. Large negative
       * input already ends up at -0x8000 that way, so only the
       * upper bound needs an explicit clamp. */
      __m128 res_l   = _mm_min_ps(_mm_mul_ps(input_l, factor), limit);
      __m128 res_r   = _mm_min_ps(_mm_mul_ps(input_r, factor), limit);
      __m128i ints_l = _mm_cvtps_epi32(res_l);
      __m128i ints_r = _mm_cvtps_epi32(res_r);
      __m128i packed = _mm_packs_epi32(ints_l, ints_r);

      _mm_storeu_si128((__m128i *)out, packed);
   }

   /* in/out were advanced by the loop; restart the
    * index for the scalar tail. */
   samples = samples - i;
   i       = 0;
#elif defined(__ALTIVEC__)
   /* Unaligned loads/stores are a bit expensive,
    * so we optimize for the good path (very likely). */
   if (((uintptr_t)out & 15) + ((uintptr_t)in & 15) == 0)
   {
      for (i = 0; i + 8 <= samples; i += 8, in += 8, out += 8)
      {
         vector float       input0 = vec_ld( 0, in);
         vector float       input1 = vec_ld(16, in);
         vector signed int result0 = vec_cts(input0, 15);
         vector signed int result1 = vec_cts(input1, 15);
         vec_st(vec_packs(result0, result1), 0, out);
      }

      /* in/out were advanced by the loop; restart the
       * index for the scalar tail. The count stays a size_t
       * so large buffers are not truncated. */
      samples = samples - i;
      i       = 0;
   }
#elif defined(__ARM_NEON__) && !defined(DONT_WANT_ARM_OPTIMIZATIONS)
   if (float_to_s16_neon_enabled)
   {
      /* Largest multiple of 8 not exceeding samples. */
      size_t aligned_samples = samples & ~7;
      if (aligned_samples)
         convert_float_s16_asm(out, in, aligned_samples);

      out     = out     + aligned_samples;
      in      = in      + aligned_samples;
      samples = samples - aligned_samples;
      i       = 0;
   }
#elif defined(_MIPS_ARCH_ALLEGREX)

#ifdef DEBUG
   /* Make sure the buffers are 16 byte aligned, this should be
    * the default behaviour of malloc in the PSPSDK.
    * Assume alignment. */
   retro_assert(((uintptr_t)in  & 0xf) == 0);
   retro_assert(((uintptr_t)out & 0xf) == 0);
#endif

   /* Pointers are not advanced here; i keeps counting so the
    * scalar tail below resumes right after the last full block. */
   for (i = 0; i + 8 <= samples; i += 8)
   {
      __asm__ (
            ".set    push                 \n"
            ".set    noreorder            \n"

            "lv.q    c100,  0(%0)         \n"
            "lv.q    c110,  16(%0)        \n"

            "vf2in.q c100, c100, 31       \n"
            "vf2in.q c110, c110, 31       \n"
            "vi2s.q  c100, c100           \n"
            "vi2s.q  c102, c110           \n"

            "sv.q    c100,  0(%1)         \n"

            ".set    pop                  \n"
            /* The asm reads *in and writes *out behind the
             * compiler's back, hence the memory clobber. */
            :: "r"(in + i), "r"(out + i) : "memory");
   }

#endif

   for (; i < samples; i++)
   {
      /* Saturate while still in floating point: casting a float
       * that is out of int32 range (or NaN) to an integer is
       * undefined behaviour. */
      float   scaled = in[i] * 0x8000;
      int16_t sample = 0; /* stays 0 for NaN */

      if (scaled >= 32767.0f)
         sample = 0x7FFF;
      else if (scaled <= -32768.0f)
         sample = -0x8000;
      else if (scaled == scaled) /* false only for NaN */
         sample = (int16_t)(int32_t)scaled;

      out[i] = sample;
   }
}

/**
 * convert_float_to_s16_init_simd:
 *
 * Picks the conversion path based on CPU features.
 * On ARM NEON builds this reads the CPU feature mask and,
 * when NEON is reported, enables the NEON path used by
 * convert_float_to_s16(). On every other build it does nothing.
 **/
void convert_float_to_s16_init_simd(void)
{
#if defined(__ARM_NEON__) && !defined(DONT_WANT_ARM_OPTIMIZATIONS)
   const unsigned features = cpu_features_get();
   const bool has_neon     = (features & RETRO_SIMD_NEON) != 0;

   /* The flag is only ever raised here, never cleared. */
   if (has_neon)
      float_to_s16_neon_enabled = true;
#endif
}