1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
|
#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include "libbase64.h"
#include "../../tables/tables.h"
#include "../../codecs.h"
#include "config.h"
#include "../../env.h"
#if HAVE_NEON64
#include <arm_neon.h>
// Only enable inline assembly on supported compilers.
#if !defined(__wasm__) && (defined(__GNUC__) || defined(__clang__))
#define BASE64_NEON64_USE_ASM
#endif
static BASE64_FORCE_INLINE uint8x16x4_t
load_64byte_table (const uint8_t *p)
{
#ifdef BASE64_NEON64_USE_ASM
// Force the table to be loaded into contiguous registers. GCC will not
// normally allocate contiguous registers for a `uint8x16x4_t'. These
// registers are chosen to not conflict with the ones in the enc loop.
register uint8x16_t t0 __asm__ ("v8");
register uint8x16_t t1 __asm__ ("v9");
register uint8x16_t t2 __asm__ ("v10");
register uint8x16_t t3 __asm__ ("v11");
__asm__ (
"ld1 {%[t0].16b, %[t1].16b, %[t2].16b, %[t3].16b}, [%[src]], #64 \n\t"
: [src] "+r" (p),
[t0] "=w" (t0),
[t1] "=w" (t1),
[t2] "=w" (t2),
[t3] "=w" (t3)
);
return (uint8x16x4_t) {
.val[0] = t0,
.val[1] = t1,
.val[2] = t2,
.val[3] = t3,
};
#else
return vld1q_u8_x4(p);
#endif
}
#include "../generic/32/dec_loop.c"
#include "../generic/64/enc_loop.c"
#include "dec_loop.c"
#ifdef BASE64_NEON64_USE_ASM
# include "enc_loop_asm.c"
#else
# include "enc_reshuffle.c"
# include "enc_loop.c"
#endif
#endif // HAVE_NEON64
// Stride size is so large on these NEON 64-bit functions
// (48 bytes encode, 64 bytes decode) that we inline the
// uint64 codec to stay performant on smaller inputs.
void
base64_stream_encode_neon64 BASE64_ENC_PARAMS
{
#if HAVE_NEON64
#include "../generic/enc_head.c"
enc_loop_neon64(&s, &slen, &o, &olen);
enc_loop_generic_64(&s, &slen, &o, &olen);
#include "../generic/enc_tail.c"
#else
base64_enc_stub(state, src, srclen, out, outlen);
#endif
}
int
base64_stream_decode_neon64 BASE64_DEC_PARAMS
{
#if HAVE_NEON64
#include "../generic/dec_head.c"
dec_loop_neon64(&s, &slen, &o, &olen);
dec_loop_generic_32(&s, &slen, &o, &olen);
#include "../generic/dec_tail.c"
#else
return base64_dec_stub(state, src, srclen, out, outlen);
#endif
}
|