File: codec.c

package info (click to toggle)
python-librt 0.7.3-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 920 kB
  • sloc: ansic: 13,889; python: 293; makefile: 6
file content (93 lines) | stat: -rw-r--r-- 2,190 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#include <stdint.h>
#include <stddef.h>
#include <string.h>

#include "libbase64.h"
#include "../../tables/tables.h"
#include "../../codecs.h"
#include "config.h"
#include "../../env.h"

#if HAVE_NEON64
#include <arm_neon.h>

// Only enable inline assembly on supported compilers: the build must not
// target WebAssembly (`__wasm__`), and the compiler must identify itself as
// GCC-compatible (`__GNUC__`) or Clang (`__clang__`). When this macro is
// defined, this file uses an inline-assembly table load and includes
// "enc_loop_asm.c"; otherwise it uses the `vld1q_u8_x4` intrinsic and
// includes "enc_reshuffle.c" and "enc_loop.c".
#if !defined(__wasm__) && (defined(__GNUC__) || defined(__clang__))
#define BASE64_NEON64_USE_ASM
#endif

// Load a 64-byte table starting at `p` into four 16-byte NEON vectors and
// return them as a `uint8x16x4_t` (`val[0]` holds the first 16 bytes,
// `val[3]` the last).
//
// With BASE64_NEON64_USE_ASM defined, the load is a single `ld1` instruction
// whose four destination registers are pinned to v8..v11; otherwise the
// `vld1q_u8_x4` intrinsic is used and register allocation is left to the
// compiler.
//
// NOTE(review): the asm statement lists no memory input operand and no
// "memory" clobber, so the compiler is not told that it reads from `*p`.
// Presumably this is harmless because the tables passed in are constant
// data -- confirm against the callers.
static BASE64_FORCE_INLINE uint8x16x4_t
load_64byte_table (const uint8_t *p)
{
#ifdef BASE64_NEON64_USE_ASM

	// Force the table to be loaded into contiguous registers. GCC will not
	// normally allocate contiguous registers for a `uint8x16x4_t'. These
	// registers are chosen to not conflict with the ones in the enc loop.
	register uint8x16_t t0 __asm__ ("v8");
	register uint8x16_t t1 __asm__ ("v9");
	register uint8x16_t t2 __asm__ ("v10");
	register uint8x16_t t3 __asm__ ("v11");

	// The post-indexed addressing form (`, #64`) advances the pointer
	// register after the load, which is why `p` is a read-write ("+r")
	// operand. `p` is a by-value parameter, so only this function's local
	// copy changes, and it is not used again afterwards.
	__asm__ (
		"ld1 {%[t0].16b, %[t1].16b, %[t2].16b, %[t3].16b}, [%[src]], #64 \n\t"
		: [src] "+r" (p),
		  [t0]  "=w" (t0),
		  [t1]  "=w" (t1),
		  [t2]  "=w" (t2),
		  [t3]  "=w" (t3)
	);

	// Pack the four pinned registers into the aggregate return type.
	return (uint8x16x4_t) {
		.val[0] = t0,
		.val[1] = t1,
		.val[2] = t2,
		.val[3] = t3,
	};
#else
	// Portable path: one intrinsic that loads four consecutive 16-byte
	// vectors starting at `p`.
	return vld1q_u8_x4(p);
#endif
}

#include "../generic/32/dec_loop.c"
#include "../generic/64/enc_loop.c"
#include "dec_loop.c"

#ifdef BASE64_NEON64_USE_ASM
# include "enc_loop_asm.c"
#else
# include "enc_reshuffle.c"
# include "enc_loop.c"
#endif

#endif	// HAVE_NEON64

// Stride size is so large on these NEON 64-bit functions
// (48 bytes encode, 64 bytes decode) that we inline the generic
// codecs (the 64-bit encode loop and the 32-bit decode loop) to
// stay performant on smaller inputs.

// Streaming encode entry point for the NEON64 codec.
//
// The parameter list comes from the BASE64_ENC_PARAMS macro (defined outside
// this file); the stub call below shows it provides `state`, `src`,
// `srclen`, `out` and `outlen`.
//
// When HAVE_NEON64 is nonzero, the body is assembled from shared source
// fragments: "../generic/enc_head.c", then the NEON loop, then the generic
// 64-bit loop, then "../generic/enc_tail.c". When HAVE_NEON64 is zero, the
// call is forwarded to `base64_enc_stub`.
void
base64_stream_encode_neon64 BASE64_ENC_PARAMS
{
#if HAVE_NEON64
	// `s`, `slen`, `o` and `olen` are not declared in this file; presumably
	// the head fragment declares them -- confirm in enc_head.c.
	#include "../generic/enc_head.c"
	// Run the NEON loop first, then hand the same cursors and remaining
	// lengths to the generic 64-bit loop.
	enc_loop_neon64(&s, &slen, &o, &olen);
	enc_loop_generic_64(&s, &slen, &o, &olen);
	#include "../generic/enc_tail.c"
#else
	// NEON64 support is not compiled in: delegate to the stub.
	base64_enc_stub(state, src, srclen, out, outlen);
#endif
}

// Streaming decode entry point for the NEON64 codec.
//
// The parameter list comes from the BASE64_DEC_PARAMS macro (defined outside
// this file); the stub call below shows it provides `state`, `src`,
// `srclen`, `out` and `outlen`.
//
// When HAVE_NEON64 is nonzero, the body is assembled from shared source
// fragments: "../generic/dec_head.c", then the NEON loop, then the generic
// 32-bit loop, then "../generic/dec_tail.c". When HAVE_NEON64 is zero, the
// function returns whatever `base64_dec_stub` returns.
//
// NOTE(review): no `return` statement is visible in the HAVE_NEON64 branch;
// presumably "../generic/dec_tail.c" supplies the `int` result -- confirm.
int
base64_stream_decode_neon64 BASE64_DEC_PARAMS
{
#if HAVE_NEON64
	// `s`, `slen`, `o` and `olen` are not declared in this file; presumably
	// the head fragment declares them -- confirm in dec_head.c.
	#include "../generic/dec_head.c"
	// Run the NEON loop first, then hand the same cursors and remaining
	// lengths to the generic 32-bit loop.
	dec_loop_neon64(&s, &slen, &o, &olen);
	dec_loop_generic_32(&s, &slen, &o, &olen);
	#include "../generic/dec_tail.c"
#else
	// NEON64 support is not compiled in: delegate to the stub.
	return base64_dec_stub(state, src, srclen, out, outlen);
#endif
}