File: vdot-exec.c

package info (click to toggle)
gcc-arm-none-eabi 15%3A8-2019-q3-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 571,828 kB
  • sloc: ansic: 2,937,651; cpp: 881,644; ada: 597,189; makefile: 65,528; asm: 56,499; xml: 46,621; exp: 24,747; sh: 19,684; python: 7,256; pascal: 4,370; awk: 3,497; perl: 2,695; yacc: 316; ml: 285; f90: 234; lex: 198; objc: 194; haskell: 119
file content (55 lines) | stat: -rw-r--r-- 1,820 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
/* { dg-do run } */
/* { dg-additional-options "-O3" } */
/* { dg-require-effective-target arm_v8_2a_dotprod_neon_hw } */
/* { dg-add-options arm_v8_2a_dotprod_neon }  */

#include <arm_neon.h>

/* Declared by hand rather than through a standard header; the check
   macros below call it whenever a result lane does not match.  */
extern void abort();

/* ORDER (X, Y): lane index handed to the *_lane intrinsics.

   When the target is little-endian the result is Y unchanged; otherwise
   it is X - Y.  With X == 1, as used in this file, that swaps lane 0 and
   lane 1 on non-little-endian targets.

   Arguments and the whole expansion are parenthesized so the macro is
   safe with expression arguments and inside larger expressions.  */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
# define ORDER(x, y) (y)
#else
# define ORDER(x, y) ((x) - (y))
#endif

/* P (LO, HI): eight comma-separated initializers, LO four times followed
   by HI four times.  */
#define P(lo, hi) lo, lo, lo, lo, hi, hi, hi, hi
/* ARR (BASE, SFX, TYPE, ...): declare an object of TYPE named BASE_SFX,
   brace-initialized from the remaining arguments.  */
#define ARR(base, sfx, type, ...) type base##_##sfx = { __VA_ARGS__ }
/* TEST (IN1_T, IN2_T, ACC_T, FN, INIT1, INIT2, WANT0, WANT1)

   Declares FN_x (type IN1_T, initialized from INIT1) and FN_y (type
   IN2_T, initialized from INIT2), then calls FN on a zeroed ACC_T
   accumulator FN_r together with both inputs and stores the result back
   in FN_r.  Calls abort () unless element 0 of the result equals WANT0
   and element 1 equals WANT1; no other elements are examined.

   The expansion is a plain statement sequence that introduces
   declarations into the enclosing scope, so each FN may be tested only
   once per scope.  */
#define TEST(in1_t, in2_t, acc_t, fn, init1, init2, want0, want1)	\
	ARR (fn, x, in1_t, init1);					\
	ARR (fn, y, in2_t, init2);					\
	acc_t fn##_##r = {0};						\
	fn##_##r = fn (fn##_##r, fn##_##x, fn##_##y);			\
	if (fn##_##r[0] != want0 || fn##_##r[1] != want1)		\
	  abort ();

/* TEST_LANE (T1, T2, T3, F, R1, R2, N1, N2, N3, N4)

   Lane-indexed counterpart of TEST.  Declares F_x (type T1, initialized
   from R1) and F_y (type T2, initialized from R2), then calls F twice on
   those inputs, each time starting from a freshly zeroed T3 accumulator:

     - with lane argument ORDER (1, 0); abort () unless elements 0 and 1
       of the result equal N1 and N2;
     - with lane argument ORDER (1, 1); abort () unless elements 0 and 1
       of the result equal N3 and N4.

   Only elements 0 and 1 of each result are examined.

   The final line intentionally has no trailing backslash, so the
   definition ends here and cannot absorb whatever source line follows.  */
#define TEST_LANE(t1, t2, t3, f, r1, r2, n1, n2, n3, n4) \
	ARR(f, x, t1, r1);		    \
	ARR(f, y, t2, r2);		    \
	t3 f##_##rx = {0};		    \
	f##_##rx = f (f##_##rx, f##_##x, f##_##y, ORDER (1, 0));  \
	if (f##_##rx[0] != n1 || f##_##rx[1] != n2)   \
	  abort ();				    \
	t3 f##_##rx1 = {0};			    \
	f##_##rx1 =  f (f##_##rx1, f##_##x, f##_##y, ORDER (1, 1));  \
	if (f##_##rx1[0] != n3 || f##_##rx1[1] != n4)   \
	  abort ();

/* Run every dot-product check in turn.  Each TEST / TEST_LANE expansion
   calls abort () on a mismatch, so reaching the final return means all
   checks passed.  */
int
main()
{
  /* Plain forms on 8-byte inputs: the two trailing numbers are the
     expected values of result elements 0 and 1.
     NOTE(review): 8 and 24 are consistent with each 32-bit element
     accumulating four byte products (4*1*2 and 4*2*3) -- confirm against
     the intrinsic documentation.  */
  TEST (uint8x8_t, uint8x8_t, uint32x2_t,
	vdot_u32,
	P(1,2), P(2,3),
	8, 24);
  TEST (int8x8_t, int8x8_t, int32x2_t,
	vdot_s32,
	P(1,2), P(-2,-3),
	-8, -24);

  /* Plain forms on 16-byte inputs.  P supplies only eight initializers
     and only result elements 0 and 1 are compared.  */
  TEST (uint8x16_t, uint8x16_t, uint32x4_t,
	vdotq_u32,
	P(1,2), P(2,3),
	8, 24);
  TEST (int8x16_t, int8x16_t, int32x4_t,
	vdotq_s32,
	P(1,2), P(-2,-3),
	-8, -24);

  /* Lane forms: the first pair of expected values belongs to the call
     made with ORDER (1, 0), the second pair to the call made with
     ORDER (1, 1).  */
  TEST_LANE (uint8x8_t, uint8x8_t, uint32x2_t,
	     vdot_lane_u32,
	     P(1,2), P(2,3),
	     8, 16, 12, 24);

  TEST_LANE (int8x8_t, int8x8_t, int32x2_t,
	     vdot_lane_s32,
	     P(1,2), P(-2,-3),
	     -8, -16, -12, -24);

  /* Lane forms with a 16-byte first input and an 8-byte second input.  */
  TEST_LANE (uint8x16_t, uint8x8_t, uint32x4_t,
	     vdotq_lane_u32,
	     P(1,2), P(2,3),
	     8, 16, 12, 24);
  TEST_LANE (int8x16_t, int8x8_t, int32x4_t,
	     vdotq_lane_s32,
	     P(1,2), P(-2,-3),
	     -8, -16, -12, -24);

  return 0;
}