File: Test_dot3.cpp

package info (click to toggle)
bullet 3.24%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 15,164 kB
  • sloc: cpp: 246,331; lisp: 12,017; ansic: 11,175; python: 630; makefile: 136; sh: 75
file content (153 lines) | stat: -rw-r--r-- 4,254 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
//
//  Test_dot3.cpp
//  BulletTest
//
//  Copyright (c) 2011 Apple Inc.
//

#include "LinearMath/btScalar.h"
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)

#include "Test_dot3.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>

#include <LinearMath/btVector3.h>

// reference code for testing purposes
static btVector3 dot3_ref(const btVector3 &, const btVector3 &, const btVector3 &, const btVector3 &);

// Scalar reference for btVector3::dot3: builds a vector from the three
// individual dot products of v with v1, v2 and v3 (in that order).
static btVector3 dot3_ref(const btVector3 &v, const btVector3 &v1, const btVector3 &v2, const btVector3 &v3)
{
	const btScalar dotWithV1 = v.dot(v1);
	const btScalar dotWithV2 = v.dot(v2);
	const btScalar dotWithV3 = v.dot(v3);

	return btVector3(dotWithV1, dotWithV2, dotWithV3);
}

/*
SIMD_FORCE_INLINE int operator!=(const btVector3 &s, const btVector3 &v)
{
#ifdef __SSE__
    __m128 test = _mm_cmpneq_ps( s.mVec128, v.mVec128 );
    return (_mm_movemask_ps( test ) & 7) != 0;
#elif defined __ARM_NEON_H
    uint32x4_t test = vandq_u32( vceqq_f32( s.mVec128, v.mVec128 ), (uint32x4_t){-1,-1,-1,0});
    uint32x2_t t = vpadd_u32( vget_low_u32(test), vget_high_u32(test));
    t = vpadd_u32(t, t);
    return -3 != (int32_t) vget_lane_u32(t, 0);
#else
    return  s.m_floats[0] != v.m_floats[0] ||
    s.m_floats[1] != v.m_floats[1] ||
    s.m_floats[2] != v.m_floats[2];
#endif
}
*/

// Upper bound for the index of the inner timing loops in Test_dot3, and the
// divisor used when the measured time is reported.
#define LOOPCOUNT 1000
// Number of times each timed inner loop is repeated in Test_dot3.
#define NUM_CYCLES 10000

int Test_dot3(void)
{
	btVector3 v, v1, v2, v3;

#define DATA_SIZE 1024

	btVector3 vec3_arr[DATA_SIZE];
	btVector3 vec3_arr1[DATA_SIZE];
	btVector3 vec3_arr2[DATA_SIZE];
	btVector3 vec3_arr3[DATA_SIZE];
	btVector3 res_arr[DATA_SIZE];

	uint64_t scalarTime;
	uint64_t vectorTime;
	size_t j, k;
	btVector3 correct, test;

	for (k = 0; k < DATA_SIZE; k++)
	{
		vec3_arr[k] = btVector3(btAssign128(RANDF, RANDF, RANDF, BT_NAN));
		vec3_arr1[k] = btVector3(btAssign128(RANDF, RANDF, RANDF, BT_NAN));
		vec3_arr2[k] = btVector3(btAssign128(RANDF, RANDF, RANDF, BT_NAN));
		vec3_arr3[k] = btVector3(btAssign128(RANDF, RANDF, RANDF, BT_NAN));

		correct = dot3_ref(vec3_arr[k], vec3_arr1[k], vec3_arr2[k], vec3_arr3[k]);
		test = vec3_arr[k].dot3(vec3_arr1[k], vec3_arr2[k], vec3_arr3[k]);

		if (correct != test)
		{
			vlog("Error (%ld) - dot3 result error! *{%a, %a, %a, %a} != {%a, %a, %a, %a} \n", k,
				 correct.x(), correct.y(), correct.z(), correct.w(),
				 test.x(), test.y(), test.z(), test.w());

			return 1;
		}
	}

	{
		uint64_t startTime, bestTime, currentTime;

		bestTime = -1LL;
		scalarTime = 0;
		for (j = 0; j < NUM_CYCLES; j++)
		{
			startTime = ReadTicks();
			for (k = 0; k + 4 <= LOOPCOUNT; k += 4)
			{
				size_t k32 = (k & (DATA_SIZE - 1));
				res_arr[k32] = dot3_ref(vec3_arr[k32], vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]);
				k32++;
				res_arr[k32] = dot3_ref(vec3_arr[k32], vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]);
				k32++;
				res_arr[k32] = dot3_ref(vec3_arr[k32], vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]);
				k32++;
				res_arr[k32] = dot3_ref(vec3_arr[k32], vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]);
			}
			currentTime = ReadTicks() - startTime;
			scalarTime += currentTime;
			if (currentTime < bestTime)
				bestTime = currentTime;
		}
		if (0 == gReportAverageTimes)
			scalarTime = bestTime;
		else
			scalarTime /= NUM_CYCLES;
	}

	{
		uint64_t startTime, bestTime, currentTime;

		bestTime = -1LL;
		vectorTime = 0;
		for (j = 0; j < NUM_CYCLES; j++)
		{
			startTime = ReadTicks();
			for (k = 0; k + 4 <= LOOPCOUNT; k += 4)
			{
				size_t k32 = (k & (DATA_SIZE - 1));
				res_arr[k32] = vec3_arr[k32].dot3(vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]);
				k32++;
				res_arr[k32] = vec3_arr[k32].dot3(vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]);
				k32++;
				res_arr[k32] = vec3_arr[k32].dot3(vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]);
				k32++;
				res_arr[k32] = vec3_arr[k32].dot3(vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]);
			}
			currentTime = ReadTicks() - startTime;
			vectorTime += currentTime;
			if (currentTime < bestTime)
				bestTime = currentTime;
		}
		if (0 == gReportAverageTimes)
			vectorTime = bestTime;
		else
			vectorTime /= NUM_CYCLES;
	}

	vlog("Timing:\n");
	vlog("     \t    scalar\t    vector\n");
	vlog("    \t%10.4f\t%10.4f\n", TicksToCycles(scalarTime) / LOOPCOUNT, TicksToCycles(vectorTime) / LOOPCOUNT);

	return 0;
}

#endif  // BT_USE_SSE_IN_API || BT_USE_NEON