1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
|
//
// Test_v3dot.cpp
// BulletTest
//
// Copyright (c) 2011 Apple Inc.
//
#include "LinearMath/btScalar.h"
#if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#include "Test_dot3.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>
#include <LinearMath/btVector3.h>
// reference code for testing purposes
static btVector3 dot3_ref(const btVector3 &, const btVector3 &, const btVector3 &, const btVector3 &);
static btVector3 dot3_ref(const btVector3 &v, const btVector3 &v1, const btVector3 &v2, const btVector3 &v3)
{
return btVector3(v.dot(v1), v.dot(v2), v.dot(v3));
}
/*
SIMD_FORCE_INLINE int operator!=(const btVector3 &s, const btVector3 &v)
{
#ifdef __SSE__
__m128 test = _mm_cmpneq_ps( s.mVec128, v.mVec128 );
return (_mm_movemask_ps( test ) & 7) != 0;
#elif defined __ARM_NEON_H
uint32x4_t test = vandq_u32( vceqq_f32( s.mVec128, v.mVec128 ), (uint32x4_t){-1,-1,-1,0});
uint32x2_t t = vpadd_u32( vget_low_u32(test), vget_high_u32(test));
t = vpadd_u32(t, t);
return -3 != (int32_t) vget_lane_u32(t, 0);
#else
return s.m_floats[0] != v.m_floats[0] ||
s.m_floats[1] != v.m_floats[1] ||
s.m_floats[2] != v.m_floats[2];
#endif
}
*/
#define LOOPCOUNT 1000
#define NUM_CYCLES 10000
int Test_dot3(void)
{
btVector3 v, v1, v2, v3;
#define DATA_SIZE 1024
btVector3 vec3_arr[DATA_SIZE];
btVector3 vec3_arr1[DATA_SIZE];
btVector3 vec3_arr2[DATA_SIZE];
btVector3 vec3_arr3[DATA_SIZE];
btVector3 res_arr[DATA_SIZE];
uint64_t scalarTime;
uint64_t vectorTime;
size_t j, k;
btVector3 correct, test;
for (k = 0; k < DATA_SIZE; k++)
{
vec3_arr[k] = btVector3(btAssign128(RANDF, RANDF, RANDF, BT_NAN));
vec3_arr1[k] = btVector3(btAssign128(RANDF, RANDF, RANDF, BT_NAN));
vec3_arr2[k] = btVector3(btAssign128(RANDF, RANDF, RANDF, BT_NAN));
vec3_arr3[k] = btVector3(btAssign128(RANDF, RANDF, RANDF, BT_NAN));
correct = dot3_ref(vec3_arr[k], vec3_arr1[k], vec3_arr2[k], vec3_arr3[k]);
test = vec3_arr[k].dot3(vec3_arr1[k], vec3_arr2[k], vec3_arr3[k]);
if (correct != test)
{
vlog("Error (%ld) - dot3 result error! *{%a, %a, %a, %a} != {%a, %a, %a, %a} \n", k,
correct.x(), correct.y(), correct.z(), correct.w(),
test.x(), test.y(), test.z(), test.w());
return 1;
}
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
scalarTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for (k = 0; k + 4 <= LOOPCOUNT; k += 4)
{
size_t k32 = (k & (DATA_SIZE - 1));
res_arr[k32] = dot3_ref(vec3_arr[k32], vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]);
k32++;
res_arr[k32] = dot3_ref(vec3_arr[k32], vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]);
k32++;
res_arr[k32] = dot3_ref(vec3_arr[k32], vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]);
k32++;
res_arr[k32] = dot3_ref(vec3_arr[k32], vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]);
}
currentTime = ReadTicks() - startTime;
scalarTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
scalarTime = bestTime;
else
scalarTime /= NUM_CYCLES;
}
{
uint64_t startTime, bestTime, currentTime;
bestTime = -1LL;
vectorTime = 0;
for (j = 0; j < NUM_CYCLES; j++)
{
startTime = ReadTicks();
for (k = 0; k + 4 <= LOOPCOUNT; k += 4)
{
size_t k32 = (k & (DATA_SIZE - 1));
res_arr[k32] = vec3_arr[k32].dot3(vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]);
k32++;
res_arr[k32] = vec3_arr[k32].dot3(vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]);
k32++;
res_arr[k32] = vec3_arr[k32].dot3(vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]);
k32++;
res_arr[k32] = vec3_arr[k32].dot3(vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]);
}
currentTime = ReadTicks() - startTime;
vectorTime += currentTime;
if (currentTime < bestTime)
bestTime = currentTime;
}
if (0 == gReportAverageTimes)
vectorTime = bestTime;
else
vectorTime /= NUM_CYCLES;
}
vlog("Timing:\n");
vlog(" \t scalar\t vector\n");
vlog(" \t%10.4f\t%10.4f\n", TicksToCycles(scalarTime) / LOOPCOUNT, TicksToCycles(vectorTime) / LOOPCOUNT);
return 0;
}
#endif //BT_USE_SSE
|