1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
|
#include<simdfuncs.h>
#include<stdio.h>
#include<string.h>
/* Pointer to a routine that takes a float buffer and modifies it in place. */
typedef void (*simd_func)(float*);

/*
 * Run one implementation against a fresh copy of the input and verify it.
 *
 * four          scratch buffer; overwritten with blocksize floats from
 *               four_initial, then handed to fptr
 * four_initial  pristine input values (blocksize floats)
 * simd_type     label printed in the "Using ..." line
 * expected      values four must hold after fptr returns (blocksize floats)
 * fptr          routine under test
 * blocksize     number of floats copied and compared
 *
 * Prints one failure line per mismatching element.
 * Returns 0 when every element matches, 1 otherwise.
 */
int check_simd_implementation(float *four,
                              const float *four_initial,
                              const char *simd_type,
                              const float *expected,
                              simd_func fptr,
                              const int blocksize) {
    int failed = 0;
    const float *got = four;
    const float *want = expected;

    /* Reset the scratch buffer so each run starts from the same input. */
    memcpy(four, four_initial, sizeof(float) * blocksize);
    printf("Using %s.\n", simd_type);
    fptr(four);

    /* Walk both arrays in lockstep; keep going after a mismatch so that
     * every bad element is reported. */
    for (int remaining = blocksize; remaining > 0; remaining--, got++, want++) {
        if (*got == *want) {
            continue;
        }
        printf("Increment function failed, got %f expected %f.\n", *got, *want);
        failed = 1;
    }

    return failed;
}
/*
 * Exercise every increment implementation that was compiled in (HAVE_*
 * guards) and reports itself as available at run time, then always run the
 * fallback implementation.
 *
 * Returns the number of implementations whose result did not match the
 * expected values; 0 means everything passed.
 */
int main(void) {
    static const float four_initial[4] = {2.0, 3.0, 4.0, 5.0};
    /* ALIGN_16 is not defined in this file; presumably it comes from
     * simdfuncs.h and requests 16-byte alignment -- confirm. */
    ALIGN_16 float four[4];
    const float expected[4] = {3.0, 4.0, 5.0, 6.0};
    int r=0;
    const int blocksize = 4;

    /*
     * Test all implementations that the current CPU supports.
     */
#if HAVE_NEON
    if(neon_available()) {
        r += check_simd_implementation(four,
                                       four_initial,
                                       "NEON",
                                       expected,
                                       increment_neon,
                                       blocksize);
    }
#endif
#if HAVE_AVX2
    if(avx2_available()) {
        r += check_simd_implementation(four,
                                       four_initial,
                                       "AVX2",
                                       expected,
                                       increment_avx2,
                                       blocksize);
    }
#endif
#if HAVE_AVX
    if(avx_available()) {
        /* Label corrected: was misspelled "AVC". */
        r += check_simd_implementation(four,
                                       four_initial,
                                       "AVX",
                                       expected,
                                       increment_avx,
                                       blocksize);
    }
#endif
#if HAVE_SSE42
    if(sse42_available()) {
        /* Label corrected: was misspelled "SSR42". */
        r += check_simd_implementation(four,
                                       four_initial,
                                       "SSE42",
                                       expected,
                                       increment_sse42,
                                       blocksize);
    }
#endif
#if HAVE_SSE41
    if(sse41_available()) {
        r += check_simd_implementation(four,
                                       four_initial,
                                       "SSE41",
                                       expected,
                                       increment_sse41,
                                       blocksize);
    }
#endif
#if HAVE_SSSE3
    if(ssse3_available()) {
        r += check_simd_implementation(four,
                                       four_initial,
                                       "SSSE3",
                                       expected,
                                       increment_ssse3,
                                       blocksize);
    }
#endif
#if HAVE_SSE3
    if(sse3_available()) {
        r += check_simd_implementation(four,
                                       four_initial,
                                       "SSE3",
                                       expected,
                                       increment_sse3,
                                       blocksize);
    }
#endif
#if HAVE_SSE2
    if(sse2_available()) {
        r += check_simd_implementation(four,
                                       four_initial,
                                       "SSE2",
                                       expected,
                                       increment_sse2,
                                       blocksize);
    }
#endif
#if HAVE_SSE
    if(sse_available()) {
        r += check_simd_implementation(four,
                                       four_initial,
                                       "SSE",
                                       expected,
                                       increment_sse,
                                       blocksize);
    }
#endif
#if HAVE_MMX
    if(mmx_available()) {
        r += check_simd_implementation(four,
                                       four_initial,
                                       "MMX",
                                       expected,
                                       increment_mmx,
                                       blocksize);
    }
#endif
    /* The fallback path has no availability check and always runs. */
    r += check_simd_implementation(four,
                                   four_initial,
                                   "fallback",
                                   expected,
                                   increment_fallback,
                                   blocksize);
    return r;
}
|