File: test_sse4_1.cpp

package info (click to toggle)
emscripten 2.0.12~dfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 108,440 kB
  • sloc: ansic: 510,324; cpp: 384,763; javascript: 84,341; python: 51,362; sh: 50,019; pascal: 4,159; makefile: 3,409; asm: 2,150; lisp: 1,869; ruby: 488; cs: 142
file content (114 lines) | stat: -rw-r--r-- 6,605 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
/*
 * Copyright 2020 The Emscripten Authors.  All rights reserved.
 * Emscripten is available under two separate licenses, the MIT license and the
 * University of Illinois/NCSA Open Source License.  Both these licenses can be
 * found in the LICENSE file.
 */
// This file exercises the SSE4.1 intrinsics by calling each of them with a set of interesting inputs and printing the results.
// Use a diff tool to compare the printed output between platforms.

#include <smmintrin.h>
#include "test_sse.h"

// Global flag, initially true. main() clears it around the _mm_dp_pd/_mm_dp_ps
// tests and sets it back to true afterwards.
// NOTE(review): presumably read by the Ret_* helpers (not defined in this file)
// to decide whether NaN bit patterns take part in the printed output - confirm.
bool testNaNBits = true;

// Input tables and their element counts.
// Each pointer is obtained from a get_interesting_*() accessor, and each count
// is computed with sizeof on the matching interesting_*_ object (note the
// trailing underscore). Neither the accessors nor those objects are defined in
// this file.
// NOTE(review): presumably they are arrays provided by "test_sse.h", and these
// globals are referenced by name from the Ret_* macros - confirm before renaming.
float *interesting_floats = get_interesting_floats();
int numInterestingFloats = sizeof(interesting_floats_)/sizeof(interesting_floats_[0]);
uint32_t *interesting_ints = get_interesting_ints();
int numInterestingInts = sizeof(interesting_ints_)/sizeof(interesting_ints_[0]);
double *interesting_doubles = get_interesting_doubles();
int numInterestingDoubles = sizeof(interesting_doubles_)/sizeof(interesting_doubles_[0]);

// Runs the rounding intrinsics (_mm_ceil_*, _mm_floor_*, _mm_round_*) in their
// pd/ps/sd/ss variants through the Ret_* helper macros.
// The macros are not defined in this file; each invocation passes a result type
// and the intrinsic under test.
// NOTE(review): going by the file header, each macro prints the results for the
// interesting inputs, so the statement order here defines the output order that
// is diffed between platforms - do not reorder.
void test_round()
{
	// Ceiling: packed double, packed float, then the scalar sd/ss forms.
	Ret_M128d(__m128d, _mm_ceil_pd);
	Ret_M128(__m128, _mm_ceil_ps);
	Ret_M128d_M128d(__m128d, _mm_ceil_sd);
	Ret_M128_M128(__m128, _mm_ceil_ss);	
	// Floor: same four variants.
	Ret_M128d(__m128d, _mm_floor_pd);
	Ret_M128(__m128, _mm_floor_ps);
	Ret_M128d_M128d(__m128d, _mm_floor_sd);
	Ret_M128_M128(__m128, _mm_floor_ss);
	// Round: the *_Tint macro variants are used here.
	// NOTE(review): presumably "Tint" supplies the extra integer (rounding mode)
	// argument as a compile-time constant - confirm against the macro definition.
	Ret_M128d_Tint(__m128d, _mm_round_pd);
	Ret_M128_Tint(__m128, _mm_round_ps);
	Ret_M128d_M128d_Tint(__m128d, _mm_round_sd);
	Ret_M128_M128_Tint(__m128, _mm_round_ss);
}

// Test driver: checks the input-table sizes, runs the rounding tests, then runs
// the remaining SSE4.1 intrinsics one after another through the Ret_* helper
// macros, plus a few explicit printf spot checks for the _mm_test* predicates.
// The Ret_* macros are not defined in this file.
// NOTE(review): per the file header the output is meant to be diffed between
// platforms, so the order of statements below is part of the expected output.
int main()
{
	// Every input table must hold a multiple of four entries.
	// NOTE(review): presumably the helpers consume the inputs in fixed-size
	// groups - confirm against the macro definitions.
	assert(numInterestingFloats % 4 == 0);
	assert(numInterestingInts % 4 == 0);
	assert(numInterestingDoubles % 4 == 0);	

	test_round();

	// Blend intrinsics: constant-mask forms (_Tint) and variable-mask forms (blendv).
	Ret_M128i_M128i_Tint(__m128i, _mm_blend_epi16);
	Ret_M128d_M128d_Tint(__m128d, _mm_blend_pd);
	Ret_M128_M128_Tint(__m128, _mm_blend_ps);
	Ret_M128i_M128i_M128i(__m128i, _mm_blendv_epi8);
	Ret_M128d_M128d_M128d(__m128d, _mm_blendv_pd);
	Ret_M128_M128_M128(__m128, _mm_blendv_ps);
	// 64-bit integer equality compare.
	Ret_M128i_M128i(__m128i, _mm_cmpeq_epi64);
	// Integer lane-width conversions (_mm_cvtepi* and _mm_cvtepu* families).
	Ret_M128i(__m128i, _mm_cvtepi16_epi32);
	Ret_M128i(__m128i, _mm_cvtepi16_epi64);
	Ret_M128i(__m128i, _mm_cvtepi32_epi64);
	Ret_M128i(__m128i, _mm_cvtepi8_epi16);
	Ret_M128i(__m128i, _mm_cvtepi8_epi32);
	Ret_M128i(__m128i, _mm_cvtepi8_epi64);
	Ret_M128i(__m128i, _mm_cvtepu16_epi32);
	Ret_M128i(__m128i, _mm_cvtepu16_epi64);
	Ret_M128i(__m128i, _mm_cvtepu32_epi64);
	Ret_M128i(__m128i, _mm_cvtepu8_epi16);
	Ret_M128i(__m128i, _mm_cvtepu8_epi32);
	Ret_M128i(__m128i, _mm_cvtepu8_epi64);
	// Dot products run with testNaNBits cleared; the flag is restored right after.
	testNaNBits = false;
	Ret_M128d_M128d_Tint(__m128d, _mm_dp_pd);
	Ret_M128_M128_Tint(__m128, _mm_dp_ps); // _mm_dp_ps emulation does not match NaN bit selection rules (seems to be unspecified)
	testNaNBits = true;
	// Element extract / insert.
	Ret_M128i_Tint(int, _mm_extract_epi32);
	Ret_M128i_Tint(int64_t, _mm_extract_epi64);
	Ret_M128i_Tint(int, _mm_extract_epi8);
	Ret_M128_Tint(float, _mm_extract_ps);
	Ret_M128i_int_Tint(__m128i, _mm_insert_epi32);
	Ret_M128i_int_Tint(__m128i, _mm_insert_epi64);
	Ret_M128_M128_Tint(__m128, _mm_insert_ps);
	// Integer max / min.
	Ret_M128i_M128i(__m128i, _mm_max_epi32);
	Ret_M128i_M128i(__m128i, _mm_max_epi8);
	Ret_M128i_M128i(__m128i, _mm_max_epu16);
	Ret_M128i_M128i(__m128i, _mm_max_epu32);
	Ret_M128i_M128i(__m128i, _mm_min_epi32);
	Ret_M128i_M128i(__m128i, _mm_min_epi8);
	Ret_M128i_M128i(__m128i, _mm_min_epu16);
	Ret_M128i_M128i(__m128i, _mm_min_epu32);
	Ret_M128i(__m128i, _mm_minpos_epu16);
	// Remaining integer arithmetic / packing intrinsics.
	Ret_M128i_M128i_Tint(__m128i, _mm_mpsadbw_epu8);
	Ret_M128i_M128i(__m128i, _mm_mul_epi32);
	Ret_M128i_M128i(__m128i, _mm_mullo_epi32);
	Ret_M128i_M128i(__m128i, _mm_packus_epi32);
	// Load through a pointer argument of type __m128i*.
	// NOTE(review): the meaning of the trailing "4, 4" arguments is defined by
	// the Ret_IntPtr macro, which is not visible here - confirm.
	Ret_IntPtr(__m128i, _mm_stream_load_si128, __m128i*, 4, 4);
	// Bit-test predicates. Each one is first run through the generic macro and
	// then spot-checked with hand-picked constants built via _mm_set1_epi64x:
	// all ones (0xFFFFFFFFFFFFFFFF), all ones except the lowest bit
	// (0xFFFFFFFFFFFFFFFE), and zero. The results are printed with %d.
	Ret_M128i(int, _mm_test_all_ones);
	printf("_mm_test_all_ones(0xFFFFFFFFFFFFFFFFull): %d\n", _mm_test_all_ones(_mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
	printf("_mm_test_all_ones(0xFFFFFFFFFFFFFFFEull): %d\n", _mm_test_all_ones(_mm_set1_epi64x(0xFFFFFFFFFFFFFFFEull)));
	printf("_mm_test_all_ones(0): %d\n", _mm_test_all_ones(_mm_set1_epi64x(0)));
	// From here on the second operand of every spot check is all ones.
	Ret_M128i_M128i(int, _mm_test_all_zeros);
	printf("_mm_test_all_zeros(0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull): %d\n", _mm_test_all_zeros(_mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull), _mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
	printf("_mm_test_all_zeros(0xFFFFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFFFFFull): %d\n", _mm_test_all_zeros(_mm_set1_epi64x(0xFFFFFFFFFFFFFFFEull), _mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
	printf("_mm_test_all_zeros(0, 0xFFFFFFFFFFFFFFFFull): %d\n", _mm_test_all_zeros(_mm_set1_epi64x(0), _mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
	Ret_M128i_M128i(int, _mm_test_mix_ones_zeros);
	printf("_mm_test_mix_ones_zeros(0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull): %d\n", _mm_test_mix_ones_zeros(_mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull), _mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
	printf("_mm_test_mix_ones_zeros(0xFFFFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFFFFFull): %d\n", _mm_test_mix_ones_zeros(_mm_set1_epi64x(0xFFFFFFFFFFFFFFFEull), _mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
	printf("_mm_test_mix_ones_zeros(0, 0xFFFFFFFFFFFFFFFFull): %d\n", _mm_test_mix_ones_zeros(_mm_set1_epi64x(0), _mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
	Ret_M128i_M128i(int, _mm_testc_si128);
	printf("_mm_testc_si128(0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull): %d\n", _mm_testc_si128(_mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull), _mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
	printf("_mm_testc_si128(0xFFFFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFFFFFull): %d\n", _mm_testc_si128(_mm_set1_epi64x(0xFFFFFFFFFFFFFFFEull), _mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
	printf("_mm_testc_si128(0, 0xFFFFFFFFFFFFFFFFull): %d\n", _mm_testc_si128(_mm_set1_epi64x(0), _mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
	Ret_M128i_M128i(int, _mm_testnzc_si128);
	printf("_mm_testnzc_si128(0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull): %d\n", _mm_testnzc_si128(_mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull), _mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
	printf("_mm_testnzc_si128(0xFFFFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFFFFFull): %d\n", _mm_testnzc_si128(_mm_set1_epi64x(0xFFFFFFFFFFFFFFFEull), _mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
	printf("_mm_testnzc_si128(0, 0xFFFFFFFFFFFFFFFFull): %d\n", _mm_testnzc_si128(_mm_set1_epi64x(0), _mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
	Ret_M128i_M128i(int, _mm_testz_si128);
	printf("_mm_testz_si128(0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull): %d\n", _mm_testz_si128(_mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull), _mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
	printf("_mm_testz_si128(0xFFFFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFFFFFull): %d\n", _mm_testz_si128(_mm_set1_epi64x(0xFFFFFFFFFFFFFFFEull), _mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
	printf("_mm_testz_si128(0, 0xFFFFFFFFFFFFFFFFull): %d\n", _mm_testz_si128(_mm_set1_epi64x(0), _mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
}