1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520
|
#define SIMDE_TEST_ARM_NEON_INSN mull
#include "test-neon.h"
#include "../../../simde/arm/neon/mull.h"
static int
test_simde_vmull_s8 (SIMDE_MUNIT_TEST_ARGS) {
  /* Checks simde_vmull_s8 against a table of precomputed vectors.
   * Each entry holds two 8-lane int8_t inputs (a, b) and the 8-lane
   * int16_t result (r) that the lane-wise product is compared with. */
#if 1
  struct {
    int8_t a[8];
    int8_t b[8];
    int16_t r[8];
  } test_vec[] = {
    { { INT8_C( 80), -INT8_C( 57), INT8_MIN, -INT8_C( 68), INT8_C( 13), -INT8_C( 44), INT8_C( 8), INT8_C( 65) },
      { -INT8_C( 55), -INT8_C( 20), INT8_C( 56), -INT8_C( 54), INT8_C( 110), INT8_C( 55), -INT8_C( 97), -INT8_C( 8) },
      { -INT16_C( 4400), INT16_C( 1140), -INT16_C( 7168), INT16_C( 3672), INT16_C( 1430), -INT16_C( 2420), -INT16_C( 776), -INT16_C( 520) } },
    { { INT8_C( 90), INT8_C( 52), INT8_C( 32), INT8_C( 61), -INT8_C( 126), INT8_C( 97), INT8_C( 42), -INT8_C( 90) },
      { INT8_C( 100), INT8_C( 38), -INT8_C( 122), INT8_C( 112), -INT8_C( 57), INT8_C( 19), -INT8_C( 61), INT8_C( 23) },
      { INT16_C( 9000), INT16_C( 1976), -INT16_C( 3904), INT16_C( 6832), INT16_C( 7182), INT16_C( 1843), -INT16_C( 2562), -INT16_C( 2070) } },
    { { -INT8_C( 38), INT8_C( 68), -INT8_C( 44), -INT8_C( 24), INT8_C( 24), -INT8_C( 36), INT8_C( 41), -INT8_C( 31) },
      { -INT8_C( 56), INT8_C( 97), -INT8_C( 85), INT8_C( 55), -INT8_C( 104), INT8_C( 74), INT8_C( 47), -INT8_C( 14) },
      { INT16_C( 2128), INT16_C( 6596), INT16_C( 3740), -INT16_C( 1320), -INT16_C( 2496), -INT16_C( 2664), INT16_C( 1927), INT16_C( 434) } },
    { { INT8_C( 126), INT8_C( 80), INT8_C( 48), INT8_C( 1), -INT8_C( 79), INT8_C( 90), -INT8_C( 89), INT8_C( 21) },
      { INT8_MIN, INT8_C( 46), -INT8_C( 123), INT8_C( 72), INT8_C( 65), INT8_C( 73), INT8_C( 95), INT8_C( 28) },
      { -INT16_C( 16128), INT16_C( 3680), -INT16_C( 5904), INT16_C( 72), -INT16_C( 5135), INT16_C( 6570), -INT16_C( 8455), INT16_C( 588) } },
    { { -INT8_C( 115), INT8_C( 51), INT8_C( 4), -INT8_C( 91), INT8_C( 16), INT8_C( 45), -INT8_C( 122), -INT8_C( 40) },
      { -INT8_C( 114), INT8_C( 49), INT8_C( 15), INT8_C( 38), INT8_C( 123), INT8_C( 63), INT8_C( 25), -INT8_C( 7) },
      { INT16_C( 13110), INT16_C( 2499), INT16_C( 60), -INT16_C( 3458), INT16_C( 1968), INT16_C( 2835), -INT16_C( 3050), INT16_C( 280) } },
    { { -INT8_C( 113), INT8_C( 73), -INT8_C( 6), INT8_C( 64), -INT8_C( 93), -INT8_C( 94), INT8_C( 86), INT8_C( 36) },
      { -INT8_C( 48), -INT8_C( 37), INT8_C( 108), INT8_C( 17), INT8_C( 36), -INT8_C( 53), INT8_C( 45), -INT8_C( 79) },
      { INT16_C( 5424), -INT16_C( 2701), -INT16_C( 648), INT16_C( 1088), -INT16_C( 3348), INT16_C( 4982), INT16_C( 3870), -INT16_C( 2844) } },
    { { -INT8_C( 1), INT8_C( 49), INT8_C( 86), INT8_C( 15), INT8_C( 94), -INT8_C( 36), -INT8_C( 25), -INT8_C( 20) },
      { INT8_C( 13), -INT8_C( 9), INT8_C( 19), -INT8_C( 120), INT8_C( 54), INT8_C( 44), -INT8_C( 126), -INT8_C( 59) },
      { -INT16_C( 13), -INT16_C( 441), INT16_C( 1634), -INT16_C( 1800), INT16_C( 5076), -INT16_C( 1584), INT16_C( 3150), INT16_C( 1180) } },
    { { INT8_C( 117), INT8_C( 124), INT8_C( 5), INT8_C( 24), INT8_C( 30), INT8_C( 91), INT8_C( 60), -INT8_C( 18) },
      { INT8_C( 55), -INT8_C( 88), INT8_C( 0), INT8_C( 91), INT8_C( 116), INT8_C( 45), INT8_C( 13), INT8_C( 115) },
      { INT16_C( 6435), -INT16_C( 10912), INT16_C( 0), INT16_C( 2184), INT16_C( 3480), INT16_C( 4095), INT16_C( 780), -INT16_C( 2070) } },
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < vec_count ; i++) {
    simde_int8x8_t lhs = simde_vld1_s8(test_vec[i].a);
    simde_int8x8_t rhs = simde_vld1_s8(test_vec[i].b);
    simde_int16x8_t r = simde_vmull_s8(lhs, rhs);

    /* Argument spelling kept as-is in case the helper reports it verbatim. */
    simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r));
  }

  return 0;
#else
  /* Disabled generator: writes a newline to stdout, then passes eight
   * random input pairs and their results to the write helpers. */
  fputc('\n', stdout);
  for (int n = 0 ; n < 8 ; n++) {
    simde_int8x8_t lhs = simde_test_arm_neon_random_i8x8();
    simde_int8x8_t rhs = simde_test_arm_neon_random_i8x8();
    simde_int16x8_t product = simde_vmull_s8(lhs, rhs);

    simde_test_arm_neon_write_i8x8(2, lhs, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_arm_neon_write_i8x8(2, rhs, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_i16x8(2, product, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}
static int
test_simde_vmull_s16 (SIMDE_MUNIT_TEST_ARGS) {
  /* Checks simde_vmull_s16 against a table of precomputed vectors.
   * Each entry holds two 4-lane int16_t inputs (a, b) and the 4-lane
   * int32_t result (r) that the lane-wise product is compared with. */
#if 1
  struct {
    int16_t a[4];
    int16_t b[4];
    int32_t r[4];
  } test_vec[] = {
    { { INT16_C( 11230), INT16_C( 11512), -INT16_C( 32461), -INT16_C( 31562) },
      { INT16_C( 29011), -INT16_C( 4051), INT16_C( 24636), -INT16_C( 23193) },
      { INT32_C( 325793530), -INT32_C( 46635112), -INT32_C( 799709196), INT32_C( 732017466) } },
    { { INT16_C( 17728), INT16_C( 31395), INT16_C( 5945), INT16_C( 5959) },
      { INT16_C( 2425), -INT16_C( 15905), -INT16_C( 10338), INT16_C( 31939) },
      { INT32_C( 42990400), -INT32_C( 499337475), -INT32_C( 61459410), INT32_C( 190324501) } },
    { { -INT16_C( 17662), INT16_C( 13993), INT16_C( 24380), -INT16_C( 28486) },
      { -INT16_C( 6192), INT16_C( 3200), -INT16_C( 6329), -INT16_C( 30542) },
      { INT32_C( 109363104), INT32_C( 44777600), -INT32_C( 154301020), INT32_C( 870019412) } },
    { { INT16_C( 21805), INT16_C( 26114), INT16_C( 18796), -INT16_C( 6787) },
      { INT16_C( 23635), -INT16_C( 3674), INT16_C( 27188), INT16_C( 13933) },
      { INT32_C( 515361175), -INT32_C( 95942836), INT32_C( 511025648), -INT32_C( 94563271) } },
    { { INT16_C( 5669), INT16_C( 25196), INT16_C( 9846), INT16_C( 18162) },
      { INT16_C( 29198), INT16_C( 21843), INT16_C( 1369), -INT16_C( 31011) },
      { INT32_C( 165523462), INT32_C( 550356228), INT32_C( 13479174), -INT32_C( 563221782) } },
    { { -INT16_C( 8358), -INT16_C( 14612), INT16_C( 26921), INT16_C( 31916) },
      { INT16_C( 21190), -INT16_C( 1427), -INT16_C( 9540), -INT16_C( 7632) },
      { -INT32_C( 177106020), INT32_C( 20851324), -INT32_C( 256826340), -INT32_C( 243582912) } },
    { { -INT16_C( 25103), INT16_C( 26436), INT16_C( 14019), -INT16_C( 11859) },
      { INT16_C( 168), INT16_C( 295), INT16_C( 1029), INT16_C( 24456) },
      { -INT32_C( 4217304), INT32_C( 7798620), INT32_C( 14425551), -INT32_C( 290023704) } },
    { { INT16_C( 29924), INT16_C( 3366), -INT16_C( 11554), -INT16_C( 23415) },
      { -INT16_C( 2524), -INT16_C( 7778), -INT16_C( 12592), -INT16_C( 15933) },
      { -INT32_C( 75528176), -INT32_C( 26180748), INT32_C( 145487968), INT32_C( 373071195) } },
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < vec_count ; i++) {
    simde_int16x4_t lhs = simde_vld1_s16(test_vec[i].a);
    simde_int16x4_t rhs = simde_vld1_s16(test_vec[i].b);
    simde_int32x4_t r = simde_vmull_s16(lhs, rhs);

    /* Argument spelling kept as-is in case the helper reports it verbatim. */
    simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r));
  }

  return 0;
#else
  /* Disabled generator: writes a newline to stdout, then passes eight
   * random input pairs and their results to the write helpers. */
  fputc('\n', stdout);
  for (int n = 0 ; n < 8 ; n++) {
    simde_int16x4_t lhs = simde_test_arm_neon_random_i16x4();
    simde_int16x4_t rhs = simde_test_arm_neon_random_i16x4();
    simde_int32x4_t product = simde_vmull_s16(lhs, rhs);

    simde_test_arm_neon_write_i16x4(2, lhs, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_arm_neon_write_i16x4(2, rhs, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_i32x4(2, product, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}
static int
test_simde_vmull_s32 (SIMDE_MUNIT_TEST_ARGS) {
  /* Checks simde_vmull_s32 against a table of precomputed vectors.
   * Each entry holds two 2-lane int32_t inputs (a, b) and the 2-lane
   * int64_t result (r) that the lane-wise product is compared with. */
#if 1
  struct {
    int32_t a[2];
    int32_t b[2];
    int64_t r[2];
  } test_vec[] = {
    { { INT32_C( 930126813), -INT32_C( 560729004) },
      { INT32_C( 166776726), -INT32_C( 422116933) },
      { INT64_C( 155123504636954238), INT64_C( 236693207412624732) } },
    { { INT32_C( 1728012372), INT32_C( 633898368) },
      { -INT32_C( 1137785715), -INT32_C( 1374263343) },
      { -INT64_C( 1966107792204865980), -INT64_C( 871143290329924224) } },
    { { INT32_C( 1457882626), INT32_C( 271874170) },
      { INT32_C( 35267655), INT32_C( 2045309221) },
      { INT64_C( 51416101484262030), INT64_C( 556066746852721570) } },
    { { -INT32_C( 757078191), -INT32_C( 84433043) },
      { INT32_C( 1018635627), -INT32_C( 1897214580) },
      { -INT64_C( 771186817777310757), INT64_C( 160187600213366940) } },
    { { -INT32_C( 823865517), -INT32_C( 1898047417) },
      { INT32_C( 1636890684), INT32_C( 1004173801) },
      { -INT64_C( 1348577789646143628), -INT64_C( 1905969489207122017) } },
    { { -INT32_C( 854738592), -INT32_C( 876084128) },
      { -INT32_C( 1241022678), -INT32_C( 1622806196) },
      { INT64_C( 1060749976433789376), INT64_C( 1421714751135657088) } },
    { { INT32_C( 124594624), INT32_C( 2123713602) },
      { INT32_C( 786441796), -INT32_C( 9848161) },
      { INT64_C( 97986419870504704), -INT64_C( 20914673470385922) } },
    { { -INT32_C( 724732300), -INT32_C( 1532979846) },
      { INT32_C( 1616619284), INT32_C( 1509925017) },
      { -INT64_C( 1171616211917673200), -INT64_C( 2314684620032207382) } },
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < vec_count ; i++) {
    simde_int32x2_t lhs = simde_vld1_s32(test_vec[i].a);
    simde_int32x2_t rhs = simde_vld1_s32(test_vec[i].b);
    simde_int64x2_t r = simde_vmull_s32(lhs, rhs);

    /* Argument spelling kept as-is in case the helper reports it verbatim. */
    simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r));
  }

  return 0;
#else
  /* Disabled generator: writes a newline to stdout, then passes eight
   * random input pairs and their results to the write helpers. */
  fputc('\n', stdout);
  for (int n = 0 ; n < 8 ; n++) {
    simde_int32x2_t lhs = simde_test_arm_neon_random_i32x2();
    simde_int32x2_t rhs = simde_test_arm_neon_random_i32x2();
    simde_int64x2_t product = simde_vmull_s32(lhs, rhs);

    simde_test_arm_neon_write_i32x2(2, lhs, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_arm_neon_write_i32x2(2, rhs, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_i64x2(2, product, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}
static int
test_simde_vmull_u8 (SIMDE_MUNIT_TEST_ARGS) {
  /* Checks simde_vmull_u8 against a table of precomputed vectors.
   * Each entry holds two 8-lane uint8_t inputs (a, b) and the 8-lane
   * uint16_t result (r) that the lane-wise product is compared with. */
#if 1
  struct {
    uint8_t a[8];
    uint8_t b[8];
    uint16_t r[8];
  } test_vec[] = {
    { { UINT8_C( 94), UINT8_C(152), UINT8_C( 27), UINT8_C(118), UINT8_C(190), UINT8_C(231), UINT8_C( 17), UINT8_C(211) },
      { UINT8_C( 99), UINT8_C(112), UINT8_C( 80), UINT8_C(144), UINT8_C(181), UINT8_C(106), UINT8_C( 70), UINT8_C( 39) },
      { UINT16_C( 9306), UINT16_C(17024), UINT16_C( 2160), UINT16_C(16992), UINT16_C(34390), UINT16_C(24486), UINT16_C( 1190), UINT16_C( 8229) } },
    { { UINT8_C(237), UINT8_C(190), UINT8_C( 61), UINT8_C( 90), UINT8_C( 53), UINT8_C( 74), UINT8_C(239), UINT8_C( 23) },
      { UINT8_C( 70), UINT8_C(227), UINT8_MAX, UINT8_C(159), UINT8_C(184), UINT8_C(227), UINT8_C(105), UINT8_C( 22) },
      { UINT16_C(16590), UINT16_C(43130), UINT16_C(15555), UINT16_C(14310), UINT16_C( 9752), UINT16_C(16798), UINT16_C(25095), UINT16_C( 506) } },
    { { UINT8_C(123), UINT8_C(132), UINT8_C(141), UINT8_C( 57), UINT8_C(108), UINT8_C(158), UINT8_C( 12), UINT8_C(207) },
      { UINT8_C( 14), UINT8_C( 93), UINT8_C( 96), UINT8_C(196), UINT8_C(199), UINT8_C(166), UINT8_C(235), UINT8_C(180) },
      { UINT16_C( 1722), UINT16_C(12276), UINT16_C(13536), UINT16_C(11172), UINT16_C(21492), UINT16_C(26228), UINT16_C( 2820), UINT16_C(37260) } },
    { { UINT8_C(100), UINT8_C( 40), UINT8_C( 15), UINT8_C(154), UINT8_C(114), UINT8_C(254), UINT8_C(177), UINT8_C(185) },
      { UINT8_C(226), UINT8_C(177), UINT8_C( 88), UINT8_C(154), UINT8_C(148), UINT8_C(193), UINT8_C(176), UINT8_C( 16) },
      { UINT16_C(22600), UINT16_C( 7080), UINT16_C( 1320), UINT16_C(23716), UINT16_C(16872), UINT16_C(49022), UINT16_C(31152), UINT16_C( 2960) } },
    { { UINT8_C( 69), UINT8_C( 61), UINT8_C( 73), UINT8_C(177), UINT8_C(220), UINT8_C( 86), UINT8_C(129), UINT8_C(234) },
      { UINT8_C(179), UINT8_C(225), UINT8_C(174), UINT8_C(122), UINT8_C(135), UINT8_C(153), UINT8_C( 46), UINT8_C(236) },
      { UINT16_C(12351), UINT16_C(13725), UINT16_C(12702), UINT16_C(21594), UINT16_C(29700), UINT16_C(13158), UINT16_C( 5934), UINT16_C(55224) } },
    { { UINT8_C(194), UINT8_C( 61), UINT8_C(134), UINT8_C( 52), UINT8_C( 60), UINT8_C( 55), UINT8_C(237), UINT8_C( 30) },
      { UINT8_C(232), UINT8_C( 69), UINT8_C(184), UINT8_C(125), UINT8_C( 6), UINT8_C(104), UINT8_C(141), UINT8_C( 76) },
      { UINT16_C(45008), UINT16_C( 4209), UINT16_C(24656), UINT16_C( 6500), UINT16_C( 360), UINT16_C( 5720), UINT16_C(33417), UINT16_C( 2280) } },
    { { UINT8_C(166), UINT8_C(214), UINT8_C(253), UINT8_C(130), UINT8_C( 44), UINT8_C(126), UINT8_C(108), UINT8_C(223) },
      { UINT8_C( 95), UINT8_C( 27), UINT8_C( 89), UINT8_C(231), UINT8_C(180), UINT8_C(136), UINT8_C(211), UINT8_C(118) },
      { UINT16_C(15770), UINT16_C( 5778), UINT16_C(22517), UINT16_C(30030), UINT16_C( 7920), UINT16_C(17136), UINT16_C(22788), UINT16_C(26314) } },
    { { UINT8_C(197), UINT8_C( 89), UINT8_C(171), UINT8_C( 1), UINT8_C(144), UINT8_C(152), UINT8_C( 31), UINT8_C(121) },
      { UINT8_C(222), UINT8_C(215), UINT8_C(246), UINT8_C(228), UINT8_C( 64), UINT8_C(131), UINT8_C( 48), UINT8_C(230) },
      { UINT16_C(43734), UINT16_C(19135), UINT16_C(42066), UINT16_C( 228), UINT16_C( 9216), UINT16_C(19912), UINT16_C( 1488), UINT16_C(27830) } },
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < vec_count ; i++) {
    simde_uint8x8_t lhs = simde_vld1_u8(test_vec[i].a);
    simde_uint8x8_t rhs = simde_vld1_u8(test_vec[i].b);
    simde_uint16x8_t r = simde_vmull_u8(lhs, rhs);

    /* Argument spelling kept as-is in case the helper reports it verbatim. */
    simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r));
  }

  return 0;
#else
  /* Disabled generator: writes a newline to stdout, then passes eight
   * random input pairs and their results to the write helpers. */
  fputc('\n', stdout);
  for (int n = 0 ; n < 8 ; n++) {
    simde_uint8x8_t lhs = simde_test_arm_neon_random_u8x8();
    simde_uint8x8_t rhs = simde_test_arm_neon_random_u8x8();
    simde_uint16x8_t product = simde_vmull_u8(lhs, rhs);

    simde_test_arm_neon_write_u8x8(2, lhs, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_arm_neon_write_u8x8(2, rhs, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_u16x8(2, product, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}
static int
test_simde_vmull_u16 (SIMDE_MUNIT_TEST_ARGS) {
  /* Checks simde_vmull_u16 against a table of precomputed vectors.
   * Each entry holds two 4-lane uint16_t inputs (a, b) and the 4-lane
   * uint32_t result (r) that the lane-wise product is compared with. */
#if 1
  struct {
    uint16_t a[4];
    uint16_t b[4];
    uint32_t r[4];
  } test_vec[] = {
    { { UINT16_C(65254), UINT16_C(49526), UINT16_C(58343), UINT16_C(28199) },
      { UINT16_C( 3568), UINT16_C(53134), UINT16_C(38079), UINT16_C(44979) },
      { UINT32_C( 232826272), UINT32_C(2631514484), UINT32_C(2221643097), UINT32_C(1268362821) } },
    { { UINT16_C(41279), UINT16_C(54255), UINT16_C(49218), UINT16_C(49274) },
      { UINT16_C(44771), UINT16_C(52368), UINT16_C(62625), UINT16_C(34586) },
      { UINT32_C(1848102109), UINT32_C(2841225840), UINT32_C(3082277250), UINT32_C(1704190564) } },
    { { UINT16_C(37107), UINT16_C(55881), UINT16_C(28787), UINT16_C(25416) },
      { UINT16_C(54910), UINT16_C(15666), UINT16_C(58986), UINT16_C(43500) },
      { UINT32_C(2037545370), UINT32_C( 875431746), UINT32_C(1698029982), UINT32_C(1105596000) } },
    { { UINT16_C(56455), UINT16_C(51581), UINT16_C(63388), UINT16_C(32649) },
      { UINT16_C( 6821), UINT16_C(17995), UINT16_C(25870), UINT16_C( 462) },
      { UINT32_C( 385079555), UINT32_C( 928200095), UINT32_C(1639847560), UINT32_C( 15083838) } },
    { { UINT16_C( 6133), UINT16_C(27099), UINT16_C( 9351), UINT16_C( 1484) },
      { UINT16_C(65530), UINT16_C(25923), UINT16_C(12261), UINT16_C(27662) },
      { UINT32_C( 401895490), UINT32_C( 702487377), UINT32_C( 114652611), UINT32_C( 41050408) } },
    { { UINT16_C(35595), UINT16_C(43062), UINT16_C(49027), UINT16_C(10279) },
      { UINT16_C(29657), UINT16_C(59503), UINT16_C(15832), UINT16_C(52969) },
      { UINT32_C(1055640915), UINT32_C(2562318186), UINT32_C( 776195464), UINT32_C( 544468351) } },
    { { UINT16_C(50516), UINT16_C(56119), UINT16_C( 1001), UINT16_C(58337) },
      { UINT16_C( 9218), UINT16_C(59208), UINT16_C(22355), UINT16_C(24404) },
      { UINT32_C( 465656488), UINT32_C(3322693752), UINT32_C( 22377355), UINT32_C(1423656148) } },
    { { UINT16_C(35554), UINT16_C(25863), UINT16_C(11849), UINT16_C( 9102) },
      { UINT16_C(64929), UINT16_C(31243), UINT16_C(62522), UINT16_C(36424) },
      { UINT32_C(2308485666), UINT32_C( 808037709), UINT32_C( 740823178), UINT32_C( 331531248) } },
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < vec_count ; i++) {
    simde_uint16x4_t lhs = simde_vld1_u16(test_vec[i].a);
    simde_uint16x4_t rhs = simde_vld1_u16(test_vec[i].b);
    simde_uint32x4_t r = simde_vmull_u16(lhs, rhs);

    /* Argument spelling kept as-is in case the helper reports it verbatim. */
    simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r));
  }

  return 0;
#else
  /* Disabled generator: writes a newline to stdout, then passes eight
   * random input pairs and their results to the write helpers. */
  fputc('\n', stdout);
  for (int n = 0 ; n < 8 ; n++) {
    simde_uint16x4_t lhs = simde_test_arm_neon_random_u16x4();
    simde_uint16x4_t rhs = simde_test_arm_neon_random_u16x4();
    simde_uint32x4_t product = simde_vmull_u16(lhs, rhs);

    simde_test_arm_neon_write_u16x4(2, lhs, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_arm_neon_write_u16x4(2, rhs, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_u32x4(2, product, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}
static int
test_simde_vmull_u32 (SIMDE_MUNIT_TEST_ARGS) {
  /* Checks simde_vmull_u32 against a table of precomputed vectors.
   * Each entry holds two 2-lane uint32_t inputs (a, b) and the 2-lane
   * uint64_t result (r) that the lane-wise product is compared with. */
#if 1
  struct {
    uint32_t a[2];
    uint32_t b[2];
    uint64_t r[2];
  } test_vec[] = {
    { { UINT32_C(1764671971), UINT32_C( 417693998) },
      { UINT32_C(3142976160), UINT32_C(3551123166) },
      { UINT64_C( 5546321935073211360), UINT64_C( 1483282832596957668) } },
    { { UINT32_C(1771619725), UINT32_C(3853953090) },
      { UINT32_C(4088780350), UINT32_C(1688137088) },
      { UINT64_C( 7243763919252403750), UINT64_C( 6506001146641201920) } },
    { { UINT32_C(3721252015), UINT32_C(4009079374) },
      { UINT32_C(2477411253), UINT32_C(2926007073) },
      { UINT64_C( 9219071617209924795), UINT64_C(11730594604542412302) } },
    { { UINT32_C(1528299288), UINT32_C(3863006887) },
      { UINT32_C( 618263972), UINT32_C(2441639906) },
      { UINT64_C( 944892388203651936), UINT64_C( 9432071772452032622) } },
    { { UINT32_C(2456704580), UINT32_C(3179307784) },
      { UINT32_C(3494980270), UINT32_C(2524887166) },
      { UINT64_C( 8586134036318636600), UINT64_C( 8027393420585500144) } },
    { { UINT32_C(1592891063), UINT32_C( 138686820) },
      { UINT32_C( 153886246), UINT32_C(3650794901) },
      { UINT64_C( 245124025972019498), UINT64_C( 506317135291904820) } },
    { { UINT32_C( 325781771), UINT32_C( 466742380) },
      { UINT32_C(2498437654), UINT32_C(2435541466) },
      { UINT64_C( 813945443653205234), UINT64_C( 1136770420429529080) } },
    { { UINT32_C(1676614911), UINT32_C(1953182798) },
      { UINT32_C(3883767890), UINT32_C(1489049677) },
      { UINT64_C( 6511583155237007790), UINT64_C( 2908386214483856246) } },
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < vec_count ; i++) {
    simde_uint32x2_t lhs = simde_vld1_u32(test_vec[i].a);
    simde_uint32x2_t rhs = simde_vld1_u32(test_vec[i].b);
    simde_uint64x2_t r = simde_vmull_u32(lhs, rhs);

    /* Argument spelling kept as-is in case the helper reports it verbatim. */
    simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r));
  }

  return 0;
#else
  /* Disabled generator: writes a newline to stdout, then passes eight
   * random input pairs and their results to the write helpers. */
  fputc('\n', stdout);
  for (int n = 0 ; n < 8 ; n++) {
    simde_uint32x2_t lhs = simde_test_arm_neon_random_u32x2();
    simde_uint32x2_t rhs = simde_test_arm_neon_random_u32x2();
    simde_uint64x2_t product = simde_vmull_u32(lhs, rhs);

    simde_test_arm_neon_write_u32x2(2, lhs, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_arm_neon_write_u32x2(2, rhs, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_u64x2(2, product, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}
static int
test_simde_vmull_p8 (SIMDE_MUNIT_TEST_ARGS) {
  /* Checks simde_vmull_p8 against a table of precomputed vectors.
   * Each entry holds two 8-lane simde_poly8_t inputs (a, b) and the
   * 8-lane simde_poly16_t result (r) to compare with. The expected
   * values are carry-less (XOR-accumulated) products, e.g. 3 (x) 45 = 119. */
#if 1
  struct {
    simde_poly8_t a[8];
    simde_poly8_t b[8];
    simde_poly16_t r[8];
  } test_vec[] = {
    { { SIMDE_POLY8_C( 185), SIMDE_POLY8_C( 129), SIMDE_POLY8_C( 202), SIMDE_POLY8_C( 179),
        SIMDE_POLY8_C( 49), SIMDE_POLY8_C( 106), SIMDE_POLY8_C( 44), SIMDE_POLY8_C( 74) },
      { SIMDE_POLY8_C( 172), SIMDE_POLY8_C( 196), SIMDE_POLY8_C( 174), SIMDE_POLY8_C( 244),
        SIMDE_POLY8_C( 107), SIMDE_POLY8_C( 166), SIMDE_POLY8_C( 249), SIMDE_POLY8_C( 44) },
      { SIMDE_POLY16_C( 19596), SIMDE_POLY16_C( 25284), SIMDE_POLY16_C( 30892), SIMDE_POLY16_C( 27356),
        SIMDE_POLY16_C( 3003), SIMDE_POLY16_C( 14652), SIMDE_POLY16_C( 6924), SIMDE_POLY16_C( 2616) } },
    { { SIMDE_POLY8_C( 120), SIMDE_POLY8_C( 3), SIMDE_POLY8_C( 247), SIMDE_POLY8_C( 24),
        SIMDE_POLY8_C( 5), SIMDE_POLY8_C( 122), SIMDE_POLY8_C( 20), SIMDE_POLY8_C( 38) },
      { SIMDE_POLY8_C( 198), SIMDE_POLY8_C( 45), SIMDE_POLY8_C( 54), SIMDE_POLY8_C( 185),
        SIMDE_POLY8_C( 37), SIMDE_POLY8_C( 64), SIMDE_POLY8_C( 214), SIMDE_POLY8_C( 191) },
      { SIMDE_POLY16_C( 8976), SIMDE_POLY16_C( 119), SIMDE_POLY16_C( 5026), SIMDE_POLY16_C( 3672),
        SIMDE_POLY16_C( 177), SIMDE_POLY16_C( 7808), SIMDE_POLY16_C( 3640), SIMDE_POLY16_C( 5218) } },
    { { SIMDE_POLY8_C( 17), SIMDE_POLY8_C( 86), SIMDE_POLY8_C( 204), SIMDE_POLY8_C( 173),
        SIMDE_POLY8_C( 69), SIMDE_POLY8_C( 59), SIMDE_POLY8_C( 27), SIMDE_POLY8_C( 56) },
      { SIMDE_POLY8_C( 122), SIMDE_POLY8_C( 0), SIMDE_POLY8_C( 185), SIMDE_POLY8_C( 13),
        SIMDE_POLY8_C( 243), SIMDE_POLY8_C( 23), SIMDE_POLY8_C( 9), SIMDE_POLY8_C( 227) },
      { SIMDE_POLY16_C( 2010), SIMDE_POLY16_C( 0), SIMDE_POLY16_C( 30188), SIMDE_POLY16_C( 1905),
        SIMDE_POLY16_C( 16383), SIMDE_POLY16_C( 785), SIMDE_POLY16_C( 195), SIMDE_POLY16_C( 5448) } },
    { { SIMDE_POLY8_C( 42), SIMDE_POLY8_C( 173), SIMDE_POLY8_C( 65), SIMDE_POLY8_C( 38),
        SIMDE_POLY8_C( 80), SIMDE_POLY8_C( 199), SIMDE_POLY8_C( 134), SIMDE_POLY8_C( 200) },
      { SIMDE_POLY8_C( 70), SIMDE_POLY8_C( 119), SIMDE_POLY8_C( 226), SIMDE_POLY8_C( 247),
        SIMDE_POLY8_C( 223), SIMDE_POLY8_C( 37), SIMDE_POLY8_C( 108), SIMDE_POLY8_C( 80) },
      { SIMDE_POLY16_C( 2684), SIMDE_POLY16_C( 14195), SIMDE_POLY16_C( 14434), SIMDE_POLY16_C( 7378),
        SIMDE_POLY16_C( 14896), SIMDE_POLY16_C( 6971), SIMDE_POLY16_C( 14184), SIMDE_POLY16_C( 16000) } },
    { { SIMDE_POLY8_C( 127), SIMDE_POLY8_C( 107), SIMDE_POLY8_C( 68), SIMDE_POLY8_C( 137),
        SIMDE_POLY8_C( 245), SIMDE_POLY8_C( 88), SIMDE_POLY8_C( 61), SIMDE_POLY8_C( 182) },
      { SIMDE_POLY8_C( 37), SIMDE_POLY8_C( 182), SIMDE_POLY8_C( 241), SIMDE_POLY8_C( 64),
        SIMDE_POLY8_C( 63), SIMDE_POLY8_C( 134), SIMDE_POLY8_C( 160), SIMDE_POLY8_C( 224) },
      { SIMDE_POLY16_C( 3683), SIMDE_POLY16_C( 16170), SIMDE_POLY16_C( 16260), SIMDE_POLY16_C( 8768),
        SIMDE_POLY16_C( 5267), SIMDE_POLY16_C( 11728), SIMDE_POLY16_C( 6432), SIMDE_POLY16_C( 24640) } },
    { { SIMDE_POLY8_C( 0), SIMDE_POLY8_C( 196), SIMDE_POLY8_C( 41), SIMDE_POLY8_C( 67),
        SIMDE_POLY8_C( 61), SIMDE_POLY8_C( 176), SIMDE_POLY8_C( 91), SIMDE_POLY8_C( 135) },
      { SIMDE_POLY8_C( 166), SIMDE_POLY8_C( 199), SIMDE_POLY8_C( 200), SIMDE_POLY8_C( 195),
        SIMDE_POLY8_C( 192), SIMDE_POLY8_C( 118), SIMDE_POLY8_C( 158), SIMDE_POLY8_C( 222) },
      { SIMDE_POLY16_C( 0), SIMDE_POLY16_C( 20828), SIMDE_POLY16_C( 8072), SIMDE_POLY16_C( 12677),
        SIMDE_POLY16_C( 4544), SIMDE_POLY16_C( 12960), SIMDE_POLY16_C( 11058), SIMDE_POLY16_C( 27930) } },
    { { SIMDE_POLY8_C( 122), SIMDE_POLY8_C( 199), SIMDE_POLY8_C( 173), SIMDE_POLY8_C( 4),
        SIMDE_POLY8_C( 213), SIMDE_POLY8_C( 48), SIMDE_POLY8_C( 71), SIMDE_POLY8_C( 196) },
      { SIMDE_POLY8_C( 179), SIMDE_POLY8_C( 3), SIMDE_POLY8_C( 160), SIMDE_POLY8_C( 204),
        SIMDE_POLY8_C( 80), SIMDE_POLY8_C( 29), SIMDE_POLY8_C( 12), SIMDE_POLY8_C( 98) },
      { SIMDE_POLY16_C( 13678), SIMDE_POLY16_C( 329), SIMDE_POLY16_C( 17184), SIMDE_POLY16_C( 816),
        SIMDE_POLY16_C( 14352), SIMDE_POLY16_C( 624), SIMDE_POLY16_C( 804), SIMDE_POLY16_C( 10248) } },
    { { SIMDE_POLY8_C( 82), SIMDE_POLY8_C( 238), SIMDE_POLY8_C( 78), SIMDE_POLY8_C( 52),
        SIMDE_POLY8_C( 152), SIMDE_POLY8_C( 159), SIMDE_POLY8_C( 178), SIMDE_POLY8_C( 24) },
      { SIMDE_POLY8_C( 194), SIMDE_POLY8_C( 223), SIMDE_POLY8_C( 173), SIMDE_POLY8_C( 189),
        SIMDE_POLY8_C( 250), SIMDE_POLY8_C( 104), SIMDE_POLY8_C( 117), SIMDE_POLY8_C( 50) },
      { SIMDE_POLY16_C( 15652), SIMDE_POLY16_C( 18330), SIMDE_POLY16_C( 11718), SIMDE_POLY16_C( 7812),
        SIMDE_POLY16_C( 30064), SIMDE_POLY16_C( 12504), SIMDE_POLY16_C( 13210), SIMDE_POLY16_C( 688) } },
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < vec_count ; i++) {
    simde_poly8x8_t lhs = simde_vld1_p8(test_vec[i].a);
    simde_poly8x8_t rhs = simde_vld1_p8(test_vec[i].b);
    simde_poly16x8_t r = simde_vmull_p8(lhs, rhs);

    /* Argument spelling kept as-is in case the helper reports it verbatim. */
    simde_test_arm_neon_assert_equal_p16x8(r, simde_vld1q_p16(test_vec[i].r));
  }

  return 0;
#else
  /* Disabled generator: writes a newline to stdout, then passes eight
   * random input pairs and their results to the write helpers. */
  fputc('\n', stdout);
  for (int n = 0 ; n < 8 ; n++) {
    simde_poly8x8_t lhs = simde_test_arm_neon_random_p8x8();
    simde_poly8x8_t rhs = simde_test_arm_neon_random_p8x8();
    simde_poly16x8_t product = simde_vmull_p8(lhs, rhs);

    simde_test_arm_neon_write_p8x8(2, lhs, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_arm_neon_write_p8x8(2, rhs, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_p16x8(2, product, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}
#if !defined(SIMDE_TARGET_NOT_SUPPORT_INT128_TYPE) && !defined(SIMDE_BUG_GCC_113065)
static int
test_simde_vmull_p64 (SIMDE_MUNIT_TEST_ARGS) {
#if 1
struct {
simde_poly64_t a[1];
simde_poly64_t b[1];
simde_poly64_t r[2];
} test_vec[] = {
{ { SIMDE_POLY64_C( 7068902937866061824) },
{ SIMDE_POLY64_C(14874139788804648960) },
{ SIMDE_POLY64_C( 3152885789599675803), SIMDE_POLY64_C( 3604645741034733568) } },
{ { SIMDE_POLY64_C( 6112195614237017088) },
{ SIMDE_POLY64_C(15100063711026538496) },
{ SIMDE_POLY64_C( 4210036932911217869), SIMDE_POLY64_C( 8872695348131266560) } },
{ { SIMDE_POLY64_C( 6184300182471711744) },
{ SIMDE_POLY64_C(15413425443050586112) },
{ SIMDE_POLY64_C( 4320778884545928455), SIMDE_POLY64_C(16183132146376900608) } },
{ { SIMDE_POLY64_C(18427427496663795712) },
{ SIMDE_POLY64_C(17337026183024695296) },
{ SIMDE_POLY64_C( 5785060070923143728), SIMDE_POLY64_C( 7243298849874247680) } },
{ { SIMDE_POLY64_C( 5898302655747178496) },
{ SIMDE_POLY64_C(14270322748663631872) },
{ SIMDE_POLY64_C( 4430735050927400579), SIMDE_POLY64_C( 281238396959981568) } },
{ { SIMDE_POLY64_C( 4310271262858839552) },
{ SIMDE_POLY64_C(11226476335783634944) },
{ SIMDE_POLY64_C( 2293839743262371683), SIMDE_POLY64_C(13199221759279104000) } },
{ { SIMDE_POLY64_C( 1504197760910681088) },
{ SIMDE_POLY64_C(12397747313193005056) },
{ SIMDE_POLY64_C( 576860236216524364), SIMDE_POLY64_C( 6977643691527634944) } },
{ { SIMDE_POLY64_C(17342940921599655936) },
{ SIMDE_POLY64_C( 2283591279968234496) },
{ SIMDE_POLY64_C( 735715569773265056), SIMDE_POLY64_C( 3633255345754734592) } },
};
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
simde_poly64_t a = test_vec[i].a[0];
simde_poly64_t b = test_vec[i].b[0];
simde_poly128_t r = simde_vmull_p64(a, b);
simde_poly128_t mask = HEDLEY_STATIC_CAST(simde_poly128_t, 0xFFFFFFFFFFFFFFFFull);
simde_poly64_t top_r = HEDLEY_STATIC_CAST(simde_poly64_t, ((r >> 64) & mask));
simde_poly64_t bottom_r = HEDLEY_STATIC_CAST(simde_poly64_t, (r & mask));
simde_assert_equal_p64(top_r, test_vec[i].r[0]);
simde_assert_equal_p64(bottom_r, test_vec[i].r[1]);
}
return 0;
#else
fputc('\n', stdout);
for (int i = 0 ; i < 8 ; i++) {
simde_poly64x1_t a = simde_test_arm_neon_random_p64x1();
simde_poly64x1_t b = simde_test_arm_neon_random_p64x1();
simde_poly64x2_t r = simde_vmull_p64(a, b);
simde_test_arm_neon_write_p64x1(2, a, SIMDE_TEST_VEC_POS_FIRST);
simde_test_arm_neon_write_p64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
simde_test_arm_neon_write_p64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
}
return 1;
#endif
}
#endif /* !defined(SIMDE_TARGET_NOT_SUPPORT_INT128_TYPE) && !defined(SIMDE_BUG_GCC_113065) */
/* Registration table for the vmull tests defined in this file. The
 * vmull_p64 entry is guarded by the same preprocessor condition as the
 * definition of test_simde_vmull_p64, so it is only listed when that
 * function is compiled. */
SIMDE_TEST_FUNC_LIST_BEGIN
SIMDE_TEST_FUNC_LIST_ENTRY(vmull_s8)
SIMDE_TEST_FUNC_LIST_ENTRY(vmull_s16)
SIMDE_TEST_FUNC_LIST_ENTRY(vmull_s32)
SIMDE_TEST_FUNC_LIST_ENTRY(vmull_u8)
SIMDE_TEST_FUNC_LIST_ENTRY(vmull_u16)
SIMDE_TEST_FUNC_LIST_ENTRY(vmull_u32)
SIMDE_TEST_FUNC_LIST_ENTRY(vmull_p8)
#if !defined(SIMDE_TARGET_NOT_SUPPORT_INT128_TYPE) && !defined(SIMDE_BUG_GCC_113065)
SIMDE_TEST_FUNC_LIST_ENTRY(vmull_p64)
#endif /* !defined(SIMDE_TARGET_NOT_SUPPORT_INT128_TYPE) && !defined(SIMDE_BUG_GCC_113065) */
SIMDE_TEST_FUNC_LIST_END
#include "test-neon-footer.h"
|