1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
|
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple aarch64-arm-none-eabi -target-feature +neon -target-feature +bf16 \
// RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg -instcombine | FileCheck --check-prefix=CHECK-LE %s
// RUN: %clang_cc1 -triple aarch64_be-arm-none-eabi -target-feature +neon -target-feature +bf16 \
// RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg -instcombine | FileCheck --check-prefix=CHECK-BE %s
#include <arm_neon.h>
// CHECK-LE-LABEL: @test_vcopy_lane_bf16_v1(
// CHECK-LE-NEXT: entry:
// CHECK-LE-NEXT: [[VSET_LANE:%.*]] = shufflevector <4 x bfloat> [[A:%.*]], <4 x bfloat> [[B:%.*]], <4 x i32> <i32 0, i32 7, i32 2, i32 3>
// CHECK-LE-NEXT: ret <4 x bfloat> [[VSET_LANE]]
//
// CHECK-BE-LABEL: @test_vcopy_lane_bf16_v1(
// CHECK-BE-NEXT: entry:
// CHECK-BE-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x bfloat> [[A:%.*]], <4 x bfloat> undef, <4 x i32> <i32 3, i32 undef, i32 1, i32 0>
// CHECK-BE-NEXT: [[VSET_LANE:%.*]] = shufflevector <4 x bfloat> [[SHUFFLE]], <4 x bfloat> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 3>
// CHECK-BE-NEXT: [[SHUFFLE5:%.*]] = shufflevector <4 x bfloat> [[VSET_LANE]], <4 x bfloat> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK-BE-NEXT: ret <4 x bfloat> [[SHUFFLE5]]
//
bfloat16x4_t test_vcopy_lane_bf16_v1(bfloat16x4_t a, bfloat16x4_t b) {
return vcopy_lane_bf16(a, 1, b, 3);
}
// CHECK-LE-LABEL: @test_vcopy_lane_bf16_v2(
// CHECK-LE-NEXT: entry:
// CHECK-LE-NEXT: [[VSET_LANE:%.*]] = shufflevector <4 x bfloat> [[A:%.*]], <4 x bfloat> [[B:%.*]], <4 x i32> <i32 0, i32 1, i32 4, i32 3>
// CHECK-LE-NEXT: ret <4 x bfloat> [[VSET_LANE]]
//
// CHECK-BE-LABEL: @test_vcopy_lane_bf16_v2(
// CHECK-BE-NEXT: entry:
// CHECK-BE-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x bfloat> [[A:%.*]], <4 x bfloat> undef, <4 x i32> <i32 3, i32 2, i32 undef, i32 0>
// CHECK-BE-NEXT: [[VSET_LANE:%.*]] = shufflevector <4 x bfloat> [[SHUFFLE]], <4 x bfloat> [[B:%.*]], <4 x i32> <i32 0, i32 1, i32 7, i32 3>
// CHECK-BE-NEXT: [[SHUFFLE5:%.*]] = shufflevector <4 x bfloat> [[VSET_LANE]], <4 x bfloat> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK-BE-NEXT: ret <4 x bfloat> [[SHUFFLE5]]
//
bfloat16x4_t test_vcopy_lane_bf16_v2(bfloat16x4_t a, bfloat16x4_t b) {
return vcopy_lane_bf16(a, 2, b, 0);
}
// CHECK-LE-LABEL: @test_vcopyq_lane_bf16_v1(
// CHECK-LE-NEXT: entry:
// CHECK-LE-NEXT: [[TMP0:%.*]] = shufflevector <4 x bfloat> [[B:%.*]], <4 x bfloat> undef, <8 x i32> <i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
// CHECK-LE-NEXT: [[VSET_LANE:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[TMP0]], <8 x i32> <i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK-LE-NEXT: ret <8 x bfloat> [[VSET_LANE]]
//
// CHECK-BE-LABEL: @test_vcopyq_lane_bf16_v1(
// CHECK-BE-NEXT: entry:
// CHECK-BE-NEXT: [[TMP0:%.*]] = shufflevector <4 x bfloat> [[B:%.*]], <4 x bfloat> undef, <8 x i32> <i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
// CHECK-BE-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK-BE-NEXT: [[VSET_LANE:%.*]] = shufflevector <8 x bfloat> [[SHUFFLE]], <8 x bfloat> [[TMP0]], <8 x i32> <i32 9, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK-BE-NEXT: [[SHUFFLE5:%.*]] = shufflevector <8 x bfloat> [[VSET_LANE]], <8 x bfloat> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK-BE-NEXT: ret <8 x bfloat> [[SHUFFLE5]]
//
bfloat16x8_t test_vcopyq_lane_bf16_v1(bfloat16x8_t a, bfloat16x4_t b) {
return vcopyq_lane_bf16(a, 0, b, 2);
}
// CHECK-LE-LABEL: @test_vcopyq_lane_bf16_v2(
// CHECK-LE-NEXT: entry:
// CHECK-LE-NEXT: [[TMP0:%.*]] = shufflevector <4 x bfloat> [[B:%.*]], <4 x bfloat> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
// CHECK-LE-NEXT: [[VSET_LANE:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[TMP0]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 7>
// CHECK-LE-NEXT: ret <8 x bfloat> [[VSET_LANE]]
//
// CHECK-BE-LABEL: @test_vcopyq_lane_bf16_v2(
// CHECK-BE-NEXT: entry:
// CHECK-BE-NEXT: [[TMP0:%.*]] = shufflevector <4 x bfloat> [[B:%.*]], <4 x bfloat> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
// CHECK-BE-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 undef, i32 0>
// CHECK-BE-NEXT: [[VSET_LANE:%.*]] = shufflevector <8 x bfloat> [[SHUFFLE]], <8 x bfloat> [[TMP0]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 11, i32 7>
// CHECK-BE-NEXT: [[SHUFFLE5:%.*]] = shufflevector <8 x bfloat> [[VSET_LANE]], <8 x bfloat> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK-BE-NEXT: ret <8 x bfloat> [[SHUFFLE5]]
//
bfloat16x8_t test_vcopyq_lane_bf16_v2(bfloat16x8_t a, bfloat16x4_t b) {
return vcopyq_lane_bf16(a, 6, b, 0);
}
// CHECK-LE-LABEL: @test_vcopy_laneq_bf16_v1(
// CHECK-LE-NEXT: entry:
// CHECK-LE-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[B:%.*]], i32 7
// CHECK-LE-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[A:%.*]], bfloat [[VGETQ_LANE]], i32 0
// CHECK-LE-NEXT: ret <4 x bfloat> [[VSET_LANE]]
//
// CHECK-BE-LABEL: @test_vcopy_laneq_bf16_v1(
// CHECK-BE-NEXT: entry:
// CHECK-BE-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x bfloat> [[A:%.*]], <4 x bfloat> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 0>
// CHECK-BE-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[B:%.*]], i32 0
// CHECK-BE-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[SHUFFLE]], bfloat [[VGETQ_LANE]], i32 0
// CHECK-BE-NEXT: [[SHUFFLE5:%.*]] = shufflevector <4 x bfloat> [[VSET_LANE]], <4 x bfloat> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK-BE-NEXT: ret <4 x bfloat> [[SHUFFLE5]]
//
bfloat16x4_t test_vcopy_laneq_bf16_v1(bfloat16x4_t a, bfloat16x8_t b) {
return vcopy_laneq_bf16(a, 0, b, 7);
}
// CHECK-LE-LABEL: @test_vcopy_laneq_bf16_v2(
// CHECK-LE-NEXT: entry:
// CHECK-LE-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[B:%.*]], i32 4
// CHECK-LE-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[A:%.*]], bfloat [[VGETQ_LANE]], i32 3
// CHECK-LE-NEXT: ret <4 x bfloat> [[VSET_LANE]]
//
// CHECK-BE-LABEL: @test_vcopy_laneq_bf16_v2(
// CHECK-BE-NEXT: entry:
// CHECK-BE-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x bfloat> [[A:%.*]], <4 x bfloat> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 undef>
// CHECK-BE-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[B:%.*]], i32 3
// CHECK-BE-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[SHUFFLE]], bfloat [[VGETQ_LANE]], i32 3
// CHECK-BE-NEXT: [[SHUFFLE5:%.*]] = shufflevector <4 x bfloat> [[VSET_LANE]], <4 x bfloat> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK-BE-NEXT: ret <4 x bfloat> [[SHUFFLE5]]
//
bfloat16x4_t test_vcopy_laneq_bf16_v2(bfloat16x4_t a, bfloat16x8_t b) {
return vcopy_laneq_bf16(a, 3, b, 4);
}
// CHECK-LE-LABEL: @test_vcopyq_laneq_bf16_v1(
// CHECK-LE-NEXT: entry:
// CHECK-LE-NEXT: [[VSET_LANE:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[B:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6, i32 7>
// CHECK-LE-NEXT: ret <8 x bfloat> [[VSET_LANE]]
//
// CHECK-BE-LABEL: @test_vcopyq_laneq_bf16_v1(
// CHECK-BE-NEXT: entry:
// CHECK-BE-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 undef, i32 3, i32 2, i32 1, i32 0>
// CHECK-BE-NEXT: [[VSET_LANE:%.*]] = shufflevector <8 x bfloat> [[SHUFFLE]], <8 x bfloat> [[B:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 4, i32 5, i32 6, i32 7>
// CHECK-BE-NEXT: [[SHUFFLE5:%.*]] = shufflevector <8 x bfloat> [[VSET_LANE]], <8 x bfloat> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK-BE-NEXT: ret <8 x bfloat> [[SHUFFLE5]]
//
bfloat16x8_t test_vcopyq_laneq_bf16_v1(bfloat16x8_t a, bfloat16x8_t b) {
return vcopyq_laneq_bf16(a, 3, b, 7);
}
// CHECK-LE-LABEL: @test_vcopyq_laneq_bf16_v2(
// CHECK-LE-NEXT: entry:
// CHECK-LE-NEXT: [[VSET_LANE:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[B:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 10, i32 7>
// CHECK-LE-NEXT: ret <8 x bfloat> [[VSET_LANE]]
//
// CHECK-BE-LABEL: @test_vcopyq_laneq_bf16_v2(
// CHECK-BE-NEXT: entry:
// CHECK-BE-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 undef, i32 0>
// CHECK-BE-NEXT: [[VSET_LANE:%.*]] = shufflevector <8 x bfloat> [[SHUFFLE]], <8 x bfloat> [[B:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 13, i32 7>
// CHECK-BE-NEXT: [[SHUFFLE5:%.*]] = shufflevector <8 x bfloat> [[VSET_LANE]], <8 x bfloat> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK-BE-NEXT: ret <8 x bfloat> [[SHUFFLE5]]
//
bfloat16x8_t test_vcopyq_laneq_bf16_v2(bfloat16x8_t a, bfloat16x8_t b) {
return vcopyq_laneq_bf16(a, 6, b, 2);
}
|