1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
|
// RUN: %clang_cc1 -O1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -S -o - -emit-llvm %s | FileCheck %s
// RUN: %clang_cc1 -O1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -S -o - %s | FileCheck -check-prefix=CHECK-CODEGEN %s
// REQUIRES: aarch64-registered-target
// Test ARM64 SIMD max/min intrinsics
#include <arm_neon.h>
// Test a represntative sample of 8 and 16, signed and unsigned, 64 and 128 bit reduction
int8_t test_vmaxv_s8(int8x8_t a1) {
// CHECK-LABEL: define i8 @test_vmaxv_s8(
return vmaxv_s8(a1);
// CHECK: call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(
}
uint16_t test_vminvq_u16(uint16x8_t a1) {
// CHECK-LABEL: define i16 @test_vminvq_u16(
return vminvq_u16(a1);
// CHECK: call i32 @llvm.aarch64.neon.uminv.i32.v8i16(
}
// Test a represntative sample of 8 and 16, signed and unsigned, 64 and 128 bit pairwise
uint8x8_t test_vmin_u8(uint8x8_t a1, uint8x8_t a2) {
// CHECK-LABEL: define <8 x i8> @test_vmin_u8(
return vmin_u8(a1, a2);
// CHECK: call <8 x i8> @llvm.aarch64.neon.umin.v8i8(
}
uint8x16_t test_vminq_u8(uint8x16_t a1, uint8x16_t a2) {
// CHECK-LABEL: define <16 x i8> @test_vminq_u8(
return vminq_u8(a1, a2);
// CHECK: call <16 x i8> @llvm.aarch64.neon.umin.v16i8(
}
int16x8_t test_vmaxq_s16(int16x8_t a1, int16x8_t a2) {
// CHECK-LABEL: define <8 x i16> @test_vmaxq_s16(
return vmaxq_s16(a1, a2);
// CHECK: call <8 x i16> @llvm.aarch64.neon.smax.v8i16(
}
// Test the more complicated cases of [suf]32 and f64
float64x2_t test_vmaxq_f64(float64x2_t a1, float64x2_t a2) {
// CHECK-LABEL: define <2 x double> @test_vmaxq_f64(
return vmaxq_f64(a1, a2);
// CHECK: call <2 x double> @llvm.aarch64.neon.fmax.v2f64(
}
float32x4_t test_vmaxq_f32(float32x4_t a1, float32x4_t a2) {
// CHECK-LABEL: define <4 x float> @test_vmaxq_f32(
return vmaxq_f32(a1, a2);
// CHECK: call <4 x float> @llvm.aarch64.neon.fmax.v4f32(
}
float64x2_t test_vminq_f64(float64x2_t a1, float64x2_t a2) {
// CHECK-LABEL: define <2 x double> @test_vminq_f64(
return vminq_f64(a1, a2);
// CHECK: call <2 x double> @llvm.aarch64.neon.fmin.v2f64(
}
float32x2_t test_vmax_f32(float32x2_t a1, float32x2_t a2) {
// CHECK-LABEL: define <2 x float> @test_vmax_f32(
return vmax_f32(a1, a2);
// CHECK: call <2 x float> @llvm.aarch64.neon.fmax.v2f32(
}
int32x2_t test_vmax_s32(int32x2_t a1, int32x2_t a2) {
// CHECK-LABEL: define <2 x i32> @test_vmax_s32(
return vmax_s32(a1, a2);
// CHECK: call <2 x i32> @llvm.aarch64.neon.smax.v2i32(
}
uint32x2_t test_vmin_u32(uint32x2_t a1, uint32x2_t a2) {
// CHECK-LABEL: define <2 x i32> @test_vmin_u32(
return vmin_u32(a1, a2);
// CHECK: call <2 x i32> @llvm.aarch64.neon.umin.v2i32(
}
float32_t test_vmaxnmv_f32(float32x2_t a1) {
// CHECK-LABEL: define float @test_vmaxnmv_f32(
return vmaxnmv_f32(a1);
// CHECK: llvm.aarch64.neon.fmaxnmv.f32.v2f32
// CHECK-NEXT: ret
}
// this doesn't translate into a valid instruction, regardless of what the
// ARM doc says.
#if 0
float64_t test_vmaxnmvq_f64(float64x2_t a1) {
// CHECK@ test_vmaxnmvq_f64
return vmaxnmvq_f64(a1);
// CHECK@ llvm.aarch64.neon.saddlv.i64.v2i32
// CHECK-NEXT@ ret
}
#endif
float32_t test_vmaxnmvq_f32(float32x4_t a1) {
// CHECK-LABEL: define float @test_vmaxnmvq_f32(
return vmaxnmvq_f32(a1);
// CHECK: call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(
// CHECK-NEXT: ret
}
float32_t test_vmaxv_f32(float32x2_t a1) {
// CHECK-LABEL: define float @test_vmaxv_f32(
return vmaxv_f32(a1);
// CHECK: call float @llvm.aarch64.neon.fmaxv.f32.v2f32(
// FIXME check that the 2nd and 3rd arguments are the same V register below
// CHECK-CODEGEN: fmaxp.2s
// CHECK-NEXT: ret
}
int32_t test_vmaxv_s32(int32x2_t a1) {
// CHECK-LABEL: define i32 @test_vmaxv_s32(
return vmaxv_s32(a1);
// CHECK: call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(
// FIXME check that the 2nd and 3rd arguments are the same V register below
// CHECK-CODEGEN: smaxp.2s
// CHECK-NEXT: ret
}
uint32_t test_vmaxv_u32(uint32x2_t a1) {
// CHECK-LABEL: define i32 @test_vmaxv_u32(
return vmaxv_u32(a1);
// CHECK: call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(
// FIXME check that the 2nd and 3rd arguments are the same V register below
// CHECK-CODEGEN: umaxp.2s
// CHECK-NEXT: ret
}
// FIXME punt on this for now; don't forget to fix CHECKs
#if 0
float64_t test_vmaxvq_f64(float64x2_t a1) {
// CHECK@ test_vmaxvq_f64
return vmaxvq_f64(a1);
// CHECK@ llvm.aarch64.neon.fmaxv.i64.v2f64
// CHECK-NEXT@ ret
}
#endif
float32_t test_vmaxvq_f32(float32x4_t a1) {
// CHECK-LABEL: define float @test_vmaxvq_f32(
return vmaxvq_f32(a1);
// CHECK: call float @llvm.aarch64.neon.fmaxv.f32.v4f32(
// CHECK-NEXT: ret
}
float32_t test_vminnmv_f32(float32x2_t a1) {
// CHECK-LABEL: define float @test_vminnmv_f32(
return vminnmv_f32(a1);
// CHECK: call float @llvm.aarch64.neon.fminnmv.f32.v2f32(
// CHECK-NEXT: ret
}
float32_t test_vminvq_f32(float32x4_t a1) {
// CHECK-LABEL: define float @test_vminvq_f32(
return vminvq_f32(a1);
// CHECK: call float @llvm.aarch64.neon.fminv.f32.v4f32(
// CHECK-NEXT: ret
}
// this doesn't translate into a valid instruction, regardless of what the ARM
// doc says.
#if 0
float64_t test_vminnmvq_f64(float64x2_t a1) {
// CHECK@ test_vminnmvq_f64
return vminnmvq_f64(a1);
// CHECK@ llvm.aarch64.neon.saddlv.i64.v2i32
// CHECK-NEXT@ ret
}
#endif
float32_t test_vminnmvq_f32(float32x4_t a1) {
// CHECK-LABEL: define float @test_vminnmvq_f32(
return vminnmvq_f32(a1);
// CHECK: call float @llvm.aarch64.neon.fminnmv.f32.v4f32(
// CHECK-NEXT: ret
}
float32_t test_vminv_f32(float32x2_t a1) {
// CHECK-LABEL: define float @test_vminv_f32(
return vminv_f32(a1);
// CHECK: call float @llvm.aarch64.neon.fminv.f32.v2f32(
// CHECK-NEXT: ret
}
int32_t test_vminv_s32(int32x2_t a1) {
// CHECK-LABEL: define i32 @test_vminv_s32(
return vminv_s32(a1);
// CHECK: call i32 @llvm.aarch64.neon.sminv.i32.v2i32(
// CHECK-CODEGEN: sminp.2s
// CHECK-NEXT: ret
}
uint32_t test_vminv_u32(uint32x2_t a1) {
// CHECK-LABEL: define i32 @test_vminv_u32(
return vminv_u32(a1);
// CHECK: call i32 @llvm.aarch64.neon.uminv.i32.v2i32(
}
// FIXME punt on this for now; don't forget to fix CHECKs
#if 0
float64_t test_vminvq_f64(float64x2_t a1) {
// CHECK@ test_vminvq_f64
return vminvq_f64(a1);
// CHECK@ llvm.aarch64.neon.saddlv.i64.v2i32
// CHECK-NEXT@ ret
}
#endif
|