; RUN: llc %s -mtriple=aarch64 -mattr=+v8.3a,+fullfp16 -o - | FileCheck %s
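; Test that the llvm.aarch64.neon.vcmla intrinsics are lowered to the FCMLA
; instruction for each element type and rotation, including the indexed
; (by-element) forms when one operand is a splat of a single complex value.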
define <4 x half> @test_16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
entry:
; CHECK-LABEL: test_16x4
; CHECK: fcmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #0
;
%res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c)
ret <4 x half> %res
}
define <4 x half> @test_16x4_lane_1(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
entry:
; CHECK-LABEL: test_16x4_lane_1
; CHECK: fcmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[1], #0
;
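; Bitcasting to <2 x i32> makes each lane one f16 complex value (real and
; imaginary halves), so splatting lane 1 duplicates a whole complex number
; and lets ISel select the indexed form on h[1].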
%c.cast = bitcast <4 x half> %c to <2 x i32>
%c.dup = shufflevector <2 x i32> %c.cast, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
%c.res = bitcast <2 x i32> %c.dup to <4 x half>
%res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c.res)
ret <4 x half> %res
}
define <4 x half> @test_rot90_16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
entry:
; CHECK-LABEL: test_rot90_16x4
; CHECK: fcmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #90
;
%res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c)
ret <4 x half> %res
}
define <4 x half> @test_rot90_16x4_lane_0(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
entry:
; CHECK-LABEL: test_rot90_16x4_lane_0
; CHECK: fcmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[0], #90
;
%c.cast = bitcast <4 x half> %c to <2 x i32>
%c.dup = shufflevector <2 x i32> %c.cast, <2 x i32> undef, <2 x i32> <i32 0, i32 0>
%c.res = bitcast <2 x i32> %c.dup to <4 x half>
%res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c.res)
ret <4 x half> %res
}
define <4 x half> @test_rot180_16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
entry:
; CHECK-LABEL: test_rot180_16x4
; CHECK: fcmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #180
;
%res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c)
ret <4 x half> %res
}
define <4 x half> @test_rot180_16x4_lane_0(<4 x half> %a, <4 x half> %b, <8 x half> %c) {
entry:
; CHECK-LABEL: test_rot180_16x4_lane_0
; CHECK: fcmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[0], #180
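;
; Same idiom, but the splatted complex value comes from a 128-bit source
; vector; lane 0 of the <4 x i32> view is narrowed into a <2 x i32> splat.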
%c.cast = bitcast <8 x half> %c to <4 x i32>
%c.dup = shufflevector <4 x i32> %c.cast, <4 x i32> undef, <2 x i32> <i32 0, i32 0>
%c.res = bitcast <2 x i32> %c.dup to <4 x half>
%res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c.res)
ret <4 x half> %res
}
define <4 x half> @test_rot270_16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
entry:
; CHECK-LABEL: test_rot270_16x4
; CHECK: fcmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #270
;
%res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c)
ret <4 x half> %res
}
define <2 x float> @test_32x2(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
entry:
; CHECK-LABEL: test_32x2
; CHECK: fcmla v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #0
;
%res = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c)
ret <2 x float> %res
}
define <2 x float> @test_rot90_32x2(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
entry:
; CHECK-LABEL: test_rot90_32x2
; CHECK: fcmla v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #90
;
%res = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c)
ret <2 x float> %res
}
define <2 x float> @test_rot180_32x2(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
entry:
; CHECK-LABEL: test_rot180_32x2
; CHECK: fcmla v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #180
;
%res = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c)
ret <2 x float> %res
}
define <2 x float> @test_rot270_32x2(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
entry:
; CHECK-LABEL: test_rot270_32x2
; CHECK: fcmla v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #270
;
%res = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c)
ret <2 x float> %res
}
define <8 x half> @test_16x8(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
entry:
; CHECK-LABEL: test_16x8
; CHECK: fcmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, #0
;
%res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
ret <8 x half> %res
}
define <8 x half> @test_16x8_lane_0(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
entry:
; CHECK-LABEL: test_16x8_lane_0
; CHECK: fcmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[0], #0
;
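; In the 128-bit f16 form the <4 x i32> view holds four complex values;
; splatting one i32 lane again selects the indexed form.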
%c.cast = bitcast <8 x half> %c to <4 x i32>
%c.dup = shufflevector <4 x i32> %c.cast, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%c.res = bitcast <4 x i32> %c.dup to <8 x half>
%res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c.res)
ret <8 x half> %res
}
define <8 x half> @test_rot90_16x8(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
entry:
; CHECK-LABEL: test_rot90_16x8
; CHECK: fcmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, #90
;
%res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
ret <8 x half> %res
}
define <8 x half> @test_rot90_16x8_lane_1(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
entry:
; CHECK-LABEL: test_rot90_16x8_lane_1
; CHECK: fcmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[1], #90
;
%c.cast = bitcast <8 x half> %c to <4 x i32>
%c.dup = shufflevector <4 x i32> %c.cast, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%c.res = bitcast <4 x i32> %c.dup to <8 x half>
%res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c.res)
ret <8 x half> %res
}
define <8 x half> @test_rot180_16x8(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
entry:
; CHECK-LABEL: test_rot180_16x8
; CHECK: fcmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, #180
;
%res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
ret <8 x half> %res
}
define <8 x half> @test_rot180_16x8_lane_1(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
entry:
; CHECK-LABEL: test_rot180_16x8_lane_1
; CHECK: fcmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[1], #180
;
%c.cast = bitcast <8 x half> %c to <4 x i32>
%c.dup = shufflevector <4 x i32> %c.cast, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%c.res = bitcast <4 x i32> %c.dup to <8 x half>
%res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c.res)
ret <8 x half> %res
}
define <8 x half> @test_rot270_16x8(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
entry:
; CHECK-LABEL: test_rot270_16x8
; CHECK: fcmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, #270
;
%res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
ret <8 x half> %res
}
define <8 x half> @test_rot270_16x8_lane_0(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
entry:
; CHECK-LABEL: test_rot270_16x8_lane_0
; CHECK: fcmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[0], #270
;
%c.cast = bitcast <8 x half> %c to <4 x i32>
%c.dup = shufflevector <4 x i32> %c.cast, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%c.res = bitcast <4 x i32> %c.dup to <8 x half>
%res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c.res)
ret <8 x half> %res
}
define <4 x float> @test_32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
entry:
; CHECK-LABEL: test_32x4
; CHECK: fcmla v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, #0
;
%res = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
ret <4 x float> %res
}
define <4 x float> @test_32x4_lane_0(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
entry:
; CHECK-LABEL: test_32x4_lane_0
; CHECK: fcmla v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.s[0], #0
;
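; For f32 a complex value is 64 bits wide, so the splat goes through a
; <2 x i64> view before selecting the indexed form on s[0].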
%c.cast = bitcast <4 x float> %c to <2 x i64>
%c.dup = shufflevector <2 x i64> %c.cast, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%c.res = bitcast <2 x i64> %c.dup to <4 x float>
%res = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c.res)
ret <4 x float> %res
}
define <4 x float> @test_rot90_32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
entry:
; CHECK-LABEL: test_rot90_32x4
; CHECK: fcmla v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, #90
;
%res = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
ret <4 x float> %res
}
define <4 x float> @test_rot180_32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
entry:
; CHECK-LABEL: test_rot180_32x4
; CHECK: fcmla v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, #180
;
%res = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
ret <4 x float> %res
}
define <4 x float> @test_rot270_32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
entry:
; CHECK-LABEL: test_rot270_32x4
; CHECK: fcmla v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, #270
;
%res = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
ret <4 x float> %res
}
define <2 x double> @test_64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
entry:
; CHECK-LABEL: test_64x2
; CHECK: fcmla v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, #0
;
%res = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot0.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
ret <2 x double> %res
}
define <2 x double> @test_rot90_64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
entry:
; CHECK-LABEL: test_rot90_64x2
; CHECK: fcmla v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, #90
;
%res = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot90.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
ret <2 x double> %res
}
define <2 x double> @test_rot180_64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
entry:
; CHECK-LABEL: test_rot180_64x2
; CHECK: fcmla v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, #180
;
%res = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot180.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
ret <2 x double> %res
}
define <2 x double> @test_rot270_64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
entry:
; CHECK-LABEL: test_rot270_64x2
; CHECK: fcmla v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, #270
;
%res = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
ret <2 x double> %res
}
declare <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half>, <4 x half>, <4 x half>)
declare <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half>, <4 x half>, <4 x half>)
declare <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half>, <4 x half>, <4 x half>)
declare <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half>, <4 x half>, <4 x half>)
declare <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half>, <8 x half>, <8 x half>)
declare <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half>, <8 x half>, <8 x half>)
declare <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half>, <8 x half>, <8 x half>)
declare <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half>, <8 x half>, <8 x half>)
declare <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <2 x double> @llvm.aarch64.neon.vcmla.rot0.v2f64(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.aarch64.neon.vcmla.rot90.v2f64(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.aarch64.neon.vcmla.rot180.v2f64(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double>, <2 x double>, <2 x double>)