; Codegen test for the llvm.experimental.cttz.elts intrinsic on fixed-width vectors (AArch64).
; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s
; FIXED WIDTH
; ctz_v8i1: i8 count of trailing zero elements of an <8 x i1> mask, with the
; zero-is-poison flag cleared.
; Lowering expected by the assertions below:
;   * shl #7 + cmlt #0 turn bit 0 of every lane into an all-ones/all-zero lane,
;   * the lanes are ANDed with the descending constant 8, 7, ..., 1,
;   * umaxv picks the largest surviving value, which is subtracted from 8.
; With no lane set the maximum is 0, so the expected code returns 8.
define i8 @ctz_v8i1(<8 x i1> %a) {
; CHECK-LABEL: .LCPI0_0:
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .byte 5
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 1
; CHECK-LABEL: ctz_v8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: shl v0.8b, v0.8b, #7
; CHECK-NEXT: adrp x8, .LCPI0_0
; CHECK-NEXT: mov w9, #8 // =0x8
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_0]
; CHECK-NEXT: cmlt v0.8b, v0.8b, #0
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: umaxv b0, v0.8b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: sub w0, w9, w8
; CHECK-NEXT: ret
  %tz.count = call i8 @llvm.experimental.cttz.elts.i8.v8i1(<8 x i1> %a, i1 false)
  ret i8 %tz.count
}
; ctz_v16i1: i32 count of trailing zero elements of a <16 x i1> mask, with the
; zero-is-poison flag cleared.
; The assertions below expect a 128-bit (q-register) sequence: the lane mask is
; ANDed with the descending byte constant 16, 15, ..., 1, reduced with umaxv,
; and subtracted from 16. The difference is then masked to its low 8 bits
; (and ..., #0xff) before being returned in w0.
define i32 @ctz_v16i1(<16 x i1> %a) {
; CHECK-LABEL: .LCPI1_0:
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 12
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .byte 5
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 1
; CHECK-LABEL: ctz_v16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: shl v0.16b, v0.16b, #7
; CHECK-NEXT: adrp x8, .LCPI1_0
; CHECK-NEXT: mov w9, #16 // =0x10
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0]
; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umaxv b0, v0.16b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: sub w8, w9, w8
; CHECK-NEXT: and w0, w8, #0xff
; CHECK-NEXT: ret
  %tz.count = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 false)
  ret i32 %tz.count
}
; ctz_v4i32: i16 count of trailing zero elements of a <4 x i32> vector (the
; input is not an i1 mask here), with the zero-is-poison flag cleared.
; Lowering expected by the assertions below:
;   * cmtst v0, v0, v0 sets every non-zero 32-bit lane to all-ones,
;   * xtn narrows the lanes to 16 bits,
;   * the result is ANDed with the descending halfword constant 4, 3, 2, 1,
;   * umaxv reduces it, the maximum is subtracted from 4, and the difference
;     is masked with #0xff before being returned in w0.
define i16 @ctz_v4i32(<4 x i32> %a) {
; CHECK-LABEL: .LCPI2_0:
; CHECK-NEXT: .hword 4
; CHECK-NEXT: .hword 3
; CHECK-NEXT: .hword 2
; CHECK-NEXT: .hword 1
; CHECK-LABEL: ctz_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: cmtst v0.4s, v0.4s, v0.4s
; CHECK-NEXT: adrp x8, .LCPI2_0
; CHECK-NEXT: mov w9, #4 // =0x4
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI2_0]
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: umaxv h0, v0.4h
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: sub w8, w9, w8
; CHECK-NEXT: and w0, w8, #0xff
; CHECK-NEXT: ret
  %tz.count = call i16 @llvm.experimental.cttz.elts.i16.v4i32(<4 x i32> %a, i1 false)
  ret i16 %tz.count
}
; ctz_i7_v8i1: same <8 x i1> input as ctz_v8i1, but the result type is the
; non-power-of-two integer i7; the zero-is-poison flag is cleared.
; The assertions below expect the same instruction sequence as the i8 variant:
; mask the lanes, AND with the descending constant 8..1, umaxv, subtract
; from 8.
define i7 @ctz_i7_v8i1(<8 x i1> %a) {
; CHECK-LABEL: .LCPI3_0:
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .byte 5
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 1
; CHECK-LABEL: ctz_i7_v8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: shl v0.8b, v0.8b, #7
; CHECK-NEXT: adrp x8, .LCPI3_0
; CHECK-NEXT: mov w9, #8 // =0x8
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI3_0]
; CHECK-NEXT: cmlt v0.8b, v0.8b, #0
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: umaxv b0, v0.8b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: sub w0, w9, w8
; CHECK-NEXT: ret
  %tz.count = call i7 @llvm.experimental.cttz.elts.i7.v8i1(<8 x i1> %a, i1 false)
  ret i7 %tz.count
}
; ZERO IS POISON
; ctz_v8i1_poison: i8 count of trailing zero elements of an <8 x i1> mask with
; the zero-is-poison flag SET (second intrinsic operand is true).
; The assertions below currently expect exactly the same instruction sequence
; as the flag-cleared ctz_v8i1 test: mask the lanes, AND with the descending
; constant 8..1, umaxv, subtract from 8.
define i8 @ctz_v8i1_poison(<8 x i1> %a) {
; CHECK-LABEL: .LCPI4_0:
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .byte 5
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 1
; CHECK-LABEL: ctz_v8i1_poison:
; CHECK: // %bb.0:
; CHECK-NEXT: shl v0.8b, v0.8b, #7
; CHECK-NEXT: adrp x8, .LCPI4_0
; CHECK-NEXT: mov w9, #8 // =0x8
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_0]
; CHECK-NEXT: cmlt v0.8b, v0.8b, #0
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: umaxv b0, v0.8b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: sub w0, w9, w8
; CHECK-NEXT: ret
  %tz.count = call i8 @llvm.experimental.cttz.elts.i8.v8i1(<8 x i1> %a, i1 true)
  ret i8 %tz.count
}
; Declarations of the llvm.experimental.cttz.elts overloads used by the tests
; in this file. The name suffix encodes <result type>.<vector operand type>;
; the trailing i1 operand is the zero-is-poison flag.
declare i8 @llvm.experimental.cttz.elts.i8.v8i1(<8 x i1>, i1)
declare i7 @llvm.experimental.cttz.elts.i7.v8i1(<8 x i1>, i1)
declare i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1>, i1)
declare i16 @llvm.experimental.cttz.elts.i16.v4i32(<4 x i32>, i1)