File: sve2-bsl.ll

package info (click to toggle)
llvm-toolchain-19 1%3A19.1.7-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,998,520 kB
  • sloc: cpp: 6,951,680; ansic: 1,486,157; asm: 913,598; python: 232,024; f90: 80,126; objc: 75,281; lisp: 37,276; pascal: 16,990; sh: 10,009; ml: 5,058; perl: 4,724; awk: 3,523; makefile: 3,167; javascript: 2,504; xml: 892; fortran: 664; cs: 573
file content (95 lines) | stat: -rw-r--r-- 3,681 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-unknown-linux-gnu -mattr=+sve2 < %s | FileCheck %s --check-prefixes=CHECK

; Bitwise select with constant masks. Per 32-bit lane, 0x7fffffff keeps the
; low 31 bits of %a and 0x80000000 keeps the top bit of %b. The two masks are
; bitwise complements, so the and/and/or sequence is expected to become one BSL.
define <vscale x 4 x i32> @bsl(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: bsl:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z2.s, #0x7fffffff
; CHECK-NEXT:    bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %a.low31 = and <vscale x 4 x i32> %a, splat(i32 2147483647)
  %b.top = and <vscale x 4 x i32> %b, splat(i32 -2147483648)
  %sel = or <vscale x 4 x i32> %a.low31, %b.top
  ret <vscale x 4 x i32> %sel
}

; Bitwise select where both masks are computed at run time from %pre_cond:
;   %neg_cond = 0 - %pre_cond
;   %min_cond = %pre_cond + (-1)
; In two's complement -x == ~(x - 1), so the two masks are bitwise complements
; of each other. The and/and/or sequence is therefore expected to become a BSL
; that uses the negated value (the SUBR result) as its mask operand.
define <vscale x 4 x i32> @bsl_add_sub(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) {
; CHECK-LABEL: bsl_add_sub:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
; CHECK-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; CHECK-NEXT:    mov z0.d, z1.d
; CHECK-NEXT:    ret
  ; Mask selecting bits of %left.
  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
  ; Complementary mask selecting bits of %right.
  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
  %left_bits_0 = and <vscale x 4 x i32> %neg_cond, %left
  %right_bits_0 = and <vscale x 4 x i32> %min_cond, %right
  %bsl0000 = or <vscale x 4 x i32> %right_bits_0, %left_bits_0
  ret <vscale x 4 x i32> %bsl0000
}

; Negative test: no BSL instruction is expected here. The masks 0x7fffffff and
; 0x7ffffffe are not bitwise complements of each other, so the and/and/or
; sequence must stay as two ANDs followed by an ORR.
define <vscale x 4 x i32> @no_bsl_fold(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: no_bsl_fold:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z0.s, z0.s, #0x7fffffff
; CHECK-NEXT:    and z1.s, z1.s, #0x7ffffffe
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %a.masked = and <vscale x 4 x i32> %a, splat(i32 2147483647)
  %b.masked = and <vscale x 4 x i32> %b, splat(i32 2147483646)
  %merged = or <vscale x 4 x i32> %a.masked, %b.masked
  ret <vscale x 4 x i32> %merged
}

; Inverted bitwise select on i8 lanes. The masks 0x7f and 0x80 are bitwise
; complements, and the merged value is then inverted (xor with all-ones), so
; the whole sequence is expected to become a single NBSL.
define <vscale x 16 x i8> @nbsl_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: nbsl_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z2.b, #127 // =0x7f
; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %a.masked = and <vscale x 16 x i8> %a, splat(i8 127)
  %b.masked = and <vscale x 16 x i8> %b, splat(i8 -128)
  %sel = or <vscale x 16 x i8> %a.masked, %b.masked
  %sel.inv = xor <vscale x 16 x i8> %sel, splat(i8 -1)
  ret <vscale x 16 x i8> %sel.inv
}

; Inverted bitwise select on i16 lanes. The masks 0x7fff and 0x8000 are bitwise
; complements, and the merged value is then inverted (xor with all-ones), so
; the whole sequence is expected to become a single NBSL.
define <vscale x 8 x i16> @nbsl_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: nbsl_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z2.h, #32767 // =0x7fff
; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %a.masked = and <vscale x 8 x i16> %a, splat(i16 32767)
  %b.masked = and <vscale x 8 x i16> %b, splat(i16 -32768)
  %sel = or <vscale x 8 x i16> %a.masked, %b.masked
  %sel.inv = xor <vscale x 8 x i16> %sel, splat(i16 -1)
  ret <vscale x 8 x i16> %sel.inv
}

; Inverted bitwise select on i32 lanes. The masks 0x7fffffff and 0x80000000 are
; bitwise complements, and the merged value is then inverted (xor with
; all-ones), so the whole sequence is expected to become a single NBSL.
define <vscale x 4 x i32> @nbsl_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: nbsl_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z2.s, #0x7fffffff
; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %a.masked = and <vscale x 4 x i32> %a, splat(i32 2147483647)
  %b.masked = and <vscale x 4 x i32> %b, splat(i32 -2147483648)
  %sel = or <vscale x 4 x i32> %a.masked, %b.masked
  %sel.inv = xor <vscale x 4 x i32> %sel, splat(i32 -1)
  ret <vscale x 4 x i32> %sel.inv
}

; Inverted bitwise select on i64 lanes. The masks 0x7fffffffffffffff and
; 0x8000000000000000 are bitwise complements, and the merged value is then
; inverted (xor with all-ones), so the whole sequence is expected to become a
; single NBSL.
define <vscale x 2 x i64> @nbsl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: nbsl_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z2.d, #0x7fffffffffffffff
; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %a.masked = and <vscale x 2 x i64> %a, splat(i64 9223372036854775807)
  %b.masked = and <vscale x 2 x i64> %b, splat(i64 -9223372036854775808)
  %sel = or <vscale x 2 x i64> %a.masked, %b.masked
  %sel.inv = xor <vscale x 2 x i64> %sel, splat(i64 -1)
  ret <vscale x 2 x i64> %sel.inv
}