File: po2-shift-add-and-to-zero.ll

package info (click to toggle)
llvm-toolchain-19 1%3A19.1.7-3~deb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm-proposed-updates
  • size: 1,998,492 kB
  • sloc: cpp: 6,951,680; ansic: 1,486,157; asm: 913,598; python: 232,024; f90: 80,126; objc: 75,281; lisp: 37,276; pascal: 16,990; sh: 10,009; ml: 5,058; perl: 4,724; awk: 3,523; makefile: 3,167; javascript: 2,504; xml: 892; fortran: 664; cs: 573
file content (151 lines) | stat: -rw-r--r-- 4,631 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -passes=instsimplify -S < %s | FileCheck %s

;; The and X, (add Y, -1) pattern is from an earlier instcombine pass which
;; converted

;; define i64 @f1() #0 {
;; entry:
;;   %0 = call i64 @llvm.aarch64.sve.cntb(i32 31)
;;   %1 = call i64 @llvm.aarch64.sve.cnth(i32 31)
;;   %rem = urem i64 %0, %1
;;   ret i64 %rem
;; }

;; into

;; define i64 @f1() #0 {
;; entry:
;;   %0 = call i64 @llvm.vscale.i64()
;;   %1 = shl nuw nsw i64 %0, 4
;;   %2 = call i64 @llvm.vscale.i64()
;;   %3 = shl nuw nsw i64 %2, 3
;;   %4 = add nsw i64 %3, -1
;;   %rem = and i64 %1, %4
;;   ret i64 %rem
;; }

;; InstCombine would have folded the original to returning 0 if the vscale
;; calls were the same Value*, but since there are two of them that fold
;; doesn't apply and we convert the urem to add/and instead. CSE then gets rid
;; of the extra vscale, leaving us with a new pattern to match. This only works
;; because vscale is known to be a power of 2 (assuming there's a defined range
;; for it).

;; Positive test: @f1 carries attribute group #0 (a vscale_range), so the
;; vscale call below is treated as a power of 2 and the whole and-of-shifts
;; is expected to simplify to a constant 0.
define i64 @f1() #0 {
; CHECK-LABEL: define i64 @f1
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret i64 0
;
entry:
  %0 = call i64 @llvm.vscale.i64()
  ;; %1 = vscale << 4: the value being masked.
  %1 = shl i64 %0, 4
  ;; %3 = (vscale << 3) - 1: the mask, built from the smaller shift amount.
  %2 = shl i64 %0, 3
  %3 = add i64 %2, -1
  ;; The single set bit of %1 lies above every bit set in %3.
  %rem = and i64 %1, %3
  ret i64 %rem
}

;; Make sure it works if the value could also be zero.
;; %x = %arg & (0 - %arg) has at most one bit set, i.e. it is a power of 2 or
;; zero. The mask is the first operand of the final and here (the opposite of
;; @f1) -- presumably to cover the commuted operand order.
define i64 @test_pow2_or_zero(i64 %arg) {
; CHECK-LABEL: define i64 @test_pow2_or_zero
; CHECK-SAME: (i64 [[ARG:%.*]]) {
; CHECK-NEXT:    ret i64 0
;
  ;; Isolate the lowest set bit of %arg (result is 0 when %arg is 0).
  %neg = sub i64 0, %arg
  %x = and i64 %neg, %arg
  ;; Same shape as @f1: (x << 4) masked with ((x << 3) - 1).
  %shl1 = shl i64 %x, 4
  %shl2 = shl i64 %x, 3
  %mask = add i64 %shl2, -1
  %rem = and i64 %mask, %shl1
  ret i64 %rem
}

;; Make sure it doesn't work if the value isn't known to be a power of 2.
;; In this case the value is a vscale call in a function that has no
;; `vscale_range` attribute; the body is otherwise the same as @f1 and is
;; expected to come out of the pass unchanged.
define i64 @no_pow2() {
; CHECK-LABEL: define i64 @no_pow2() {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 4
; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0]], 3
; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[TMP2]], -1
; CHECK-NEXT:    [[REM:%.*]] = and i64 [[TMP1]], [[TMP3]]
; CHECK-NEXT:    ret i64 [[REM]]
;
entry:
  %0 = call i64 @llvm.vscale.i64()
  ;; (vscale << 4) & ((vscale << 3) - 1), as in @f1.
  %1 = shl i64 %0, 4
  %2 = shl i64 %0, 3
  %3 = add i64 %2, -1
  %rem = and i64 %1, %3
  ret i64 %rem
}

;; Make sure it doesn't work if the shift on the -1 side is greater.
;; Here the masked value is x << 3 and the mask is (x << 4) - 1, so the mask
;; covers the masked bit and the result is not always zero (x == 1 gives 8).
define i64 @minus_shift_greater(i64 %arg) {
; CHECK-LABEL: define i64 @minus_shift_greater
; CHECK-SAME: (i64 [[ARG:%.*]]) {
; CHECK-NEXT:    [[NEG:%.*]] = sub i64 0, [[ARG]]
; CHECK-NEXT:    [[X:%.*]] = and i64 [[NEG]], [[ARG]]
; CHECK-NEXT:    [[SHL1:%.*]] = shl i64 [[X]], 3
; CHECK-NEXT:    [[SHL2:%.*]] = shl i64 [[X]], 4
; CHECK-NEXT:    [[MASK:%.*]] = add i64 [[SHL2]], -1
; CHECK-NEXT:    [[REM:%.*]] = and i64 [[SHL1]], [[MASK]]
; CHECK-NEXT:    ret i64 [[REM]]
;
  ;; %x = lowest set bit of %arg (a power of 2, or 0).
  %neg = sub i64 0, %arg
  %x = and i64 %neg, %arg
  ;; Shift amounts are swapped relative to @test_pow2_or_zero: 3 on the
  ;; masked value, 4 on the mask.
  %shl1 = shl i64 %x, 3
  %shl2 = shl i64 %x, 4
  %mask = add i64 %shl2, -1
  %rem = and i64 %shl1, %mask
  ret i64 %rem
}

;; Make sure it doesn't work if the subtract isn't one.
;; Identical to @test_pow2_or_zero except that the mask is (x << 3) - 2, i.e.
;; the constant added is -2 instead of -1; the IR is expected to be left as-is.
define i64 @sub2(i64 %arg) {
; CHECK-LABEL: define i64 @sub2
; CHECK-SAME: (i64 [[ARG:%.*]]) {
; CHECK-NEXT:    [[NEG:%.*]] = sub i64 0, [[ARG]]
; CHECK-NEXT:    [[X:%.*]] = and i64 [[NEG]], [[ARG]]
; CHECK-NEXT:    [[SHL1:%.*]] = shl i64 [[X]], 4
; CHECK-NEXT:    [[SHL2:%.*]] = shl i64 [[X]], 3
; CHECK-NEXT:    [[MASK:%.*]] = add i64 [[SHL2]], -2
; CHECK-NEXT:    [[REM:%.*]] = and i64 [[SHL1]], [[MASK]]
; CHECK-NEXT:    ret i64 [[REM]]
;
  ;; %x = lowest set bit of %arg (a power of 2, or 0).
  %neg = sub i64 0, %arg
  %x = and i64 %neg, %arg
  %shl1 = shl i64 %x, 4
  %shl2 = shl i64 %x, 3
  ;; The only deviation from the matched pattern: -2 rather than -1.
  %mask = add i64 %shl2, -2
  %rem = and i64 %shl1, %mask
  ret i64 %rem
}

;; Make sure it doesn't work with a right shift: the mask is built from
;; lshr x, 3 instead of shl x, 3. The IR is expected to be left as-is.
define i64 @rightshift(i64 %arg) {
; CHECK-LABEL: define i64 @rightshift
; CHECK-SAME: (i64 [[ARG:%.*]]) {
; CHECK-NEXT:    [[NEG:%.*]] = sub i64 0, [[ARG]]
; CHECK-NEXT:    [[X:%.*]] = and i64 [[NEG]], [[ARG]]
; CHECK-NEXT:    [[SHL1:%.*]] = shl i64 [[X]], 4
; CHECK-NEXT:    [[SHL2:%.*]] = lshr i64 [[X]], 3
; CHECK-NEXT:    [[MASK:%.*]] = add i64 [[SHL2]], -1
; CHECK-NEXT:    [[REM:%.*]] = and i64 [[SHL1]], [[MASK]]
; CHECK-NEXT:    ret i64 [[REM]]
;
  ;; %x = lowest set bit of %arg (a power of 2, or 0).
  %neg = sub i64 0, %arg
  %x = and i64 %neg, %arg
  %shl1 = shl i64 %x, 4
  ;; Despite its name, %shl2 is a logical right shift in this test.
  %shl2 = lshr i64 %x, 3
  %mask = add i64 %shl2, -1
  %rem = and i64 %shl1, %mask
  ret i64 %rem
}

;; Intrinsic called by @f1 and @no_pow2.
declare i64 @llvm.vscale.i64()

;; Attribute group #0, applied only to @f1: declares a vscale range of 1 to 16.
attributes #0 = { vscale_range(1,16) }