File: pr48844-br-to-switch-vectorization.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes="default<O2>" -mattr=avx < %s | FileCheck --check-prefix=AVX %s
; RUN: opt -S -passes="default<O2>" -mattr=avx2 < %s | FileCheck --check-prefix=AVX2 %s

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Make sure we still vectorize when the branches are converted to a switch.
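; Sketch of what is being checked (a summary, not part of the autogenerated
; assertions): the scalar loop below compares each i32 element against 13 and
; -12 with an icmp/or chain and stores 42 on a match. The O2 pipeline folds
; that compare chain into the switch seen in the CHECK lines. With
; -mattr=avx2 the loop is expected to vectorize into <8 x i32> wide loads and
; llvm.masked.store calls plus a vector epilogue; with plain -mattr=avx only
; the scalar switch loop is expected.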
define dso_local void @test(ptr %start, ptr %end) #0 {
;
; AVX-LABEL: @test(
; AVX-NEXT:  entry:
; AVX-NEXT:    [[I11_NOT1:%.*]] = icmp eq ptr [[START:%.*]], [[END:%.*]]
; AVX-NEXT:    br i1 [[I11_NOT1]], label [[EXIT:%.*]], label [[BB12:%.*]]
; AVX:       bb12:
; AVX-NEXT:    [[PTR2:%.*]] = phi ptr [ [[PTR_NEXT:%.*]], [[LATCH:%.*]] ], [ [[START]], [[ENTRY:%.*]] ]
; AVX-NEXT:    [[VAL:%.*]] = load i32, ptr [[PTR2]], align 4
; AVX-NEXT:    switch i32 [[VAL]], label [[LATCH]] [
; AVX-NEXT:      i32 -12, label [[STORE:%.*]]
; AVX-NEXT:      i32 13, label [[STORE]]
; AVX-NEXT:    ]
; AVX:       store:
; AVX-NEXT:    store i32 42, ptr [[PTR2]], align 4
; AVX-NEXT:    br label [[LATCH]]
; AVX:       latch:
; AVX-NEXT:    [[PTR_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR2]], i64 4
; AVX-NEXT:    [[I11_NOT:%.*]] = icmp eq ptr [[PTR_NEXT]], [[END]]
; AVX-NEXT:    br i1 [[I11_NOT]], label [[EXIT]], label [[BB12]]
; AVX:       exit:
; AVX-NEXT:    ret void
;
; AVX2-LABEL: @test(
; AVX2-NEXT:  entry:
; AVX2-NEXT:    [[I11_NOT1:%.*]] = icmp eq ptr [[START:%.*]], [[END:%.*]]
; AVX2-NEXT:    br i1 [[I11_NOT1]], label [[EXIT:%.*]], label [[BB12_PREHEADER:%.*]]
; AVX2:       iter.check:
; AVX2-NEXT:    [[END3:%.*]] = ptrtoint ptr [[END]] to i64
; AVX2-NEXT:    [[START4:%.*]] = ptrtoint ptr [[START]] to i64
; AVX2-NEXT:    [[TMP0:%.*]] = add i64 [[END3]], -4
; AVX2-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START4]]
; AVX2-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 2
; AVX2-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; AVX2-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP1]], 28
; AVX2-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label [[BB12_PREHEADER1:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; AVX2:       vector.main.loop.iter.check:
; AVX2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 124
; AVX2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[BB12_PREHEADER11:%.*]], label [[VECTOR_PH:%.*]]
; AVX2:       vector.ph:
; AVX2-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP3]], 9223372036854775776
; AVX2-NEXT:    br label [[VECTOR_BODY:%.*]]
; AVX2:       vector.body:
; AVX2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX2-NEXT:    [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 2
; AVX2-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 32
; AVX2-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 64
; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 96
; AVX2-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[NEXT_GEP]], align 4
; AVX2-NEXT:    [[WIDE_LOAD8:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4
; AVX2-NEXT:    [[WIDE_LOAD9:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4
; AVX2-NEXT:    [[WIDE_LOAD10:%.*]] = load <8 x i32>, ptr [[TMP7]], align 4
; AVX2-NEXT:    [[TMP8:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD]], splat (i32 -12)
; AVX2-NEXT:    [[TMP9:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD8]], splat (i32 -12)
; AVX2-NEXT:    [[TMP10:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD9]], splat (i32 -12)
; AVX2-NEXT:    [[TMP11:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD10]], splat (i32 -12)
; AVX2-NEXT:    [[TMP12:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD]], splat (i32 13)
; AVX2-NEXT:    [[TMP13:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD8]], splat (i32 13)
; AVX2-NEXT:    [[TMP14:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD9]], splat (i32 13)
; AVX2-NEXT:    [[TMP15:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD10]], splat (i32 13)
; AVX2-NEXT:    [[TMP16:%.*]] = or <8 x i1> [[TMP8]], [[TMP12]]
; AVX2-NEXT:    [[TMP17:%.*]] = or <8 x i1> [[TMP9]], [[TMP13]]
; AVX2-NEXT:    [[TMP18:%.*]] = or <8 x i1> [[TMP10]], [[TMP14]]
; AVX2-NEXT:    [[TMP19:%.*]] = or <8 x i1> [[TMP11]], [[TMP15]]
; AVX2-NEXT:    tail call void @llvm.masked.store.v8i32.p0(<8 x i32> splat (i32 42), ptr [[NEXT_GEP]], i32 4, <8 x i1> [[TMP16]])
; AVX2-NEXT:    tail call void @llvm.masked.store.v8i32.p0(<8 x i32> splat (i32 42), ptr [[TMP5]], i32 4, <8 x i1> [[TMP17]])
; AVX2-NEXT:    tail call void @llvm.masked.store.v8i32.p0(<8 x i32> splat (i32 42), ptr [[TMP6]], i32 4, <8 x i1> [[TMP18]])
; AVX2-NEXT:    tail call void @llvm.masked.store.v8i32.p0(<8 x i32> splat (i32 42), ptr [[TMP7]], i32 4, <8 x i1> [[TMP19]])
; AVX2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; AVX2-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; AVX2-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; AVX2:       middle.block:
; AVX2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; AVX2-NEXT:    br i1 [[CMP_N]], label [[EXIT]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; AVX2:       vec.epilog.iter.check:
; AVX2-NEXT:    [[TMP26:%.*]] = shl i64 [[N_VEC]], 2
; AVX2-NEXT:    [[IND_END11:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP26]]
; AVX2-NEXT:    [[N_VEC_REMAINING:%.*]] = and i64 [[TMP3]], 24
; AVX2-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0
; AVX2-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[BB12_PREHEADER1]], label [[BB12_PREHEADER11]]
; AVX2:       vec.epilog.ph:
; AVX2-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; AVX2-NEXT:    [[N_VEC10:%.*]] = and i64 [[TMP3]], 9223372036854775800
; AVX2-NEXT:    [[TMP21:%.*]] = shl i64 [[N_VEC10]], 2
; AVX2-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP21]]
; AVX2-NEXT:    br label [[BB12:%.*]]
; AVX2:       vec.epilog.vector.body:
; AVX2-NEXT:    [[INDEX12:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[BB12_PREHEADER11]] ], [ [[INDEX_NEXT16:%.*]], [[BB12]] ]
; AVX2-NEXT:    [[OFFSET_IDX13:%.*]] = shl i64 [[INDEX12]], 2
; AVX2-NEXT:    [[NEXT_GEP14:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX13]]
; AVX2-NEXT:    [[WIDE_LOAD15:%.*]] = load <8 x i32>, ptr [[NEXT_GEP14]], align 4
; AVX2-NEXT:    [[TMP22:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD15]], splat (i32 -12)
; AVX2-NEXT:    [[TMP23:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD15]], splat (i32 13)
; AVX2-NEXT:    [[TMP24:%.*]] = or <8 x i1> [[TMP22]], [[TMP23]]
; AVX2-NEXT:    tail call void @llvm.masked.store.v8i32.p0(<8 x i32> splat (i32 42), ptr [[NEXT_GEP14]], i32 4, <8 x i1> [[TMP24]])
; AVX2-NEXT:    [[INDEX_NEXT16]] = add nuw i64 [[INDEX12]], 8
; AVX2-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT16]], [[N_VEC10]]
; AVX2-NEXT:    br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[BB12]], !llvm.loop [[LOOP3:![0-9]+]]
; AVX2:       vec.epilog.middle.block:
; AVX2-NEXT:    [[CMP_N17:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC10]]
; AVX2-NEXT:    br i1 [[CMP_N17]], label [[EXIT]], label [[BB12_PREHEADER1]]
; AVX2:       bb12.preheader:
; AVX2-NEXT:    [[PTR2_PH:%.*]] = phi ptr [ [[IND_END11]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], [[BB12_PREHEADER]] ], [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
; AVX2-NEXT:    br label [[BB13:%.*]]
; AVX2:       bb12:
; AVX2-NEXT:    [[PTR2:%.*]] = phi ptr [ [[PTR_NEXT:%.*]], [[LATCH:%.*]] ], [ [[PTR2_PH]], [[BB12_PREHEADER1]] ]
; AVX2-NEXT:    [[VAL:%.*]] = load i32, ptr [[PTR2]], align 4
; AVX2-NEXT:    switch i32 [[VAL]], label [[LATCH]] [
; AVX2-NEXT:      i32 -12, label [[STORE:%.*]]
; AVX2-NEXT:      i32 13, label [[STORE]]
; AVX2-NEXT:    ]
; AVX2:       store:
; AVX2-NEXT:    store i32 42, ptr [[PTR2]], align 4
; AVX2-NEXT:    br label [[LATCH]]
; AVX2:       latch:
; AVX2-NEXT:    [[PTR_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR2]], i64 4
; AVX2-NEXT:    [[I11_NOT:%.*]] = icmp eq ptr [[PTR_NEXT]], [[END]]
; AVX2-NEXT:    br i1 [[I11_NOT]], label [[EXIT]], label [[BB13]], !llvm.loop [[LOOP4:![0-9]+]]
; AVX2:       exit:
; AVX2-NEXT:    ret void
;
entry:
  br label %header

header:
  %ptr = phi ptr [ %start, %entry ], [ %ptr.next, %latch ]
  %i11 = icmp ne ptr %ptr, %end
  br i1 %i11, label %bb12, label %exit

bb12:
  %val = load i32, ptr %ptr, align 4
  %c1 = icmp eq i32 %val, 13
  %c2 = icmp eq i32 %val, -12
  %c3 = or i1 %c1, %c2
  br i1 %c3, label %store, label %latch

store:
  store i32 42, ptr %ptr, align 4
  br label %latch

latch:
  %ptr.next = getelementptr inbounds i32, ptr %ptr, i32 1
  br label %header

exit:
  ret void
}