File: massive_indirection.ll

package info (click to toggle)
llvm-toolchain-20 1%3A20.1.6-1~exp1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 2,111,304 kB
  • sloc: cpp: 7,438,677; ansic: 1,393,822; asm: 1,012,926; python: 241,650; f90: 86,635; objc: 75,479; lisp: 42,144; pascal: 17,286; sh: 10,027; ml: 5,082; perl: 4,730; awk: 3,523; makefile: 3,349; javascript: 2,251; xml: 892; fortran: 672
file content (180 lines) | stat: -rw-r--r-- 9,666 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt %s -mtriple=x86_64-unknown-linux-gnu -passes=load-store-vectorizer -mcpu=skx -S -o - | FileCheck %s

; This test verifies that the vectorizer can handle an extended sequence of
; getelementptr instructions and generate longer vectors. With special handling,
; some elements can still be vectorized even if they require looking up the
; common underlying object deeper than 6 levels from the original pointer.

; The test below is a simplified version of an actual performance-oriented
; workload; the offsets in the getelementptr instructions are similar or
; identical to keep the test simple.

; Four zero stores of 1, 4, 2 and 1 half elements that together cover 16
; contiguous bytes starting at %a6. The store pointers are 6, 7, 8 and 8
; getelementptr steps away from %arg1. The assertions below expect the four
; stores to be replaced by one <8 x half> store through %a6 with align 16.
; NOTE(review): the function name lists "v2" before "v4", while the stores in
; the body are issued in the order half, <4 x half>, <2 x half>, half.
define void @v1_v2_v4_v1_to_v8_levels_6_7_8_8(i32 %arg0, ptr align 16 %arg1) {
; CHECK-LABEL: define void @v1_v2_v4_v1_to_v8_levels_6_7_8_8(
; CHECK-SAME: i32 [[ARG0:%.*]], ptr align 16 [[ARG1:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:    [[LEVEL1:%.*]] = getelementptr i8, ptr [[ARG1]], i32 917504
; CHECK-NEXT:    [[LEVEL2:%.*]] = getelementptr i8, ptr [[LEVEL1]], i32 [[ARG0]]
; CHECK-NEXT:    [[LEVEL3:%.*]] = getelementptr i8, ptr [[LEVEL2]], i32 32768
; CHECK-NEXT:    [[LEVEL4:%.*]] = getelementptr i8, ptr [[LEVEL3]], i32 [[ARG0]]
; CHECK-NEXT:    [[LEVEL5:%.*]] = getelementptr i8, ptr [[LEVEL4]], i32 [[ARG0]]
; CHECK-NEXT:    [[A6:%.*]] = getelementptr i8, ptr [[LEVEL5]], i32 [[ARG0]]
; CHECK-NEXT:    store <8 x half> zeroinitializer, ptr [[A6]], align 16
; CHECK-NEXT:    ret void
;

  ; Levels 1-5: a chain of byte-wise GEPs rooted at %arg1 that mixes constant
  ; offsets with the variable offset %arg0.
  %level1 = getelementptr i8, ptr %arg1, i32 917504
  %level2 = getelementptr i8, ptr %level1, i32 %arg0
  %level3 = getelementptr i8, ptr %level2, i32 32768
  %level4 = getelementptr i8, ptr %level3, i32 %arg0
  %level5 = getelementptr i8, ptr %level4, i32 %arg0

  ; Store addresses. The numeric suffix is the GEP depth counted from %arg1.
  ; Relative to %a6: %b7 = +2, %c8 = +10 (2 + 8), %d8 = +14 (2 + 12).
  %a6 = getelementptr i8, ptr %level5, i32 %arg0
  %b7 = getelementptr i8, ptr %a6, i32 2
  %c8 = getelementptr i8, ptr %b7, i32 8
  %d8 = getelementptr i8, ptr %b7, i32 12

  ; Byte ranges written relative to %a6: [0,2), [2,10), [10,14), [14,16).
  ; Only the first store carries align 16; the rest are align 2.
  store half 0xH0000, ptr %a6, align 16
  store <4 x half> zeroinitializer, ptr %b7, align 2
  store <2 x half> zeroinitializer, ptr %c8, align 2
  store half 0xH0000, ptr %d8, align 2
  ret void
}

; Eight scalar half stores to consecutive 2-byte slots starting at %a6. Each
; store pointer is derived from the previous one, so the pointers are 6 through
; 13 getelementptr steps away from %arg1. The assertions below expect all eight
; stores to be replaced by one <8 x half> store through %a6 with align 16.
define void @v1x8_levels_6_7_8_9_10_11_12_13(i32 %arg0, ptr align 16 %arg1) {
; CHECK-LABEL: define void @v1x8_levels_6_7_8_9_10_11_12_13(
; CHECK-SAME: i32 [[ARG0:%.*]], ptr align 16 [[ARG1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[LEVEL1:%.*]] = getelementptr i8, ptr [[ARG1]], i32 917504
; CHECK-NEXT:    [[LEVEL2:%.*]] = getelementptr i8, ptr [[LEVEL1]], i32 [[ARG0]]
; CHECK-NEXT:    [[LEVEL3:%.*]] = getelementptr i8, ptr [[LEVEL2]], i32 32768
; CHECK-NEXT:    [[LEVEL4:%.*]] = getelementptr i8, ptr [[LEVEL3]], i32 [[ARG0]]
; CHECK-NEXT:    [[LEVEL5:%.*]] = getelementptr i8, ptr [[LEVEL4]], i32 [[ARG0]]
; CHECK-NEXT:    [[A6:%.*]] = getelementptr i8, ptr [[LEVEL5]], i32 [[ARG0]]
; CHECK-NEXT:    store <8 x half> zeroinitializer, ptr [[A6]], align 16
; CHECK-NEXT:    ret void
;

  ; Levels 1-5: the same shape of byte-wise GEP chain rooted at %arg1, mixing
  ; constant offsets with the variable offset %arg0.
  %level1 = getelementptr i8, ptr %arg1, i32 917504
  %level2 = getelementptr i8, ptr %level1, i32 %arg0
  %level3 = getelementptr i8, ptr %level2, i32 32768
  %level4 = getelementptr i8, ptr %level3, i32 %arg0
  %level5 = getelementptr i8, ptr %level4, i32 %arg0

  ; Linear chain of store addresses: every pointer is the previous one plus 2
  ; bytes, so %b7..%h13 are at offsets 2, 4, ..., 14 from %a6. The numeric
  ; suffix is the GEP depth counted from %arg1.
  %a6 = getelementptr i8, ptr %level5, i32 %arg0
  %b7 = getelementptr i8, ptr %a6, i32 2
  %c8 = getelementptr i8, ptr %b7, i32 2
  %d9 = getelementptr i8, ptr %c8, i32 2
  %e10 = getelementptr i8, ptr %d9, i32 2
  %f11 = getelementptr i8, ptr %e10, i32 2
  %g12 = getelementptr i8, ptr %f11, i32 2
  %h13 = getelementptr i8, ptr %g12, i32 2

  ; One half per slot. The first store is align 16 and the store to %e10
  ; (offset 8 from %a6) is align 8; all others are align 2.
  store half 0xH0000, ptr %a6, align 16
  store half 0xH0000, ptr %b7, align 2
  store half 0xH0000, ptr %c8, align 2
  store half 0xH0000, ptr %d9, align 2
  store half 0xH0000, ptr %e10, align 8
  store half 0xH0000, ptr %f11, align 2
  store half 0xH0000, ptr %g12, align 2
  store half 0xH0000, ptr %h13, align 2
  ret void
}

; Variant with a pointer in addrspace(3), non-constant stored values and a GEP
; chain that is spread over three basic blocks. The four stores in %.lr.ph
; cover 16 contiguous bytes starting at %dst; %dst is 6 getelementptr steps
; away from %arg1_ptr and the offset pointers are 7 steps away. The assertions
; below expect the stored values to be packed into one <8 x half> with
; insertelement/extractelement and written by a single store with align 2.
define void @v1_4_4_4_2_1_to_v8_8_levels_6_7(i32 %arg0, ptr addrspace(3) align 16 %arg1_ptr, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, half %arg6_half, half %arg7_half, <2 x half> %arg8_2xhalf) {
; CHECK-LABEL: define void @v1_4_4_4_2_1_to_v8_8_levels_6_7(
; CHECK-SAME: i32 [[ARG0:%.*]], ptr addrspace(3) align 16 [[ARG1_PTR:%.*]], i32 [[ARG2:%.*]], i32 [[ARG3:%.*]], i32 [[ARG4:%.*]], i32 [[ARG5:%.*]], half [[ARG6_HALF:%.*]], half [[ARG7_HALF:%.*]], <2 x half> [[ARG8_2XHALF:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[ARG1_PTR]], i32 458752
; CHECK-NEXT:    br [[DOTPREHEADER11_PREHEADER:label %.*]]
; CHECK:       [[_PREHEADER11_PREHEADER:.*:]]
; CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i32 [[ARG0]], 6
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP1]], i32 [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP3]], i32 [[ARG2]]
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[ARG3]]
; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[ARG0]], 2
; CHECK-NEXT:    br i1 [[CMP]], [[DOTLR_PH:label %.*]], [[DOTEXIT_POINT:label %.*]]
; CHECK:       [[_LR_PH:.*:]]
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP5]], i32 [[ARG4]]
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[GEP]], i32 [[ARG5]]
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <8 x half> poison, half [[ARG6_HALF]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x half> [[TMP7]], half 0xH0000, i32 1
; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x half> [[TMP8]], half 0xH0000, i32 2
; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <8 x half> [[TMP9]], half 0xH0000, i32 3
; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <8 x half> [[TMP10]], half 0xH0000, i32 4
; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x half> [[ARG8_2XHALF]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <8 x half> [[TMP11]], half [[TMP12]], i32 5
; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x half> [[ARG8_2XHALF]], i32 1
; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <8 x half> [[TMP13]], half [[TMP14]], i32 6
; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <8 x half> [[TMP15]], half [[ARG7_HALF]], i32 7
; CHECK-NEXT:    store <8 x half> [[TMP16]], ptr addrspace(3) [[TMP6]], align 2
; CHECK-NEXT:    br [[DOTEXIT_POINT]]
; CHECK:       [[_EXIT_POINT:.*:]]
; CHECK-NEXT:    ret void
;
  ; Entry block: level 1, a constant byte offset from the incoming pointer.
  %base1 = getelementptr inbounds i8, ptr addrspace(3) %arg1_ptr, i32 458752
  br label %.preheader11.preheader

.preheader11.preheader:
  ; Level 2: variable offset %arg0 << 6.
  %base2 = shl nuw nsw i32 %arg0, 6
  %base3 = getelementptr inbounds i8, ptr addrspace(3) %base1, i32 %base2

  ; Levels 3 and 4: further variable offsets taken from %arg2 and %arg3.
  %base4 = getelementptr inbounds i8, ptr addrspace(3) %base3, i32 %arg2
  %base5 = getelementptr inbounds i8, ptr addrspace(3) %base4, i32 %arg3

  ; The stores are executed only when %arg0 > 2 (signed compare).
  %cmp = icmp sgt i32 %arg0, 2
  br i1 %cmp, label %.lr.ph, label %.exit_point

.lr.ph:
  ; Level 5.
  %gep = getelementptr inbounds i8, ptr addrspace(3) %base5, i32 %arg4

  ; Level 6 (%dst) and three level-7 pointers at constant offsets 2, 10 and 14
  ; from %dst.
  %dst = getelementptr inbounds i8, ptr addrspace(3) %gep, i32 %arg5
  %dst_off2 = getelementptr inbounds i8, ptr addrspace(3) %dst, i32 2
  %dst_off10 = getelementptr inbounds i8, ptr addrspace(3) %dst, i32 10
  %dst_off14 = getelementptr inbounds i8, ptr addrspace(3) %dst, i32 14

  ; Byte ranges written relative to %dst: [0,2), [2,10), [10,14), [14,16).
  ; All four stores are align 2.
  store half %arg6_half, ptr addrspace(3) %dst, align 2
  store <4 x half> zeroinitializer, ptr addrspace(3) %dst_off2, align 2
  store <2 x half> %arg8_2xhalf, ptr addrspace(3) %dst_off10, align 2
  store half %arg7_half, ptr addrspace(3) %dst_off14, align 2
  br label %.exit_point

.exit_point:
  ret void
}

; The regression test for merging equivalence classes. It is reduced and adapted
; for LSV from llvm/test/CodeGen/NVPTX/variadics-backend.ll, which failed at
; post-commit checks with memory sanitizer on the initial attempt to implement
; the merging of the equivalence classes.
;
; The body alternates llvm.ptrmask calls (mask 0) with byte-wise GEPs and then
; issues two adjacent double loads, at %argp.cur16.aligned and 8 bytes past it.
; The assertions below expect the two loads to be merged into one <2 x double>
; load whose lanes are extracted and fed to the fadd.
define void @variadics1(ptr %vlist) {
; CHECK-LABEL: define void @variadics1(
; CHECK-SAME: ptr [[VLIST:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[ARGP_CUR7_ALIGNED2:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[VLIST]], i64 0)
; CHECK-NEXT:    [[ARGP_NEXT8:%.*]] = getelementptr i8, ptr [[ARGP_CUR7_ALIGNED2]], i64 8
; CHECK-NEXT:    [[X0:%.*]] = getelementptr i8, ptr [[ARGP_NEXT8]], i32 7
; CHECK-NEXT:    [[ARGP_CUR11_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[X0]], i64 0)
; CHECK-NEXT:    [[ARGP_NEXT12:%.*]] = getelementptr i8, ptr [[ARGP_CUR11_ALIGNED]], i64 8
; CHECK-NEXT:    [[X2:%.*]] = getelementptr i8, ptr [[ARGP_NEXT12]], i32 7
; CHECK-NEXT:    [[ARGP_CUR16_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[X2]], i64 0)
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[ARGP_CUR16_ALIGNED]], align 4294967296
; CHECK-NEXT:    [[X31:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
; CHECK-NEXT:    [[X42:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
; CHECK-NEXT:    [[X5:%.*]] = fadd double [[X42]], [[X31]]
; CHECK-NEXT:    store double [[X5]], ptr null, align 8
; CHECK-NEXT:    ret void
;
  ; First mask / advance step: ptrmask with mask 0, then +8 and +7 bytes.
  %argp.cur7.aligned2 = call ptr @llvm.ptrmask.p0.i64(ptr %vlist, i64 0)
  %argp.next8 = getelementptr i8, ptr %argp.cur7.aligned2, i64 8
  %x0 = getelementptr i8, ptr %argp.next8, i32 7
  ; Second mask / advance step, same shape as the first.
  %argp.cur11.aligned = call ptr @llvm.ptrmask.p0.i64(ptr %x0, i64 0)
  %argp.next12 = getelementptr i8, ptr %argp.cur11.aligned, i64 8
  %x2 = getelementptr i8, ptr %argp.next12, i32 7
  ; Third mask produces the base pointer of the two loads.
  ; NOTE(review): the expected vector load is align 4294967296 although both
  ; scalar loads are align 8; presumably the alignment is inferred from the
  ; zero mask -- confirm against the pass before relying on this.
  %argp.cur16.aligned = call ptr @llvm.ptrmask.p0.i64(ptr %x2, i64 0)
  %x3 = load double, ptr %argp.cur16.aligned, align 8
  ; Second load is 8 bytes past the first, i.e. directly adjacent to it.
  %argp.cur16.aligned_off8 = getelementptr i8, ptr %argp.cur16.aligned, i32 8
  %x4 = load double, ptr %argp.cur16.aligned_off8, align 8
  ; Consume both loaded values so that neither load is dead.
  %x5 = fadd double %x4, %x3
  store double %x5, ptr null, align 8
  ret void
}

; Declaration of the pointer-masking intrinsic called by @variadics1
; (default address space pointer, i64 mask).
declare ptr @llvm.ptrmask.p0.i64(ptr, i64)