; File: gep_simplification.ll
;
; package info
; intel-graphics-compiler 1.0.17791.18-1
;   - links: PTS, VCS
;   - area: main
;   - in suites: sid
;   - size: 102,312 kB
;   - sloc: cpp: 935,343; lisp: 286,143; ansic: 16,196; python: 3,279; yacc: 2,487; lex: 1,642; pascal: 300; sh: 174; makefile: 27
; file content (144 lines) | stat: -rw-r--r-- 6,243 bytes
;=========================== begin_copyright_notice ============================
;
; Copyright (C) 2022-2024 Intel Corporation
;
; SPDX-License-Identifier: MIT
;
;============================ end_copyright_notice =============================
;
; ------------------------------------------------
;
; REQUIRES: opaque-ptr-fix, llvm-14-plus, regkeys
;
; RUN: igc_opt --opaque-pointers -platformpvc --igc-gep-lowering -regkey=EnableGEPSimplification=1,TestGEPSimplification=1 -S %s  | FileCheck %s
; ------------------------------------------------
; GEPLowering/GEP simplification : testing GEP strength reduction
; ------------------------------------------------

; Data layout: little-endian ("e"), 64-bit pointers with 64-bit ABI and preferred
; alignment ("p:64:64:64"), and native integer widths of 8, 16 and 32 bits
; ("n8:16:32"). The remaining entries give the ABI/preferred alignments of the
; integer, floating-point and vector types.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
; 64-bit SPIR target triple.
target triple = "spir64-unknown-unknown"

;
; CHECK-LABEL: define spir_kernel void @test_gep
;

; Function Attrs: convergent nounwind
; Kernel used as input for the GEP simplification test.
;
; Parameters:
;   %dst      - i32 buffer in addrspace(1) that receives one sum per case
;   %src      - i32 buffer in addrspace(1) that every case loads from
;   %inc0     - variable index increment used in case 3
;   %inc1     - variable index increment used (plus 1) in case 3
;   %Offset64 - i64 term of the base index shared by cases 2 and 3
;   %Offset32 - i32 term of the base index used by case 1
;
; The body is three basic blocks chained by unconditional branches
; (case1 -> case2 -> case3). Each block loads four i32 values from %src at a
; base index and at that base index plus three different increments, sums
; them, and stores the sum to %dst at the base index. The expected output for
; each block is a single GEP on %src for the base index, followed by GEPs that
; use the result of that first GEP as their pointer operand and only the
; increment as their index.
define spir_kernel void @test_gep(i32 addrspace(1)* %dst, i32 addrspace(1)* %src, i64 %inc0, i64 %inc1, i64 %Offset64, i32 %Offset32) #0 {
;
; case 1 : the GEP index is a zext/sext of an i32 value
;
; The increments (4, 8, 12) are added in i32 before the extension to i64.
; Only the GEPs on %src are matched below; the GEP on %dst is not.
;
; CHECK-LABEL: case1:
; CHECK: [[C1T0:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* %src, i64 %{{.*}}
; CHECK: {{.*}} = getelementptr inbounds i32, i32 addrspace(1)* [[C1T0]], i64 4
; CHECK: {{.*}} = getelementptr inbounds i32, i32 addrspace(1)* [[C1T0]], i64 8
; CHECK: {{.*}} = getelementptr inbounds i32, i32 addrspace(1)* [[C1T0]], i64 12
;
case1:
  ; i32 base index = %Offset32 + zero-extended result of the simdLaneId intrinsic
  %simdLaneId16 = call i16 @llvm.genx.GenISA.simdLaneId()
  %simdLaneId = zext i16 %simdLaneId16 to i32
  %idbase1 = add nsw i32 %Offset32, %simdLaneId
  ; load at the base index
  %id1.1 = zext i32 %idbase1 to i64
  %addr1.1 = getelementptr inbounds i32, i32 addrspace(1)* %src, i64 %id1.1
  %res1.0 = load i32, i32 addrspace(1)* %addr1.1, align 4
  ; load at base index + 4
  %add11.1 = add nsw i32 %idbase1, 4
  %id1.2 = zext i32 %add11.1 to i64
  %addr1.2 = getelementptr inbounds i32, i32 addrspace(1)* %src, i64 %id1.2
  %res1.1 = load i32, i32 addrspace(1)* %addr1.2, align 4
  %sum1.0 = add nsw i32 %res1.0, %res1.1
  ; load at base index + 8
  %add11.2 = add nsw i32 %idbase1, 8
  %id1.3 = zext i32 %add11.2 to i64
  %addr1.3 = getelementptr inbounds i32, i32 addrspace(1)* %src, i64 %id1.3
  %res1.2 = load i32, i32 addrspace(1)* %addr1.3, align 4
  %sum1.1 = add nsw i32 %sum1.0, %res1.2
  ; load at base index + 12
  %add11.3 = add nsw i32 %idbase1, 12
  %id1.4 = zext i32 %add11.3 to i64
  %addr1.4 = getelementptr inbounds i32, i32 addrspace(1)* %src, i64 %id1.4
  %res1.3 = load i32, i32 addrspace(1)* %addr1.4, align 4
  %sum1.2 = add nsw i32 %sum1.1, %res1.3
  ; the store index uses sext (not zext) of the same i32 base index
  %id1.5 = sext i32 %idbase1 to i64
  %addr1.5 = getelementptr inbounds i32, i32 addrspace(1)* %dst, i64 %id1.5
  store i32 %sum1.2, i32 addrspace(1)* %addr1.5, align 4
  br label %case2

;
; case 2 : the GEP index is an i64 add of a base index and a constant
;
; The increments are the constants 10, 20 and 30, added directly in i64.
;
; CHECK-LABEL: case2:
; CHECK: [[C2T0:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* %src, i64 %{{.*}}
; CHECK: [[C2T1:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[C2T0]], i64 10
; CHECK: [[C2T2:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[C2T0]], i64 20
; CHECK: [[C2T3:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[C2T0]], i64 30
;
case2:
  ; i64 base index = %Offset64 + zero-extended %simdLaneId16 (defined in case1)
  %simdLaneId64 = zext i16 %simdLaneId16 to i64
  %idbase2 = add i64 %Offset64, %simdLaneId64
  ; load at the base index
  %addr2 = getelementptr inbounds i32, i32 addrspace(1)* %src, i64 %idbase2
  %res2.0 = load i32, i32 addrspace(1)* %addr2, align 4
  ; load at base index + 10
  %id2.1 = add nsw i64 %idbase2, 10
  %addr2.1 = getelementptr inbounds i32, i32 addrspace(1)* %src, i64 %id2.1
  %res2.1 = load i32, i32 addrspace(1)* %addr2.1, align 4
  %sum2.0 = add nsw i32 %res2.0, %res2.1
  ; load at base index + 20
  %id2.2 = add nsw i64 %idbase2, 20
  %addr2.2 = getelementptr inbounds i32, i32 addrspace(1)* %src, i64 %id2.2
  %res2.2 = load i32, i32 addrspace(1)* %addr2.2, align 4
  %sum2.1 = add nsw i32 %sum2.0, %res2.2
  ; load at base index + 30
  %id2.3 = add nsw i64 %idbase2, 30
  %addr2.3 = getelementptr inbounds i32, i32 addrspace(1)* %src, i64 %id2.3
  %res2.3 = load i32, i32 addrspace(1)* %addr2.3, align 4
  %sum2.2 = add nsw i32 %sum2.1, %res2.3
  ; store the sum to %dst at the base index
  %addr2.4 = getelementptr inbounds i32, i32 addrspace(1)* %dst, i64 %idbase2
  store i32 %sum2.2, i32 addrspace(1)* %addr2.4, align 4
  br label %case3

;
; case 3 : the address increment between two GEPs is a variable
;          (invariant within the basic block), not a constant
;
; The increments are %inc0, %inc1 + 1 and the constant 128, all relative to
; %idbase2 from case2.
;
; CHECK-LABEL: case3:
; CHECK: [[C3T0:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* %src, i64 %{{.*}}
; CHECK: [[C3T1:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[C3T0]], i64 %inc0
;
; Match the add of %inc1 and 1, with or without the nsw flag. It is the second
; add after this point, so %sum3.0 is matched first to step past the first one.
;
; CHECK: %sum3.0 = add
; CHECK: [[C3T2:%.*]] = add
; CHECK-SAME: i64 %inc1, 1
;
; CHECK: [[C3T3:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[C3T0]], i64 [[C3T2]]
; CHECK: [[C3T4:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[C3T0]], i64 128
;
case3:
  ; load at the base index (%idbase2 is defined in case2)
  %addr3 = getelementptr inbounds i32, i32 addrspace(1)* %src, i64 %idbase2
  %res3.0 = load i32, i32 addrspace(1)* %addr3, align 4
  ; load at base index + %inc0
  %id3.1 = add nsw i64 %idbase2, %inc0
  %addr3.1 = getelementptr inbounds i32, i32 addrspace(1)* %src, i64 %id3.1
  %res3.1 = load i32, i32 addrspace(1)* %addr3.1, align 4
  %sum3.0 = add nsw i32 %res3.0, %res3.1
  ; load at base index + (%inc1 + 1)
  %incinc1 = add nsw i64 %inc1, 1
  %id3.2 = add nsw i64 %idbase2, %incinc1
  %addr3.2 = getelementptr inbounds i32, i32 addrspace(1)* %src, i64 %id3.2
  %res3.2 = load i32, i32 addrspace(1)* %addr3.2, align 4
  %sum3.1 = add nsw i32 %sum3.0, %res3.2
  ; load at base index + 128
  %id3.3 = add nsw i64 %idbase2, 128
  %addr3.3 = getelementptr inbounds i32, i32 addrspace(1)* %src, i64 %id3.3
  %res3.3 = load i32, i32 addrspace(1)* %addr3.3, align 4
  %sum3.2 = add nsw i32 %sum3.1, %res3.3
  ; store the sum to %dst at the base index
  %addr3.4 = getelementptr inbounds i32, i32 addrspace(1)* %dst, i64 %idbase2
  store i32 %sum3.2, i32 addrspace(1)* %addr3.4, align 4
;
; CHECK: ret void
;
  ret void
}

; Function Attrs: nounwind readnone
; Declaration of the intrinsic called in the case1 block of @test_gep; it takes
; no arguments and returns an i16.
declare i16 @llvm.genx.GenISA.simdLaneId() #1

; Attribute group #0 is attached to @test_gep, #1 to the intrinsic declaration.
attributes #0 = { convergent nounwind "less-precise-fpmad"="true" }
attributes #1 = { nounwind readnone }

; Named metadata whose only operand is the node !0.
!igc.functions = !{!0}

; !0 pairs @test_gep with its info node !1.
!0 = !{void (i32 addrspace(1)*, i32 addrspace(1)*, i64, i64, i64, i32)* @test_gep, !1}
; !1 groups the "function_type" node (!2) and the "implicit_arg_desc" node (!3).
!1 = !{!2, !3}
; NOTE(review): function_type 0 presumably marks a kernel entry point - confirm
; against the IGC metadata definitions.
!2 = !{!"function_type", i32 0}
; "implicit_arg_desc" has no further operands here.
; NOTE(review): presumably this means no implicit arguments - confirm.
!3 = !{!"implicit_arg_desc"}