File: replace-lds-by-ptr-indirect-call-selected_functions.ll

package info (click to toggle)
llvm-toolchain-14 1%3A14.0.6-12
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 1,496,180 kB
  • sloc: cpp: 5,593,972; ansic: 986,872; asm: 585,869; python: 184,223; objc: 72,530; lisp: 31,119; f90: 27,793; javascript: 9,780; pascal: 9,762; sh: 9,482; perl: 7,468; ml: 5,432; awk: 3,523; makefile: 2,538; xml: 953; cs: 573; fortran: 567
file content (151 lines) | stat: -rw-r--r-- 7,301 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
; RUN: opt -S -mtriple=amdgcn--  -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s

; DESCRIPTION:
;
; There are three LDS globals defined here, each used within exactly one of three
; non-kernel functions. There are three kernels, each of which *indirectly* calls two of
; the three non-kernel functions. Hence pointer replacement should take place for all three
; LDS globals, and pointer initialization within each kernel should happen selectively,
; depending on which LDS globals are reachable from that kernel.
;

; Original LDS should exist.
; The three addrspace(3) globals below are the LDS variables under test. Each is
; an internal, undef-initialized array of four i32, and each is referenced by
; exactly one non-kernel function in this file (@function_1, @function_2 and
; @function_3 respectively). The expectations require their definitions to
; appear in the output with the same linkage, type, initializer and alignment
; as in the input.
; CHECK: @lds_used_within_function_1 = internal addrspace(3) global [4 x i32] undef, align 4
; CHECK: @lds_used_within_function_2 = internal addrspace(3) global [4 x i32] undef, align 4
; CHECK: @lds_used_within_function_3 = internal addrspace(3) global [4 x i32] undef, align 4
@lds_used_within_function_1 = internal addrspace(3) global [4 x i32] undef, align 4
@lds_used_within_function_2 = internal addrspace(3) global [4 x i32] undef, align 4
@lds_used_within_function_3 = internal addrspace(3) global [4 x i32] undef, align 4

; Function pointers should exist.
; Each global below holds the address of one non-kernel function. The kernels
; in this file never name @function_1/2/3 directly; they load one of these
; pointers and call through the loaded value, which is what makes every call
; in this test an indirect call. The expectations require these definitions to
; appear in the output exactly as written in the input.
; CHECK: @ptr_to_func1 = internal local_unnamed_addr externally_initialized global void (float)* @function_1, align 8
; CHECK: @ptr_to_func2 = internal local_unnamed_addr externally_initialized global void (i16)* @function_2, align 8
; CHECK: @ptr_to_func3 = internal local_unnamed_addr externally_initialized global void (i8)* @function_3, align 8
@ptr_to_func1 = internal local_unnamed_addr externally_initialized global void (float)* @function_1, align 8
@ptr_to_func2 = internal local_unnamed_addr externally_initialized global void (i16)* @function_2, align 8
@ptr_to_func3 = internal local_unnamed_addr externally_initialized global void (i8)* @function_3, align 8

; For each of the three LDS globals, a new i16 global named <lds>.ptr should be created in addrspace(3).
; CHECK: @lds_used_within_function_1.ptr = internal unnamed_addr addrspace(3) global i16 undef, align 2
; CHECK: @lds_used_within_function_2.ptr = internal unnamed_addr addrspace(3) global i16 undef, align 2
; CHECK: @lds_used_within_function_3.ptr = internal unnamed_addr addrspace(3) global i16 undef, align 2

; Pointer replacement code should be added.
;
; @function_3 is a non-kernel function whose only use of LDS is computing the
; address of element 0 of @lds_used_within_function_3. Its i8 argument %c is
; not used by the body; the kernels pass it when calling through @ptr_to_func3.
;
; Expected output: the direct reference to the LDS global is replaced by
;   1. a load of the i16 value held in @lds_used_within_function_3.ptr,
;   2. a getelementptr that offsets a null addrspace(3) i8 pointer by that value,
;   3. a bitcast of the result to [4 x i32] addrspace(3)*,
; and the original %gep then uses the bitcast result as its base pointer.
define internal void @function_3(i8 %c) {
; CHECK-LABEL: entry:
; CHECK:   %0 = load i16, i16 addrspace(3)* @lds_used_within_function_3.ptr, align 2
; CHECK:   %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0
; CHECK:   %2 = bitcast i8 addrspace(3)* %1 to [4 x i32] addrspace(3)*
; CHECK:   %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* %2, i32 0, i32 0
; CHECK:   ret void
entry:
  ; Sole LDS use in this function; the result is intentionally left unused.
  %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @lds_used_within_function_3, i32 0, i32 0
  ret void
}

; Pointer replacement code should be added.
;
; @function_2 is a non-kernel function whose only use of LDS is computing the
; address of element 0 of @lds_used_within_function_2. Its i16 argument %i is
; not used by the body; the kernels pass it when calling through @ptr_to_func2.
;
; Expected output: the direct reference to the LDS global is replaced by
;   1. a load of the i16 value held in @lds_used_within_function_2.ptr,
;   2. a getelementptr that offsets a null addrspace(3) i8 pointer by that value,
;   3. a bitcast of the result to [4 x i32] addrspace(3)*,
; and the original %gep then uses the bitcast result as its base pointer.
define internal void @function_2(i16 %i) {
; CHECK-LABEL: entry:
; CHECK:   %0 = load i16, i16 addrspace(3)* @lds_used_within_function_2.ptr, align 2
; CHECK:   %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0
; CHECK:   %2 = bitcast i8 addrspace(3)* %1 to [4 x i32] addrspace(3)*
; CHECK:   %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* %2, i32 0, i32 0
; CHECK:   ret void
entry:
  ; Sole LDS use in this function; the result is intentionally left unused.
  %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @lds_used_within_function_2, i32 0, i32 0
  ret void
}

; Pointer replacement code should be added.
;
; @function_1 is a non-kernel function whose only use of LDS is computing the
; address of element 0 of @lds_used_within_function_1. Its float argument %f is
; not used by the body; the kernels pass it when calling through @ptr_to_func1.
;
; Expected output: the direct reference to the LDS global is replaced by
;   1. a load of the i16 value held in @lds_used_within_function_1.ptr,
;   2. a getelementptr that offsets a null addrspace(3) i8 pointer by that value,
;   3. a bitcast of the result to [4 x i32] addrspace(3)*,
; and the original %gep then uses the bitcast result as its base pointer.
define internal void @function_1(float %f) {
; CHECK-LABEL: entry:
; CHECK:   %0 = load i16, i16 addrspace(3)* @lds_used_within_function_1.ptr, align 2
; CHECK:   %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0
; CHECK:   %2 = bitcast i8 addrspace(3)* %1 to [4 x i32] addrspace(3)*
; CHECK:   %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* %2, i32 0, i32 0
; CHECK:   ret void
entry:
  ; Sole LDS use in this function; the result is intentionally left unused.
  %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @lds_used_within_function_1, i32 0, i32 0
  ret void
}

; Pointer initialization code should be added.
;
; This kernel loads the function pointers stored in @ptr_to_func3 and
; @ptr_to_func1 and calls through them, so it reaches @function_3 and
; @function_1 only indirectly. It never references @ptr_to_func2.
;
; Expected output, in order:
;   entry:   a call to llvm.amdgcn.mbcnt.lo(-1, 0), a comparison of its result
;            with 0, and a conditional branch to block 2 (equal) or block 3.
;            Presumably this selects a single lane to perform the stores --
;            confirm against the AMDGPU intrinsic documentation.
;   block 2: stores of the LDS addresses, converted to i16 via ptrtoint, into
;            @lds_used_within_function_3.ptr and @lds_used_within_function_1.ptr,
;            then an unconditional branch to block 3.
;   block 3: a call to llvm.amdgcn.wave.barrier, followed by the original
;            loads, indirect calls and return.
;
; NOTE(review): the directives below only require the two listed stores to be
; present in this order; they contain no negative match, so an extra store to
; @lds_used_within_function_2.ptr would not make the test fail.
define protected amdgpu_kernel void @kernel_calls_function_3_and_1() {
; CHECK-LABEL: entry:
; CHECK:   %0 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; CHECK:   %1 = icmp eq i32 %0, 0
; CHECK:   br i1 %1, label %2, label %3
;
; CHECK-LABEL: 2:
; CHECK:   store i16 ptrtoint ([4 x i32] addrspace(3)* @lds_used_within_function_3 to i16), i16 addrspace(3)* @lds_used_within_function_3.ptr, align 2
; CHECK:   store i16 ptrtoint ([4 x i32] addrspace(3)* @lds_used_within_function_1 to i16), i16 addrspace(3)* @lds_used_within_function_1.ptr, align 2
; CHECK:   br label %3
;
; CHECK-LABEL: 3:
; CHECK:   call void @llvm.amdgcn.wave.barrier()
; CHECK:   %fptr3 = load void (i8)*, void (i8)** @ptr_to_func3, align 8
; CHECK:   %fptr1 = load void (float)*, void (float)** @ptr_to_func1, align 8
; CHECK:   call void %fptr3(i8 1)
; CHECK:   call void %fptr1(float 2.000000e+00)
; CHECK:   ret void
entry:
  ; Fetch both callees from their pointer globals, then call each indirectly.
  %fptr3 = load void (i8)*, void (i8)** @ptr_to_func3, align 8
  %fptr1 = load void (float)*, void (float)** @ptr_to_func1, align 8
  call void %fptr3(i8 1)
  call void %fptr1(float 2.0)
  ret void
}

; Pointer initialization code should be added.
;
; This kernel loads the function pointers stored in @ptr_to_func2 and
; @ptr_to_func3 and calls through them, so it reaches @function_2 and
; @function_3 only indirectly. It never references @ptr_to_func1.
;
; Expected output, in order:
;   entry:   a call to llvm.amdgcn.mbcnt.lo(-1, 0), a comparison of its result
;            with 0, and a conditional branch to block 2 (equal) or block 3.
;            Presumably this selects a single lane to perform the stores --
;            confirm against the AMDGPU intrinsic documentation.
;   block 2: stores of the LDS addresses, converted to i16 via ptrtoint, into
;            @lds_used_within_function_3.ptr and @lds_used_within_function_2.ptr,
;            then an unconditional branch to block 3.
;   block 3: a call to llvm.amdgcn.wave.barrier, followed by the original
;            loads, indirect calls and return.
;
; NOTE(review): the directives below only require the two listed stores to be
; present in this order; they contain no negative match, so an extra store to
; @lds_used_within_function_1.ptr would not make the test fail.
define protected amdgpu_kernel void @kernel_calls_function_2_and_3() {
; CHECK-LABEL: entry:
; CHECK:   %0 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; CHECK:   %1 = icmp eq i32 %0, 0
; CHECK:   br i1 %1, label %2, label %3
;
; CHECK-LABEL: 2:
; CHECK:   store i16 ptrtoint ([4 x i32] addrspace(3)* @lds_used_within_function_3 to i16), i16 addrspace(3)* @lds_used_within_function_3.ptr, align 2
; CHECK:   store i16 ptrtoint ([4 x i32] addrspace(3)* @lds_used_within_function_2 to i16), i16 addrspace(3)* @lds_used_within_function_2.ptr, align 2
; CHECK:   br label %3
;
; CHECK-LABEL: 3:
; CHECK:   call void @llvm.amdgcn.wave.barrier()
; CHECK:   %fptr2 = load void (i16)*, void (i16)** @ptr_to_func2, align 8
; CHECK:   %fptr3 = load void (i8)*, void (i8)** @ptr_to_func3, align 8
; CHECK:   call void %fptr2(i16 3)
; CHECK:   call void %fptr3(i8 4)
; CHECK:   ret void
entry:
  ; Fetch both callees from their pointer globals, then call each indirectly.
  %fptr2 = load void (i16)*, void (i16)** @ptr_to_func2, align 8
  %fptr3 = load void (i8)*, void (i8)** @ptr_to_func3, align 8
  call void %fptr2(i16 3)
  call void %fptr3(i8 4)
  ret void
}

; Pointer initialization code should be added.
;
; This kernel loads the function pointers stored in @ptr_to_func1 and
; @ptr_to_func2 and calls through them, so it reaches @function_1 and
; @function_2 only indirectly. It never references @ptr_to_func3.
;
; Expected output, in order:
;   entry:   a call to llvm.amdgcn.mbcnt.lo(-1, 0), a comparison of its result
;            with 0, and a conditional branch to block 2 (equal) or block 3.
;            Presumably this selects a single lane to perform the stores --
;            confirm against the AMDGPU intrinsic documentation.
;   block 2: stores of the LDS addresses, converted to i16 via ptrtoint, into
;            @lds_used_within_function_2.ptr and @lds_used_within_function_1.ptr,
;            then an unconditional branch to block 3.
;   block 3: a call to llvm.amdgcn.wave.barrier, followed by the original
;            loads, indirect calls and return.
;
; NOTE(review): the directives below only require the two listed stores to be
; present in this order; they contain no negative match, so an extra store to
; @lds_used_within_function_3.ptr would not make the test fail.
define protected amdgpu_kernel void @kernel_calls_function_1_and_2() {
; CHECK-LABEL: entry:
; CHECK:   %0 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; CHECK:   %1 = icmp eq i32 %0, 0
; CHECK:   br i1 %1, label %2, label %3
;
; CHECK-LABEL: 2:
; CHECK:   store i16 ptrtoint ([4 x i32] addrspace(3)* @lds_used_within_function_2 to i16), i16 addrspace(3)* @lds_used_within_function_2.ptr, align 2
; CHECK:   store i16 ptrtoint ([4 x i32] addrspace(3)* @lds_used_within_function_1 to i16), i16 addrspace(3)* @lds_used_within_function_1.ptr, align 2
; CHECK:   br label %3
;
; CHECK-LABEL: 3:
; CHECK:   call void @llvm.amdgcn.wave.barrier()
; CHECK:   %fptr1 = load void (float)*, void (float)** @ptr_to_func1, align 8
; CHECK:   %fptr2 = load void (i16)*, void (i16)** @ptr_to_func2, align 8
; CHECK:   call void %fptr1(float 5.000000e+00)
; CHECK:   call void %fptr2(i16 6)
; CHECK:   ret void
entry:
  ; Fetch both callees from their pointer globals, then call each indirectly.
  %fptr1 = load void (float)*, void (float)** @ptr_to_func1, align 8
  %fptr2 = load void (i16)*, void (i16)** @ptr_to_func2, align 8
  call void %fptr1(float 5.0)
  call void %fptr2(i16 6)
  ret void
}