File: promote-alloca-to-lds-select.ll

package info (click to toggle)
llvm-toolchain-17 1%3A17.0.6-22
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,799,624 kB
  • sloc: cpp: 6,428,607; ansic: 1,383,196; asm: 793,408; python: 223,504; objc: 75,364; f90: 60,502; lisp: 33,869; pascal: 15,282; sh: 9,684; perl: 7,453; ml: 4,937; awk: 3,523; makefile: 2,889; javascript: 2,149; xml: 888; fortran: 619; cs: 573
file content (132 lines) | stat: -rw-r--r-- 6,920 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -passes=amdgpu-promote-alloca < %s | FileCheck %s

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"

; CHECK-LABEL: @lds_promoted_alloca_select_invalid_pointer_operand(
; CHECK: %alloca = alloca i32
; CHECK: select i1 undef, ptr addrspace(5) undef, ptr addrspace(5) %alloca
define amdgpu_kernel void @lds_promoted_alloca_select_invalid_pointer_operand() #0 {
  %alloca = alloca i32, align 4, addrspace(5)
  %select = select i1 undef, ptr addrspace(5) undef, ptr addrspace(5) %alloca
  store i32 0, ptr addrspace(5) %select, align 4
  ret void
}

; CHECK-LABEL: @lds_promote_alloca_select_two_derived_pointers(
; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], ptr addrspace(3) @lds_promote_alloca_select_two_derived_pointers.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: %ptr0 = getelementptr inbounds [16 x i32], ptr addrspace(3) [[ARRAYGEP]], i32 0, i32 %a
; CHECK: %ptr1 = getelementptr inbounds [16 x i32], ptr addrspace(3) [[ARRAYGEP]], i32 0, i32 %b
; CHECK: %select = select i1 undef, ptr addrspace(3) %ptr0, ptr addrspace(3) %ptr1
; CHECK: store i32 0, ptr addrspace(3) %select, align 4
define amdgpu_kernel void @lds_promote_alloca_select_two_derived_pointers(i32 %a, i32 %b) #0 {
  %alloca = alloca [16 x i32], align 4, addrspace(5)
  %ptr0 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %a
  %ptr1 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %b
  %select = select i1 undef, ptr addrspace(5) %ptr0, ptr addrspace(5) %ptr1
  store i32 0, ptr addrspace(5) %select, align 4
  ret void
}

; FIXME: This should be promotable but requires knowing that both will be promoted first.

; CHECK-LABEL: @lds_promote_alloca_select_two_allocas(
; CHECK: %alloca0 = alloca i32, i32 16, align 4
; CHECK: %alloca1 = alloca i32, i32 16, align 4
; CHECK: %ptr0 = getelementptr inbounds i32, ptr addrspace(5) %alloca0, i32 %a
; CHECK: %ptr1 = getelementptr inbounds i32, ptr addrspace(5) %alloca1, i32 %b
; CHECK: %select = select i1 undef, ptr addrspace(5) %ptr0, ptr addrspace(5) %ptr1
define amdgpu_kernel void @lds_promote_alloca_select_two_allocas(i32 %a, i32 %b) #0 {
  %alloca0 = alloca i32, i32 16, align 4, addrspace(5)
  %alloca1 = alloca i32, i32 16, align 4, addrspace(5)
  %ptr0 = getelementptr inbounds i32, ptr addrspace(5) %alloca0, i32 %a
  %ptr1 = getelementptr inbounds i32, ptr addrspace(5) %alloca1, i32 %b
  %select = select i1 undef, ptr addrspace(5) %ptr0, ptr addrspace(5) %ptr1
  store i32 0, ptr addrspace(5) %select, align 4
  ret void
}

; TODO: Maybe this should be canonicalized to select on the constant and GEP after.
; CHECK-LABEL: @lds_promote_alloca_select_two_derived_constant_pointers(
; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], ptr addrspace(3) @lds_promote_alloca_select_two_derived_constant_pointers.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: %ptr0 = getelementptr inbounds [16 x i32], ptr addrspace(3) [[ARRAYGEP]], i32 0, i32 1
; CHECK: %ptr1 = getelementptr inbounds [16 x i32], ptr addrspace(3) [[ARRAYGEP]], i32 0, i32 3
; CHECK: %select = select i1 undef, ptr addrspace(3) %ptr0, ptr addrspace(3) %ptr1
; CHECK: store i32 0, ptr addrspace(3) %select, align 4
define amdgpu_kernel void @lds_promote_alloca_select_two_derived_constant_pointers() #0 {
  %alloca = alloca [16 x i32], align 4, addrspace(5)
  %ptr0 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 1
  %ptr1 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 3
  %select = select i1 undef, ptr addrspace(5) %ptr0, ptr addrspace(5) %ptr1
  store i32 0, ptr addrspace(5) %select, align 4
  ret void
}

; FIXME: Can be promoted, but we'd have to recursively show that the select
; operands all point to the same alloca.

; CHECK-LABEL: @lds_promoted_alloca_select_input_select(
; CHECK: alloca
define amdgpu_kernel void @lds_promoted_alloca_select_input_select(i32 %a, i32 %b, i32 %c, i1 %c1, i1 %c2) #0 {
  %alloca = alloca [16 x i32], align 4, addrspace(5)
  %ptr0 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %a
  %ptr1 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %b
  %ptr2 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %c
  %select0 = select i1 %c1, ptr addrspace(5) %ptr0, ptr addrspace(5) %ptr1
  %select1 = select i1 %c2, ptr addrspace(5) %select0, ptr addrspace(5) %ptr2
  store i32 0, ptr addrspace(5) %select1, align 4
  ret void
}

define amdgpu_kernel void @lds_promoted_alloca_select_input_phi(i32 %a, i32 %b, i32 %c, i1 %c0) #0 {
entry:
  %alloca = alloca [16 x i32], align 4, addrspace(5)
  %ptr0 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %a
  %ptr1 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %b
  store i32 0, ptr addrspace(5) %ptr0
  br i1 %c0, label %bb1, label %bb2

bb1:
  %ptr2 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %c
  %select0 = select i1 undef, ptr addrspace(5) undef, ptr addrspace(5) %ptr2
  store i32 0, ptr addrspace(5) %ptr1
  br label %bb2

bb2:
  %phi.ptr = phi ptr addrspace(5) [ %ptr0, %entry ], [ %select0, %bb1 ]
  %select1 = select i1 undef, ptr addrspace(5) %phi.ptr, ptr addrspace(5) %ptr1
  store i32 0, ptr addrspace(5) %select1, align 4
  ret void
}

; CHECK-LABEL: @select_null_rhs(
; CHECK-NOT: alloca
; CHECK: select i1 %tmp2, ptr addrspace(3) %{{[0-9]+}}, ptr addrspace(3) null
define amdgpu_kernel void @select_null_rhs(ptr addrspace(1) nocapture %arg, i32 %arg1) #1 {
bb:
  %tmp = alloca double, align 8, addrspace(5)
  store double 0.000000e+00, ptr addrspace(5) %tmp, align 8
  %tmp2 = icmp eq i32 %arg1, 0
  %tmp3 = select i1 %tmp2, ptr addrspace(5) %tmp, ptr addrspace(5) null
  store double 1.000000e+00, ptr addrspace(5) %tmp3, align 8
  %tmp4 = load double, ptr addrspace(5) %tmp, align 8
  store double %tmp4, ptr addrspace(1) %arg
  ret void
}

; CHECK-LABEL: @select_null_lhs(
; CHECK-NOT: alloca
; CHECK: select i1 %tmp2, ptr addrspace(3) null, ptr addrspace(3) %{{[0-9]+}}
define amdgpu_kernel void @select_null_lhs(ptr addrspace(1) nocapture %arg, i32 %arg1) #1 {
bb:
  %tmp = alloca double, align 8, addrspace(5)
  store double 0.000000e+00, ptr addrspace(5) %tmp, align 8
  %tmp2 = icmp eq i32 %arg1, 0
  %tmp3 = select i1 %tmp2, ptr addrspace(5) null, ptr addrspace(5) %tmp
  store double 1.000000e+00, ptr addrspace(5) %tmp3, align 8
  %tmp4 = load double, ptr addrspace(5) %tmp, align 8
  store double %tmp4, ptr addrspace(1) %arg
  ret void
}

attributes #0 = { norecurse nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" }
attributes #1 = { norecurse nounwind }