File: tag-invariant-loads.ll

package info (click to toggle)
llvm-toolchain-21 1%3A21.1.6-2
  • links: PTS, VCS
  • area: main
  • in suites: forky
  • size: 2,245,044 kB
  • sloc: cpp: 7,619,726; ansic: 1,434,018; asm: 1,058,748; python: 252,740; f90: 94,671; objc: 70,685; lisp: 42,813; pascal: 18,401; sh: 8,601; ml: 5,111; perl: 4,720; makefile: 3,666; awk: 3,523; javascript: 2,409; xml: 892; fortran: 770
file content (136 lines) | stat: -rw-r--r-- 5,576 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -passes=nvptx-tag-invariant-loads < %s -mcpu=sm_80 | FileCheck %s --check-prefix=OPT
; RUN: llc -o - < %s -mcpu=sm_80 | FileCheck %s --check-prefix=PTX
;
; The first run line applies only the nvptx-tag-invariant-loads pass and its IR
; output is verified with the OPT prefix. The second run line invokes llc and
; its PTX output is verified with the PTX prefix. Both use -mcpu=sm_80.

target triple = "nvptx-unknown-cuda"

; Baseline case: a ptx_kernel whose `noalias readonly` pointer argument %a is
; cast to addrspace(1) and loaded once. The expected IR carries
; !invariant.load on that load, and the expected PTX uses ld.global.nc for it.
define ptx_kernel void @basic(ptr noalias readonly %a, ptr %out) {
; OPT-LABEL: define ptx_kernel void @basic(
; OPT-SAME: ptr noalias readonly [[A:%.*]], ptr [[OUT:%.*]]) #[[ATTR0:[0-9]+]] {
; OPT-NEXT:    [[A_GLOBAL:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(1)
; OPT-NEXT:    [[VAL:%.*]] = load float, ptr addrspace(1) [[A_GLOBAL]], align 4, !invariant.load [[META0:![0-9]+]]
; OPT-NEXT:    store float [[VAL]], ptr [[OUT]], align 4
; OPT-NEXT:    ret void
;
; PTX-LABEL: basic(
; PTX:       {
; PTX-NEXT:    .reg .b32 %r<6>;
; PTX-EMPTY:
; PTX-NEXT:  // %bb.0:
; PTX-NEXT:    ld.param.b32 %r1, [basic_param_0];
; PTX-NEXT:    cvta.to.global.u32 %r2, %r1;
; PTX-NEXT:    ld.param.b32 %r3, [basic_param_1];
; PTX-NEXT:    cvta.to.global.u32 %r4, %r3;
; PTX-NEXT:    ld.global.nc.b32 %r5, [%r2];
; PTX-NEXT:    st.global.b32 [%r4], %r5;
; PTX-NEXT:    ret;
  %a_global = addrspacecast ptr %a to ptr addrspace(1)
  %val = load float, ptr addrspace(1) %a_global
  store float %val, ptr %out
  ret void
}

; The loaded address is a select between two `noalias readonly` kernel
; arguments (%a and %b), cast to addrspace(1) after the select. The expected IR
; still tags the load with !invariant.load, and the expected PTX loads through
; the selp result with ld.global.nc.
define ptx_kernel void @select(ptr noalias readonly %a, ptr noalias readonly %b, i1 %c, ptr %out) {
; OPT-LABEL: define ptx_kernel void @select(
; OPT-SAME: ptr noalias readonly [[A:%.*]], ptr noalias readonly [[B:%.*]], i1 [[C:%.*]], ptr [[OUT:%.*]]) #[[ATTR0]] {
; OPT-NEXT:    [[SELECT:%.*]] = select i1 [[C]], ptr [[A]], ptr [[B]]
; OPT-NEXT:    [[SELECT_GLOBAL:%.*]] = addrspacecast ptr [[SELECT]] to ptr addrspace(1)
; OPT-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[SELECT_GLOBAL]], align 4, !invariant.load [[META0]]
; OPT-NEXT:    store i32 [[VAL]], ptr [[OUT]], align 4
; OPT-NEXT:    ret void
;
; PTX-LABEL: select(
; PTX:       {
; PTX-NEXT:    .reg .pred %p<2>;
; PTX-NEXT:    .reg .b16 %rs<3>;
; PTX-NEXT:    .reg .b32 %r<9>;
; PTX-EMPTY:
; PTX-NEXT:  // %bb.0:
; PTX-NEXT:    ld.param.b8 %rs1, [select_param_2];
; PTX-NEXT:    and.b16 %rs2, %rs1, 1;
; PTX-NEXT:    setp.ne.b16 %p1, %rs2, 0;
; PTX-NEXT:    ld.param.b32 %r1, [select_param_0];
; PTX-NEXT:    cvta.to.global.u32 %r2, %r1;
; PTX-NEXT:    ld.param.b32 %r3, [select_param_1];
; PTX-NEXT:    cvta.to.global.u32 %r4, %r3;
; PTX-NEXT:    ld.param.b32 %r5, [select_param_3];
; PTX-NEXT:    cvta.to.global.u32 %r6, %r5;
; PTX-NEXT:    selp.b32 %r7, %r2, %r4, %p1;
; PTX-NEXT:    ld.global.nc.b32 %r8, [%r7];
; PTX-NEXT:    st.global.b32 [%r6], %r8;
; PTX-NEXT:    ret;
  %select = select i1 %c, ptr %a, ptr %b
  %select_global = addrspacecast ptr %select to ptr addrspace(1)
  %val = load i32, ptr addrspace(1) %select_global
  store i32 %val, ptr %out
  ret void
}

; Negative case: the body matches @basic, but the function is not declared
; with the ptx_kernel calling convention. The expected IR load has no
; !invariant.load, and the expected PTX uses a plain ld.global (no .nc) for the
; load and a generic st.b32 for the store.
define void @not_kernel(ptr noalias readonly %a, ptr %out) {
; OPT-LABEL: define void @not_kernel(
; OPT-SAME: ptr noalias readonly [[A:%.*]], ptr [[OUT:%.*]]) #[[ATTR0]] {
; OPT-NEXT:    [[A_GLOBAL:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(1)
; OPT-NEXT:    [[VAL:%.*]] = load float, ptr addrspace(1) [[A_GLOBAL]], align 4
; OPT-NEXT:    store float [[VAL]], ptr [[OUT]], align 4
; OPT-NEXT:    ret void
;
; PTX-LABEL: not_kernel(
; PTX:       {
; PTX-NEXT:    .reg .b32 %r<5>;
; PTX-EMPTY:
; PTX-NEXT:  // %bb.0:
; PTX-NEXT:    ld.param.b32 %r1, [not_kernel_param_0];
; PTX-NEXT:    cvta.to.global.u32 %r2, %r1;
; PTX-NEXT:    ld.param.b32 %r3, [not_kernel_param_1];
; PTX-NEXT:    ld.global.b32 %r4, [%r2];
; PTX-NEXT:    st.b32 [%r3], %r4;
; PTX-NEXT:    ret;
  %a_global = addrspacecast ptr %a to ptr addrspace(1)
  %val = load float, ptr addrspace(1) %a_global
  store float %val, ptr %out
  ret void
}

; Two-field i64 struct type and a private constant instance of it. @G is one of
; the two select operands in @global_load below.
%struct.S2 = type { i64, i64 }
@G = private unnamed_addr constant %struct.S2 { i64 1, i64 1 }, align 8

; The loaded address is a select between the constant global @G and the
; `noalias readonly` kernel argument %a, each cast to addrspace(1) before the
; select. The expected IR tags the i64 load with !invariant.load, and the
; expected PTX loads through the selp result with ld.global.nc.b64.
define ptx_kernel void @global_load(ptr noalias readonly %a, i1 %c, ptr %out) {
; OPT-LABEL: define ptx_kernel void @global_load(
; OPT-SAME: ptr noalias readonly [[A:%.*]], i1 [[C:%.*]], ptr [[OUT:%.*]]) #[[ATTR0]] {
; OPT-NEXT:    [[G_GLOBAL:%.*]] = addrspacecast ptr @G to ptr addrspace(1)
; OPT-NEXT:    [[A_GLOBAL:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(1)
; OPT-NEXT:    [[SELECT:%.*]] = select i1 [[C]], ptr addrspace(1) [[G_GLOBAL]], ptr addrspace(1) [[A_GLOBAL]]
; OPT-NEXT:    [[VAL:%.*]] = load i64, ptr addrspace(1) [[SELECT]], align 8, !invariant.load [[META0]]
; OPT-NEXT:    store i64 [[VAL]], ptr [[OUT]], align 8
; OPT-NEXT:    ret void
;
; PTX-LABEL: global_load(
; PTX:       {
; PTX-NEXT:    .reg .pred %p<2>;
; PTX-NEXT:    .reg .b16 %rs<3>;
; PTX-NEXT:    .reg .b32 %r<7>;
; PTX-NEXT:    .reg .b64 %rd<2>;
; PTX-EMPTY:
; PTX-NEXT:  // %bb.0:
; PTX-NEXT:    ld.param.b8 %rs1, [global_load_param_1];
; PTX-NEXT:    and.b16 %rs2, %rs1, 1;
; PTX-NEXT:    setp.ne.b16 %p1, %rs2, 0;
; PTX-NEXT:    ld.param.b32 %r1, [global_load_param_0];
; PTX-NEXT:    cvta.to.global.u32 %r2, %r1;
; PTX-NEXT:    ld.param.b32 %r3, [global_load_param_2];
; PTX-NEXT:    cvta.to.global.u32 %r4, %r3;
; PTX-NEXT:    mov.b32 %r5, G;
; PTX-NEXT:    selp.b32 %r6, %r5, %r2, %p1;
; PTX-NEXT:    ld.global.nc.b64 %rd1, [%r6];
; PTX-NEXT:    st.global.b64 [%r4], %rd1;
; PTX-NEXT:    ret;
  %g_global = addrspacecast ptr @G to ptr addrspace(1)
  %a_global = addrspacecast ptr %a to ptr addrspace(1)
  %select = select i1 %c, ptr addrspace(1) %g_global, ptr addrspace(1) %a_global
  %val = load i64, ptr addrspace(1) %select
  store i64 %val, ptr %out
  ret void
}
;.
; OPT: [[META0]] = !{}
;.