File: mem-intrinsics.ll

package info (click to toggle)
llvm-toolchain-16 1%3A16.0.6-15~deb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 1,634,792 kB
  • sloc: cpp: 6,179,261; ansic: 1,216,205; asm: 741,319; python: 196,614; objc: 75,325; f90: 49,640; lisp: 32,396; pascal: 12,286; sh: 9,394; perl: 7,442; ml: 5,494; awk: 3,523; makefile: 2,723; javascript: 1,206; xml: 886; fortran: 581; cs: 573
file content (144 lines) | stat: -rw-r--r-- 9,385 bytes parent folder | download | duplicates (10)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s

; A flat memset whose destination is an addrspacecast of an addrspace(3)
; argument. The expected output calls the p3 overload on %group.ptr directly
; and still carries the !tbaa, !alias.scope and !noalias attachments.
; CHECK-LABEL: @memset_group_to_flat(
; CHECK: call void @llvm.memset.p3.i64(ptr addrspace(3) align 4 %group.ptr, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
define amdgpu_kernel void @memset_group_to_flat(ptr addrspace(3) %group.ptr, i32 %y) #0 {
  %flat.dst = addrspacecast ptr addrspace(3) %group.ptr to ptr
  call void @llvm.memset.p0.i64(ptr align 4 %flat.dst, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
  ret void
}

; Same shape as the addrspace(3) memset case, but the cast source is an
; addrspace(1) argument: the expected output uses the p1 overload on
; %global.ptr and keeps all three metadata attachments.
; CHECK-LABEL: @memset_global_to_flat(
; CHECK: call void @llvm.memset.p1.i64(ptr addrspace(1) align 4 %global.ptr, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
define amdgpu_kernel void @memset_global_to_flat(ptr addrspace(1) %global.ptr, i32 %y) #0 {
  %flat.dst = addrspacecast ptr addrspace(1) %global.ptr to ptr
  call void @llvm.memset.p0.i64(ptr align 4 %flat.dst, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
  ret void
}

; memset through a cast addrspace(3) pointer with a variable length and no
; metadata on the call. The trailing {{$}} in the pattern requires the
; rewritten p3 call to end the line, i.e. no metadata may be attached.
; CHECK-LABEL: @memset_group_to_flat_no_md(
; CHECK: call void @llvm.memset.p3.i64(ptr addrspace(3) align 4 %group.ptr, i8 4, i64 %size, i1 false){{$}}
define amdgpu_kernel void @memset_group_to_flat_no_md(ptr addrspace(3) %group.ptr, i64 %size) #0 {
  %flat.dst = addrspacecast ptr addrspace(3) %group.ptr to ptr
  call void @llvm.memset.p0.i64(ptr align 4 %flat.dst, i8 4, i64 %size, i1 false)
  ret void
}

; memset through a cast addrspace(1) pointer with a variable length and no
; metadata on the call. The trailing {{$}} in the pattern requires the
; rewritten p1 call to end the line, i.e. no metadata may be attached.
; CHECK-LABEL: @memset_global_to_flat_no_md(
; CHECK: call void @llvm.memset.p1.i64(ptr addrspace(1) align 4 %global.ptr, i8 4, i64 %size, i1 false){{$}}
define amdgpu_kernel void @memset_global_to_flat_no_md(ptr addrspace(1) %global.ptr, i64 %size) #0 {
  %flat.dst = addrspacecast ptr addrspace(1) %global.ptr to ptr
  call void @llvm.memset.p0.i64(ptr align 4 %flat.dst, i8 4, i64 %size, i1 false)
  ret void
}

; memcpy where only the source is an addrspacecast of an addrspace(3)
; argument. The expected output keeps the flat destination %dest, reads from
; %src.group.ptr via the p0.p3 overload, and retains the metadata attachments.
; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group(
; CHECK: call void @llvm.memcpy.p0.p3.i64(ptr align 4 %dest, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group(ptr %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
  %flat.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %flat.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
  ret void
}

; llvm.memcpy.inline variant of the source-replacement case, with a constant
; length of 42. The expected output uses the memcpy.inline p0.p3 overload on
; %src.group.ptr and retains the metadata attachments.
; CHECK-LABEL: @memcpy_inline_flat_to_flat_replace_src_with_group(
; CHECK: call void @llvm.memcpy.inline.p0.p3.i64(ptr align 4 %dest, ptr addrspace(3) align 4 %src.group.ptr, i64 42, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
define amdgpu_kernel void @memcpy_inline_flat_to_flat_replace_src_with_group(ptr %dest, ptr addrspace(3) %src.group.ptr) #0 {
  %flat.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
  call void @llvm.memcpy.inline.p0.p0.i64(ptr align 4 %dest, ptr align 4 %flat.src, i64 42, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
  ret void
}

; memcpy where only the destination is an addrspacecast of an addrspace(3)
; argument. The expected output writes to %dest.group.ptr via the p3.p0
; overload, leaves the flat source %src.ptr alone, and retains the metadata.
; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_with_group(
; CHECK: call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) align 4 %dest.group.ptr, ptr align 4 %src.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_with_group(ptr addrspace(3) %dest.group.ptr, ptr %src.ptr, i64 %size) #0 {
  %flat.dst = addrspacecast ptr addrspace(3) %dest.group.ptr to ptr
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %flat.dst, ptr align 4 %src.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
  ret void
}

; memcpy where both operands are addrspacecasts of two distinct addrspace(3)
; arguments. The expected output uses the p3.p3 overload with %dest.group.ptr
; as the destination and %src.group.ptr as the source, and retains the
; metadata attachments. The destination cast is taken from %dest.group.ptr so
; that the two operands really differ; the same-pointer case is exercised
; separately by @memcpy_group_flat_to_flat_self.
; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_src_with_group(
; CHECK: call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) align 4 %dest.group.ptr, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_src_with_group(ptr addrspace(3) %dest.group.ptr, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
  %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
  %cast.dest = addrspacecast ptr addrspace(3) %dest.group.ptr to ptr
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %cast.dest, ptr align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
  ret void
}

; memcpy whose destination is cast from an addrspace(3) argument and whose
; source is cast from an addrspace(1) argument. The expected output uses the
; mixed p3.p1 overload on the two original pointers and retains the metadata.
; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_group_src_global(
; CHECK: call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) align 4 %dest.group.ptr, ptr addrspace(1) align 4 %src.global.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_group_src_global(ptr addrspace(3) %dest.group.ptr, ptr addrspace(1) %src.global.ptr, i64 %size) #0 {
  %flat.dst = addrspacecast ptr addrspace(3) %dest.group.ptr to ptr
  %flat.src = addrspacecast ptr addrspace(1) %src.global.ptr to ptr
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %flat.dst, ptr align 4 %flat.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
  ret void
}

; The input call already takes an addrspace(3) source (p0.p3 overload, i32
; length); only the destination is a cast, from an addrspace(1) argument.
; The expected output uses the p1.p3.i32 overload and retains the metadata.
; CHECK-LABEL: @memcpy_group_to_flat_replace_dest_global(
; CHECK: call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 4 %dest.global.ptr, ptr addrspace(3) align 4 %src.group.ptr, i32 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
define amdgpu_kernel void @memcpy_group_to_flat_replace_dest_global(ptr addrspace(1) %dest.global.ptr, ptr addrspace(3) %src.group.ptr, i32 %size) #0 {
  %flat.dst = addrspacecast ptr addrspace(1) %dest.global.ptr to ptr
  call void @llvm.memcpy.p0.p3.i32(ptr align 4 %flat.dst, ptr addrspace(3) align 4 %src.group.ptr, i32 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
  ret void
}

; Source-replacement memcpy whose only attachment is !tbaa.struct !8. The
; expected output uses the p0.p3 overload and still carries !tbaa.struct !8.
; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(
; CHECK: call void @llvm.memcpy.p0.p3.i64(ptr align 4 %dest, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false), !tbaa.struct !8
define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(ptr %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
  %flat.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %flat.src, i64 %size, i1 false), !tbaa.struct !8
  ret void
}

; Source-replacement memcpy with no metadata on the input call. The trailing
; {{$}} in the pattern requires the rewritten p0.p3 call to end the line,
; i.e. no metadata may be attached.
; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_no_md(
; CHECK: call void @llvm.memcpy.p0.p3.i64(ptr align 4 %dest, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false){{$}}
define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md(ptr %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
  %flat.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %flat.src, i64 %size, i1 false)
  ret void
}

; Two metadata-free memcpy calls share one cast of %src.group.ptr as their
; source. Both calls are expected to be rewritten to the p0.p3 overload, in
; order (%dest0 first, then %dest1), each ending the line with no metadata.
; CHECK-LABEL: @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(
; CHECK: call void @llvm.memcpy.p0.p3.i64(ptr align 4 %dest0, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false){{$}}
; CHECK: call void @llvm.memcpy.p0.p3.i64(ptr align 4 %dest1, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false){{$}}
define amdgpu_kernel void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(ptr %dest0, ptr %dest1, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
  %flat.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest0, ptr align 4 %flat.src, i64 %size, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest1, ptr align 4 %flat.src, i64 %size, i1 false)
  ret void
}

; Guards against iterator problems when the same cast pointer is used twice
; in one call: it is both the destination and the source of the memcpy. The
; expected output uses the p3.p3 overload with %group.ptr in both positions
; and retains the metadata attachments.
; CHECK-LABEL: @memcpy_group_flat_to_flat_self(
; CHECK: call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) align 4 %group.ptr, ptr addrspace(3) align 4 %group.ptr, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
define amdgpu_kernel void @memcpy_group_flat_to_flat_self(ptr addrspace(3) %group.ptr) #0 {
  %flat.ptr = addrspacecast ptr addrspace(3) %group.ptr to ptr
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %flat.ptr, ptr align 4 %flat.ptr, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
  ret void
}
; memmove counterpart of the source-replacement case: the source is a cast of
; an addrspace(3) argument. The expected output uses the memmove p0.p3
; overload on %src.group.ptr and retains the metadata attachments.
; CHECK-LABEL: @memmove_flat_to_flat_replace_src_with_group(
; CHECK: call void @llvm.memmove.p0.p3.i64(ptr align 4 %dest, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
define amdgpu_kernel void @memmove_flat_to_flat_replace_src_with_group(ptr %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
  %flat.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
  call void @llvm.memmove.p0.p0.i64(ptr align 4 %dest, ptr align 4 %flat.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6
  ret void
}

; Declarations of the intrinsic overloads called by the input IR above.
; Only the overloads that appear in the input are declared; the
; address-space-specific overloads named in the expected output are not.
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1) #1
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1) #1
declare void @llvm.memcpy.inline.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1) #1
declare void @llvm.memcpy.p0.p3.i32(ptr nocapture writeonly, ptr addrspace(3) nocapture readonly, i32, i1) #1
declare void @llvm.memmove.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1) #1

; Attribute groups: #0 is attached to every kernel definition in this file,
; #1 to the intrinsic declarations.
attributes #0 = { nounwind }
attributes #1 = { argmemonly nounwind }

; Metadata attached to the intrinsic calls in this file. The node numbers
; !0, !3, !6 and !8 are spelled out literally in the FileCheck patterns, so
; renumbering these nodes would require updating those patterns as well.
;   !0      - attached as !tbaa; refers to !1 ("A"), which refers to !2
;             ("tbaa root").
;   !3, !4  - attached as !alias.scope; list !3 holds "some scope 1" (!4).
;   !5      - "some domain", referenced by both scope nodes !4 and !7.
;   !6, !7  - attached as !noalias; list !6 holds "some scope 2" (!7).
;   !8      - attached as !tbaa.struct.
!0 = !{!1, !1, i64 0}
!1 = !{!"A", !2}
!2 = !{!"tbaa root"}
!3 = !{!4}
!4 = distinct !{!4, !5, !"some scope 1"}
!5 = distinct !{!5, !"some domain"}
!6 = !{!7}
!7 = distinct !{!7, !5, !"some scope 2"}
!8 = !{i64 0, i64 8, null}