File: lower-lds-struct-aa-memcpy.ll

package info (click to toggle)
llvm-toolchain-19 1%3A19.1.7-3~deb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm-proposed-updates
  • size: 1,998,492 kB
  • sloc: cpp: 6,951,680; ansic: 1,486,157; asm: 913,598; python: 232,024; f90: 80,126; objc: 75,281; lisp: 37,276; pascal: 16,990; sh: 10,009; ml: 5,058; perl: 4,724; awk: 3,523; makefile: 3,167; javascript: 2,504; xml: 892; fortran: 664; cs: 573
file content (80 lines) | stat: -rw-r--r-- 5,075 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --version 3
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -O3 --amdgpu-lower-module-lds-strategy=module < %s | FileCheck -check-prefix=GCN %s
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s

%vec_type = type { %vec_base }
%vec_base = type { %union.anon }
%union.anon = type { %"vec_base<char, 3>::n_vec_" }
%"vec_base<char, 3>::n_vec_" = type { [3 x i8] }

$_f1 = comdat any
$_f2 = comdat any
@_f1 = linkonce_odr hidden local_unnamed_addr addrspace(3) global %vec_type poison, comdat, align 1
@_f2 = linkonce_odr hidden local_unnamed_addr addrspace(3) global %vec_type poison, comdat, align 1

;.
; CHECK: @[[LLVM_AMDGCN_KERNEL_TEST_LDS:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [[LLVM_AMDGCN_KERNEL_TEST_LDS_T:%.*]] poison, align 4, !absolute_symbol !0
;.
define protected amdgpu_kernel void @test(ptr addrspace(1) nocapture %ptr.coerce) local_unnamed_addr #0 {
; GCN-LABEL: test:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    v_mov_b32_e32 v0, 0
; GCN-NEXT:    v_mov_b32_e32 v1, 2
; GCN-NEXT:    ds_write_b8 v0, v1
; GCN-NEXT:    ds_read_u8 v2, v0 offset:2
; GCN-NEXT:    ds_read_u16 v3, v0
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x24
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    ds_write_b8 v0, v2 offset:6
; GCN-NEXT:    ds_write_b16 v0, v3 offset:4
; GCN-NEXT:    v_cmp_eq_u16_sdwa s[2:3], v3, v1 src0_sel:BYTE_0 src1_sel:DWORD
; GCN-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[2:3]
; GCN-NEXT:    global_store_byte v0, v1, s[0:1]
; GCN-NEXT:    s_endpgm
; CHECK-LABEL: define protected amdgpu_kernel void @test(
; CHECK-SAME: ptr addrspace(1) nocapture [[PTR_COERCE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i8 3, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, align 4, !alias.scope !1, !noalias !4
; CHECK-NEXT:    tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TEST_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), ptr addrspace(3) noundef align 1 dereferenceable(3) @llvm.amdgcn.kernel.test.lds, i64 3, i1 false), !alias.scope !6, !noalias !7
; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TEST_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), align 4, !alias.scope !4, !noalias !1
; CHECK-NEXT:    [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 3
; CHECK-NEXT:    store i8 2, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, align 4, !alias.scope !1, !noalias !4
; CHECK-NEXT:    tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TEST_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), ptr addrspace(3) noundef align 1 dereferenceable(3) @llvm.amdgcn.kernel.test.lds, i64 3, i1 false), !alias.scope !6, !noalias !7
; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TEST_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), align 4, !alias.scope !4, !noalias !1
; CHECK-NEXT:    [[CMP_I_I19:%.*]] = icmp eq i8 [[TMP1]], 2
; CHECK-NEXT:    [[TMP2:%.*]] = and i1 [[CMP_I_I19]], [[CMP_I_I]]
; CHECK-NEXT:    [[FROMBOOL8:%.*]] = zext i1 [[TMP2]] to i8
; CHECK-NEXT:    store i8 [[FROMBOOL8]], ptr addrspace(1) [[PTR_COERCE]], align 1
; CHECK-NEXT:    ret void
;
entry:
  store i8 3, ptr addrspace(3) @_f1, align 1
  tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) @_f2, ptr addrspace(3) noundef align 1 dereferenceable(3) @_f1, i64 3, i1 false)
  %0 = load i8, ptr addrspace(3) @_f2, align 1
  %cmp.i.i = icmp eq i8 %0, 3
  store i8 2, ptr addrspace(3) @_f1, align 1
  tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) @_f2, ptr addrspace(3) noundef align 1 dereferenceable(3) @_f1, i64 3, i1 false)
  %1 = load i8, ptr addrspace(3) @_f2, align 1
  %cmp.i.i19 = icmp eq i8 %1, 2
  %2 = and i1 %cmp.i.i19, %cmp.i.i
  %frombool8 = zext i1 %2 to i8
  store i8 %frombool8, ptr addrspace(1) %ptr.coerce, align 1
  ret void
}

declare void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #1

;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-lds-size"="7" }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 1}
; CHECK: [[META1:![0-9]+]] = !{!2}
; CHECK: [[META2:![0-9]+]] = distinct !{!2, !3}
; CHECK: [[META3:![0-9]+]] = distinct !{!3}
; CHECK: [[META4:![0-9]+]] = !{!5}
; CHECK: [[META5:![0-9]+]] = distinct !{!5, !3}
; CHECK: [[META6:![0-9]+]] = !{!5, !2}
; CHECK: [[META7:![0-9]+]] = !{}
;.