1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169
|
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=none -o - %s | FileCheck %s
--- |
; ModuleID = 'test/CodeGen/AMDGPU/memcpy-scoped-aa.ll'
source_filename = "test/CodeGen/AMDGPU/memcpy-scoped-aa.ll"
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define i32 @test_memcpy(i32 addrspace(1)* nocapture %p, i32 addrspace(1)* nocapture readonly %q) #0 {
%p0 = bitcast i32 addrspace(1)* %p to i8 addrspace(1)*
%add.ptr = getelementptr inbounds i32, i32 addrspace(1)* %p, i64 4
%p1 = bitcast i32 addrspace(1)* %add.ptr to i8 addrspace(1)*
tail call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* noundef nonnull align 4 dereferenceable(16) %p0, i8 addrspace(1)* noundef nonnull align 4 dereferenceable(16) %p1, i64 16, i1 false), !alias.scope !0, !noalias !3
%1 = bitcast i32 addrspace(1)* %q to <2 x i32> addrspace(1)*
%2 = load <2 x i32>, <2 x i32> addrspace(1)* %1, align 4, !alias.scope !3, !noalias !0
%v01 = extractelement <2 x i32> %2, i32 0
%v12 = extractelement <2 x i32> %2, i32 1
%add = add i32 %v01, %v12
ret i32 %add
}
define i32 @test_memcpy_inline(i32 addrspace(1)* nocapture %p, i32 addrspace(1)* nocapture readonly %q) #0 {
%p0 = bitcast i32 addrspace(1)* %p to i8 addrspace(1)*
%add.ptr = getelementptr inbounds i32, i32 addrspace(1)* %p, i64 4
%p1 = bitcast i32 addrspace(1)* %add.ptr to i8 addrspace(1)*
tail call void @llvm.memcpy.inline.p1i8.p1i8.i64(i8 addrspace(1)* noundef nonnull align 4 dereferenceable(16) %p0, i8 addrspace(1)* noundef nonnull align 4 dereferenceable(16) %p1, i64 16, i1 false), !alias.scope !0, !noalias !3
%1 = bitcast i32 addrspace(1)* %q to <2 x i32> addrspace(1)*
%2 = load <2 x i32>, <2 x i32> addrspace(1)* %1, align 4, !alias.scope !3, !noalias !0
%v01 = extractelement <2 x i32> %2, i32 0
%v12 = extractelement <2 x i32> %2, i32 1
%add = add i32 %v01, %v12
ret i32 %add
}
; Function Attrs: argmemonly nofree nounwind willreturn
declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* noalias nocapture writeonly, i8 addrspace(1)* noalias nocapture readonly, i64, i1 immarg) #1
; Function Attrs: argmemonly nofree nounwind willreturn
declare void @llvm.memcpy.inline.p1i8.p1i8.i64(i8 addrspace(1)* noalias nocapture writeonly, i8 addrspace(1)* noalias nocapture readonly, i64 immarg, i1 immarg) #1
; Function Attrs: convergent nounwind willreturn
declare { i1, i32 } @llvm.amdgcn.if.i32(i1) #2
; Function Attrs: convergent nounwind willreturn
declare { i1, i32 } @llvm.amdgcn.else.i32.i32(i32) #2
; Function Attrs: convergent nounwind readnone willreturn
declare i32 @llvm.amdgcn.if.break.i32(i1, i32) #3
; Function Attrs: convergent nounwind willreturn
declare i1 @llvm.amdgcn.loop.i32(i32) #2
; Function Attrs: convergent nounwind willreturn
declare void @llvm.amdgcn.end.cf.i32(i32) #2
attributes #0 = { "target-cpu"="gfx1010" }
attributes #1 = { argmemonly nofree nounwind willreturn "target-cpu"="gfx1010" }
attributes #2 = { convergent nounwind willreturn }
attributes #3 = { convergent nounwind readnone willreturn }
!0 = !{!1}
!1 = distinct !{!1, !2, !"bax: %p"}
!2 = distinct !{!2, !"bax"}
!3 = !{!4}
!4 = distinct !{!4, !2, !"bax: %q"}
...
---
name: test_memcpy
machineMetadataNodes:
- '!9 = distinct !{!9, !7, !"Dst"}'
- '!6 = distinct !{!6, !7, !"Src"}'
- '!11 = !{!4, !6}'
- '!5 = !{!1, !6}'
- '!8 = !{!4, !9}'
- '!10 = !{!1, !9}'
- '!7 = distinct !{!7, !"MemcpyLoweringDomain"}'
body: |
bb.0 (%ir-block.0):
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
; CHECK-LABEL: name: test_memcpy
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
; CHECK: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; CHECK: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY5]], 16, 0, implicit $exec :: (load (s128) from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1)
; CHECK: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; CHECK: GLOBAL_STORE_DWORDX4 [[COPY6]], killed [[GLOBAL_LOAD_DWORDX4_]], 0, 0, implicit $exec :: (store (s128) into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1)
; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 killed [[COPY7]], 0, 0, implicit $exec :: (load (s64) from %ir.1, align 4, !alias.scope !3, !noalias !0, addrspace 1)
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1
; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[COPY8]], killed [[COPY9]], 0, implicit $exec
; CHECK: $vgpr0 = COPY [[V_ADD_U32_e64_]]
; CHECK: SI_RETURN implicit $vgpr0
%3:vgpr_32 = COPY $vgpr3
%2:vgpr_32 = COPY $vgpr2
%1:vgpr_32 = COPY $vgpr1
%0:vgpr_32 = COPY $vgpr0
%17:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%18:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1
%9:vreg_64 = COPY %18
%8:vreg_128 = GLOBAL_LOAD_DWORDX4 %9, 16, 0, implicit $exec :: (load (s128) from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1)
%10:vreg_64 = COPY %18
GLOBAL_STORE_DWORDX4 %10, killed %8, 0, 0, implicit $exec :: (store (s128) into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1)
%12:vreg_64 = COPY %17
%11:vreg_64 = GLOBAL_LOAD_DWORDX2 killed %12, 0, 0, implicit $exec :: (load (s64) from %ir.1, align 4, !alias.scope !3, !noalias !0, addrspace 1)
%13:vgpr_32 = COPY %11.sub0
%14:vgpr_32 = COPY %11.sub1
%15:vgpr_32 = V_ADD_U32_e64 killed %13, killed %14, 0, implicit $exec
$vgpr0 = COPY %15
SI_RETURN implicit $vgpr0
...
---
name: test_memcpy_inline
machineMetadataNodes:
- '!6 = distinct !{!6, !7, !"Src"}'
- '!7 = distinct !{!7, !"MemcpyLoweringDomain"}'
- '!9 = distinct !{!9, !7, !"Dst"}'
- '!11 = !{!4, !6}'
- '!5 = !{!1, !6}'
- '!8 = !{!4, !9}'
- '!10 = !{!1, !9}'
body: |
bb.0 (%ir-block.0):
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
; CHECK-LABEL: name: test_memcpy_inline
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
; CHECK: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; CHECK: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY5]], 16, 0, implicit $exec :: (load (s128) from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1)
; CHECK: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; CHECK: GLOBAL_STORE_DWORDX4 [[COPY6]], killed [[GLOBAL_LOAD_DWORDX4_]], 0, 0, implicit $exec :: (store (s128) into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1)
; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 killed [[COPY7]], 0, 0, implicit $exec :: (load (s64) from %ir.1, align 4, !alias.scope !3, !noalias !0, addrspace 1)
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1
; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[COPY8]], killed [[COPY9]], 0, implicit $exec
; CHECK: $vgpr0 = COPY [[V_ADD_U32_e64_]]
; CHECK: SI_RETURN implicit $vgpr0
%3:vgpr_32 = COPY $vgpr3
%2:vgpr_32 = COPY $vgpr2
%1:vgpr_32 = COPY $vgpr1
%0:vgpr_32 = COPY $vgpr0
%17:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%18:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1
%9:vreg_64 = COPY %18
%8:vreg_128 = GLOBAL_LOAD_DWORDX4 %9, 16, 0, implicit $exec :: (load (s128) from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1)
%10:vreg_64 = COPY %18
GLOBAL_STORE_DWORDX4 %10, killed %8, 0, 0, implicit $exec :: (store (s128) into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1)
%12:vreg_64 = COPY %17
%11:vreg_64 = GLOBAL_LOAD_DWORDX2 killed %12, 0, 0, implicit $exec :: (load (s64) from %ir.1, align 4, !alias.scope !3, !noalias !0, addrspace 1)
%13:vgpr_32 = COPY %11.sub0
%14:vgpr_32 = COPY %11.sub1
%15:vgpr_32 = V_ADD_U32_e64 killed %13, killed %14, 0, implicit $exec
$vgpr0 = COPY %15
SI_RETURN implicit $vgpr0
...
|