# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=none -o - %s | FileCheck %s
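# Presumably verifies that machine-level scoped alias metadata (the
# machineMetadataNodes entries and the !alias.scope/!noalias references on the
# lowered memcpy load/store memory operands) survives the MIR print/parse
# round trip with -run-pass=none.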
--- |
  ; ModuleID = 'test/CodeGen/AMDGPU/memcpy-scoped-aa.ll'
  source_filename = "test/CodeGen/AMDGPU/memcpy-scoped-aa.ll"
  target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
  target triple = "amdgcn-amd-amdhsa"
  define i32 @test_memcpy(i32 addrspace(1)* nocapture %p, i32 addrspace(1)* nocapture readonly %q) #0 {
    %p0 = bitcast i32 addrspace(1)* %p to i8 addrspace(1)*
    %add.ptr = getelementptr inbounds i32, i32 addrspace(1)* %p, i64 4
    %p1 = bitcast i32 addrspace(1)* %add.ptr to i8 addrspace(1)*
    tail call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* noundef nonnull align 4 dereferenceable(16) %p0, i8 addrspace(1)* noundef nonnull align 4 dereferenceable(16) %p1, i64 16, i1 false), !alias.scope !0, !noalias !3
    %1 = bitcast i32 addrspace(1)* %q to <2 x i32> addrspace(1)*
    %2 = load <2 x i32>, <2 x i32> addrspace(1)* %1, align 4, !alias.scope !3, !noalias !0
    %v01 = extractelement <2 x i32> %2, i32 0
    %v12 = extractelement <2 x i32> %2, i32 1
    %add = add i32 %v01, %v12
    ret i32 %add
  }
  define i32 @test_memcpy_inline(i32 addrspace(1)* nocapture %p, i32 addrspace(1)* nocapture readonly %q) #0 {
    %p0 = bitcast i32 addrspace(1)* %p to i8 addrspace(1)*
    %add.ptr = getelementptr inbounds i32, i32 addrspace(1)* %p, i64 4
    %p1 = bitcast i32 addrspace(1)* %add.ptr to i8 addrspace(1)*
    tail call void @llvm.memcpy.inline.p1i8.p1i8.i64(i8 addrspace(1)* noundef nonnull align 4 dereferenceable(16) %p0, i8 addrspace(1)* noundef nonnull align 4 dereferenceable(16) %p1, i64 16, i1 false), !alias.scope !0, !noalias !3
    %1 = bitcast i32 addrspace(1)* %q to <2 x i32> addrspace(1)*
    %2 = load <2 x i32>, <2 x i32> addrspace(1)* %1, align 4, !alias.scope !3, !noalias !0
    %v01 = extractelement <2 x i32> %2, i32 0
    %v12 = extractelement <2 x i32> %2, i32 1
    %add = add i32 %v01, %v12
    ret i32 %add
  }
  ; Function Attrs: argmemonly nofree nounwind willreturn
  declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* noalias nocapture writeonly, i8 addrspace(1)* noalias nocapture readonly, i64, i1 immarg) #1
  ; Function Attrs: argmemonly nofree nounwind willreturn
  declare void @llvm.memcpy.inline.p1i8.p1i8.i64(i8 addrspace(1)* noalias nocapture writeonly, i8 addrspace(1)* noalias nocapture readonly, i64 immarg, i1 immarg) #1
  ; Function Attrs: convergent nounwind willreturn
  declare { i1, i32 } @llvm.amdgcn.if.i32(i1) #2
  ; Function Attrs: convergent nounwind willreturn
  declare { i1, i32 } @llvm.amdgcn.else.i32.i32(i32) #2
  ; Function Attrs: convergent nounwind readnone willreturn
  declare i32 @llvm.amdgcn.if.break.i32(i1, i32) #3
  ; Function Attrs: convergent nounwind willreturn
  declare i1 @llvm.amdgcn.loop.i32(i32) #2
  ; Function Attrs: convergent nounwind willreturn
  declare void @llvm.amdgcn.end.cf.i32(i32) #2
  attributes #0 = { "target-cpu"="gfx1010" }
  attributes #1 = { argmemonly nofree nounwind willreturn "target-cpu"="gfx1010" }
  attributes #2 = { convergent nounwind willreturn }
  attributes #3 = { convergent nounwind readnone willreturn }
  !0 = !{!1}
  !1 = distinct !{!1, !2, !"bax: %p"}
  !2 = distinct !{!2, !"bax"}
  !3 = !{!4}
  !4 = distinct !{!4, !2, !"bax: %q"}
...
---
name:            test_memcpy
machineMetadataNodes:
  - '!9 = distinct !{!9, !7, !"Dst"}'
  - '!6 = distinct !{!6, !7, !"Src"}'
  - '!11 = !{!4, !6}'
  - '!5 = !{!1, !6}'
  - '!8 = !{!4, !9}'
  - '!10 = !{!1, !9}'
  - '!7 = distinct !{!7, !"MemcpyLoweringDomain"}'
body:             |
  bb.0 (%ir-block.0):
    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
    ; CHECK-LABEL: name: test_memcpy
    ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
    ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
    ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
    ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
    ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
    ; CHECK: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
    ; CHECK: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY5]], 16, 0, implicit $exec :: (load (s128) from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1)
    ; CHECK: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
    ; CHECK: GLOBAL_STORE_DWORDX4 [[COPY6]], killed [[GLOBAL_LOAD_DWORDX4_]], 0, 0, implicit $exec :: (store (s128) into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1)
    ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
    ; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 killed [[COPY7]], 0, 0, implicit $exec :: (load (s64) from %ir.1, align 4, !alias.scope !3, !noalias !0, addrspace 1)
    ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0
    ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1
    ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[COPY8]], killed [[COPY9]], 0, implicit $exec
    ; CHECK: $vgpr0 = COPY [[V_ADD_U32_e64_]]
    ; CHECK: SI_RETURN implicit $vgpr0
    %3:vgpr_32 = COPY $vgpr3
    %2:vgpr_32 = COPY $vgpr2
    %1:vgpr_32 = COPY $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %17:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %18:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1
    %9:vreg_64 = COPY %18
    %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %9, 16, 0, implicit $exec :: (load (s128) from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1)
    %10:vreg_64 = COPY %18
    GLOBAL_STORE_DWORDX4 %10, killed %8, 0, 0, implicit $exec :: (store (s128) into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1)
    %12:vreg_64 = COPY %17
    %11:vreg_64 = GLOBAL_LOAD_DWORDX2 killed %12, 0, 0, implicit $exec :: (load (s64) from %ir.1, align 4, !alias.scope !3, !noalias !0, addrspace 1)
    %13:vgpr_32 = COPY %11.sub0
    %14:vgpr_32 = COPY %11.sub1
    %15:vgpr_32 = V_ADD_U32_e64 killed %13, killed %14, 0, implicit $exec
    $vgpr0 = COPY %15
    SI_RETURN implicit $vgpr0
...
---
name:            test_memcpy_inline
machineMetadataNodes:
  - '!6 = distinct !{!6, !7, !"Src"}'
  - '!7 = distinct !{!7, !"MemcpyLoweringDomain"}'
  - '!9 = distinct !{!9, !7, !"Dst"}'
  - '!11 = !{!4, !6}'
  - '!5 = !{!1, !6}'
  - '!8 = !{!4, !9}'
  - '!10 = !{!1, !9}'
body:             |
  bb.0 (%ir-block.0):
    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
    ; CHECK-LABEL: name: test_memcpy_inline
    ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
    ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
    ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
    ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
    ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
    ; CHECK: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
    ; CHECK: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY5]], 16, 0, implicit $exec :: (load (s128) from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1)
    ; CHECK: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
    ; CHECK: GLOBAL_STORE_DWORDX4 [[COPY6]], killed [[GLOBAL_LOAD_DWORDX4_]], 0, 0, implicit $exec :: (store (s128) into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1)
    ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
    ; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 killed [[COPY7]], 0, 0, implicit $exec :: (load (s64) from %ir.1, align 4, !alias.scope !3, !noalias !0, addrspace 1)
    ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0
    ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1
    ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[COPY8]], killed [[COPY9]], 0, implicit $exec
    ; CHECK: $vgpr0 = COPY [[V_ADD_U32_e64_]]
    ; CHECK: SI_RETURN implicit $vgpr0
    %3:vgpr_32 = COPY $vgpr3
    %2:vgpr_32 = COPY $vgpr2
    %1:vgpr_32 = COPY $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %17:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %18:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1
    %9:vreg_64 = COPY %18
    %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %9, 16, 0, implicit $exec :: (load (s128) from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1)
    %10:vreg_64 = COPY %18
    GLOBAL_STORE_DWORDX4 %10, killed %8, 0, 0, implicit $exec :: (store (s128) into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1)
    %12:vreg_64 = COPY %17
    %11:vreg_64 = GLOBAL_LOAD_DWORDX2 killed %12, 0, 0, implicit $exec :: (load (s64) from %ir.1, align 4, !alias.scope !3, !noalias !0, addrspace 1)
    %13:vgpr_32 = COPY %11.sub0
    %14:vgpr_32 = COPY %11.sub1
    %15:vgpr_32 = V_ADD_U32_e64 killed %13, killed %14, 0, implicit $exec
    $vgpr0 = COPY %15
    SI_RETURN implicit $vgpr0
...