1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx9-generic --amdhsa-code-object-version=6 -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; Module-scope arrays used by the allocation kernels: one [4 x i32] in
; addrspace(2) and two in addrspace(3) that differ only in their alignment
; (128 vs. 256). The initializers are poison instead of the deprecated undef.
@gds0 = internal addrspace(2) global [4 x i32] poison, align 4
@lds0 = internal addrspace(3) global [4 x i32] poison, align 128
@lds1 = internal addrspace(3) global [4 x i32] poison, align 256
; The GDS object and the LDS object should each be allocated at the same
; constant offset from the base of their own memory.
define amdgpu_kernel void @alloc_lds_gds(ptr addrspace(1) %out) #1 {
; GCN-LABEL: alloc_lds_gds:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v0, 5
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_mov_b32 m0, 16
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    ds_add_u32 v1, v0 offset:12 gds
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_wbinvl1
; GCN-NEXT:    ds_add_u32 v1, v0 offset:12
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_endpgm
  ; NOTE(review): attribute group #1 is referenced on this kernel, but no
  ; definition of it is visible in this file - confirm that is intended.

  ; acq_rel atomic add of 5 to element 3 (byte offset 12) of @gds0.
  %gds.elt3 = getelementptr [4 x i32], ptr addrspace(2) @gds0, i32 0, i32 3
  %gds.old = atomicrmw add ptr addrspace(2) %gds.elt3, i32 5 acq_rel

  ; The same operation on element 3 of @lds0; the expected output uses
  ; offset:12 for both atomics.
  %lds.elt3 = getelementptr [4 x i32], ptr addrspace(3) @lds0, i32 0, i32 3
  %lds.old = atomicrmw add ptr addrspace(3) %lds.elt3, i32 5 acq_rel
  ret void
}
; The LDS alignment shouldn't change the offset of the GDS object.
define amdgpu_kernel void @alloc_lds_gds_align(ptr addrspace(1) %out) #1 {
; GCN-LABEL: alloc_lds_gds_align:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v0, 5
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_mov_b32 m0, 16
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    ds_add_u32 v1, v0 offset:12 gds
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_wbinvl1
; GCN-NEXT:    ds_add_u32 v1, v0 offset:140
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    ds_add_u32 v1, v0 offset:12
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_endpgm
  ; NOTE(review): attribute group #1 is referenced on this kernel, but no
  ; definition of it is visible in this file - confirm that is intended.

  ; acq_rel atomic add of 5 to element 3 of @gds0; the expected gds access
  ; stays at offset:12 with m0 = 16.
  %gds.elt3 = getelementptr [4 x i32], ptr addrspace(2) @gds0, i32 0, i32 3
  %gds.old = atomicrmw add ptr addrspace(2) %gds.elt3, i32 5 acq_rel

  ; The same operation on element 3 of @lds0 (align 128) ...
  %lds0.elt3 = getelementptr [4 x i32], ptr addrspace(3) @lds0, i32 0, i32 3
  %lds0.old = atomicrmw add ptr addrspace(3) %lds0.elt3, i32 5 acq_rel

  ; ... and of @lds1 (align 256). The two non-gds atomics are expected at
  ; offset:140 and offset:12 respectively.
  %lds1.elt3 = getelementptr [4 x i32], ptr addrspace(3) @lds1, i32 0, i32 3
  %lds1.old = atomicrmw add ptr addrspace(3) %lds1.elt3, i32 5 acq_rel
  ret void
}
; Two [4 x i32] addrspace(2) arrays that differ only in their requested
; alignment (8 vs. 32). The initializers are poison instead of the deprecated
; undef.
@gds_align8 = internal addrspace(2) global [4 x i32] poison, align 8
@gds_align32 = internal addrspace(2) global [4 x i32] poison, align 32
define amdgpu_kernel void @gds_global_align(ptr addrspace(1) %out) {
; GCN-LABEL: gds_global_align:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v0, 5
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_mov_b32 m0, 32
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    ds_add_u32 v1, v0 offset:28 gds
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_wbinvl1
; GCN-NEXT:    ds_add_u32 v1, v0 offset:12 gds
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_wbinvl1
; GCN-NEXT:    s_endpgm
  ; acq_rel atomic add of 5 to element 3 of the 8-byte-aligned array.
  %align8.elt3 = getelementptr [4 x i32], ptr addrspace(2) @gds_align8, i32 0, i32 3
  %align8.old = atomicrmw add ptr addrspace(2) %align8.elt3, i32 5 acq_rel

  ; The same operation on element 3 of the 32-byte-aligned array. The
  ; expected output uses offset:28 for the first atomic and offset:12 for the
  ; second, with m0 = 32.
  %align32.elt3 = getelementptr [4 x i32], ptr addrspace(2) @gds_align32, i32 0, i32 3
  %align32.old = atomicrmw add ptr addrspace(2) %align32.elt3, i32 5 acq_rel
  ret void
}
define amdgpu_kernel void @gds_global_align_plus_attr(ptr addrspace(1) %out) #0 {
; GCN-LABEL: gds_global_align_plus_attr:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v0, 5
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_movk_i32 m0, 0x420
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    ds_add_u32 v1, v0 offset:1052 gds
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_wbinvl1
; GCN-NEXT:    ds_add_u32 v1, v0 offset:1036 gds
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_wbinvl1
; GCN-NEXT:    s_endpgm
  ; Same IR body as @gds_global_align, but this kernel carries attribute
  ; group #0 ("amdgpu-gds-size"="1024"). Every expected value is 1024 larger
  ; than there: offsets 1052/1036 instead of 28/12, m0 0x420 instead of 32.

  ; acq_rel atomic add of 5 to element 3 of the 8-byte-aligned array.
  %align8.elt3 = getelementptr [4 x i32], ptr addrspace(2) @gds_align8, i32 0, i32 3
  %align8.old = atomicrmw add ptr addrspace(2) %align8.elt3, i32 5 acq_rel

  ; The same operation on element 3 of the 32-byte-aligned array.
  %align32.elt3 = getelementptr [4 x i32], ptr addrspace(2) @gds_align32, i32 0, i32 3
  %align32.old = atomicrmw add ptr addrspace(2) %align32.elt3, i32 5 acq_rel
  ret void
}
; @small.gds is a one-byte addrspace(2) object whose address is passed to
; inline asm in @gds_extern_align. Its initializer is poison instead of the
; deprecated undef.
; NOTE(review): @gds.external is declared in addrspace(3) despite its "gds"
; name and has no visible use in this file - confirm the address space.
@small.gds = internal addrspace(2) global i8 poison, align 1
@gds.external = external unnamed_addr addrspace(3) global [0 x i32], align 4
define amdgpu_kernel void @gds_extern_align(ptr addrspace(1) %out, ptr addrspace(2) %gds.arg) #0 {
; GCN-LABEL: gds_extern_align:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s0, s[2:3], 0x8
; GCN-NEXT:    v_mov_b32_e32 v0, 5
; GCN-NEXT:    s_movk_i32 m0, 0x401
; GCN-NEXT:    s_movk_i32 s1, 0x400
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s1
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, s0
; GCN-NEXT:    ds_add_u32 v1, v0 offset:12 gds
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_wbinvl1
; GCN-NEXT:    s_endpgm
  ; Hand the address of @small.gds to inline asm through an "s"-constrained
  ; operand; the expected output materializes it as 0x400 in s1, and m0 is
  ; set to 0x401.
  call void asm sideeffect "; use $0","s"(ptr addrspace(2) @small.gds)

  ; acq_rel atomic add of 5 to element 3 (byte offset 12) of the array that
  ; the %gds.arg kernel argument points to.
  %arg.elt3 = getelementptr [4 x i32], ptr addrspace(2) %gds.arg, i32 0, i32 3
  %arg.old = atomicrmw add ptr addrspace(2) %arg.elt3, i32 5 acq_rel
  ret void
}
; Attribute group #0 sets "amdgpu-gds-size" to 1024. It is attached to
; @gds_global_align_plus_attr and @gds_extern_align.
; NOTE(review): @alloc_lds_gds and @alloc_lds_gds_align reference group #1,
; which has no definition in the visible source - confirm that is intended.
attributes #0 = { "amdgpu-gds-size"="1024" }
|