1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-GISEL %s
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx940 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-SDAG %s
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx940 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-GISEL %s
; ERR-SDAG: LLVM ERROR: Do not know how to expand this operator's operand!
; ERR-GISEL: LLVM ERROR: cannot select: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.ptr.buffer.load.lds),
declare void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) nocapture, i32 %size, i32 %vindex, i32 %voffset, i32 %soffset, i32 %offset, i32 %aux)
;---------------------------------------------------------------------y
; dwordx3
;---------------------------------------------------------------------
define amdgpu_ps float @buffer_load_lds_dwordx3(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) {
; GFX950-SDAG-LABEL: buffer_load_lds_dwordx3:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 8
; GFX950-SDAG-NEXT: s_mov_b32 m0, s4
; GFX950-SDAG-NEXT: s_nop 0
; GFX950-SDAG-NEXT: buffer_load_dwordx3 v0, s[0:3], 0 idxen lds
; GFX950-SDAG-NEXT: buffer_load_dwordx3 v0, s[0:3], 0 idxen offset:4 sc0 lds
; GFX950-SDAG-NEXT: buffer_load_dwordx3 v0, s[0:3], 0 idxen offset:8 nt lds
; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, s4
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX950-SDAG-NEXT: ds_read_b32 v0, v0
; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX950-SDAG-NEXT: ; return to shader part epilog
;
; GFX950-GISEL-LABEL: buffer_load_lds_dwordx3:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: s_mov_b32 m0, s4
; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, 8
; GFX950-GISEL-NEXT: buffer_load_dwordx3 v0, s[0:3], 0 idxen lds
; GFX950-GISEL-NEXT: buffer_load_dwordx3 v0, s[0:3], 0 idxen offset:4 sc0 lds
; GFX950-GISEL-NEXT: buffer_load_dwordx3 v0, s[0:3], 0 idxen offset:8 nt lds
; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX950-GISEL-NEXT: ds_read_b32 v0, v0
; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX950-GISEL-NEXT: ; return to shader part epilog
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 8, i32 0, i32 0, i32 0, i32 0)
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 8, i32 0, i32 0, i32 4, i32 1)
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 8, i32 0, i32 0, i32 8, i32 2)
%res = load float, ptr addrspace(3) %lds
ret float %res
}
define amdgpu_ps void @buffer_load_lds_dwordx3_imm_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex) {
; GFX950-LABEL: buffer_load_lds_dwordx3_imm_offset:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_mov_b32 m0, s4
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: buffer_load_dwordx3 v0, s[0:3], 0 idxen offset:2048 lds
; GFX950-NEXT: s_endpgm
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %vindex, i32 0, i32 0, i32 2048, i32 0)
ret void
}
define amdgpu_ps void @buffer_load_lds_dwordx3_v_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 %voffset) {
; GFX950-LABEL: buffer_load_lds_dwordx3_v_offset:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_mov_b32 m0, s4
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: buffer_load_dwordx3 v[0:1], s[0:3], 0 idxen offen lds
; GFX950-NEXT: s_endpgm
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %vindex, i32 %voffset, i32 0, i32 0, i32 0)
ret void
}
define amdgpu_ps void @buffer_load_lds_dwordx3_s_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 inreg %soffset) {
; GFX950-LABEL: buffer_load_lds_dwordx3_s_offset:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_mov_b32 m0, s4
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: buffer_load_dwordx3 v0, s[0:3], s5 idxen lds
; GFX950-NEXT: s_endpgm
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %vindex, i32 0, i32 %soffset, i32 0, i32 0)
ret void
}
define amdgpu_ps void @buffer_load_lds_dwordx3_vs_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX950-LABEL: buffer_load_lds_dwordx3_vs_offset:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_mov_b32 m0, s4
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: buffer_load_dwordx3 v[0:1], s[0:3], s5 idxen offen lds
; GFX950-NEXT: s_endpgm
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %vindex, i32 %voffset, i32 %soffset, i32 0, i32 0)
ret void
}
define amdgpu_ps void @buffer_load_lds_dwordx3_vs_imm_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX950-LABEL: buffer_load_lds_dwordx3_vs_imm_offset:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_mov_b32 m0, s4
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: buffer_load_dwordx3 v[0:1], s[0:3], s5 idxen offen offset:2048 lds
; GFX950-NEXT: s_endpgm
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %vindex, i32 %voffset, i32 %soffset, i32 2048, i32 0)
ret void
}
;---------------------------------------------------------------------y
; dwordx4
;---------------------------------------------------------------------
define amdgpu_ps float @buffer_load_lds_dwordx4(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) {
; GFX950-SDAG-LABEL: buffer_load_lds_dwordx4:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 8
; GFX950-SDAG-NEXT: s_mov_b32 m0, s4
; GFX950-SDAG-NEXT: s_nop 0
; GFX950-SDAG-NEXT: buffer_load_dwordx4 v0, s[0:3], 0 idxen lds
; GFX950-SDAG-NEXT: buffer_load_dwordx4 v0, s[0:3], 0 idxen offset:4 sc0 lds
; GFX950-SDAG-NEXT: buffer_load_dwordx4 v0, s[0:3], 0 idxen offset:8 nt lds
; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, s4
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX950-SDAG-NEXT: ds_read_b32 v0, v0
; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX950-SDAG-NEXT: ; return to shader part epilog
;
; GFX950-GISEL-LABEL: buffer_load_lds_dwordx4:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: s_mov_b32 m0, s4
; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, 8
; GFX950-GISEL-NEXT: buffer_load_dwordx4 v0, s[0:3], 0 idxen lds
; GFX950-GISEL-NEXT: buffer_load_dwordx4 v0, s[0:3], 0 idxen offset:4 sc0 lds
; GFX950-GISEL-NEXT: buffer_load_dwordx4 v0, s[0:3], 0 idxen offset:8 nt lds
; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX950-GISEL-NEXT: ds_read_b32 v0, v0
; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX950-GISEL-NEXT: ; return to shader part epilog
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 8, i32 0, i32 0, i32 0, i32 0)
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 8, i32 0, i32 0, i32 4, i32 1)
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 8, i32 0, i32 0, i32 8, i32 2)
%res = load float, ptr addrspace(3) %lds
ret float %res
}
define amdgpu_ps void @buffer_load_lds_dwordx4_imm_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex) {
; GFX950-LABEL: buffer_load_lds_dwordx4_imm_offset:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_mov_b32 m0, s4
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: buffer_load_dwordx4 v0, s[0:3], 0 idxen offset:2048 lds
; GFX950-NEXT: s_endpgm
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %vindex, i32 0, i32 0, i32 2048, i32 0)
ret void
}
define amdgpu_ps void @buffer_load_lds_dwordx4_v_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 %voffset) {
; GFX950-LABEL: buffer_load_lds_dwordx4_v_offset:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_mov_b32 m0, s4
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: buffer_load_dwordx4 v[0:1], s[0:3], 0 idxen offen lds
; GFX950-NEXT: s_endpgm
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %vindex, i32 %voffset, i32 0, i32 0, i32 0)
ret void
}
define amdgpu_ps void @buffer_load_lds_dwordx4_s_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 inreg %soffset) {
; GFX950-LABEL: buffer_load_lds_dwordx4_s_offset:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_mov_b32 m0, s4
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: buffer_load_dwordx4 v0, s[0:3], s5 idxen lds
; GFX950-NEXT: s_endpgm
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %vindex, i32 0, i32 %soffset, i32 0, i32 0)
ret void
}
define amdgpu_ps void @buffer_load_lds_dwordx4_vs_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX950-LABEL: buffer_load_lds_dwordx4_vs_offset:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_mov_b32 m0, s4
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: buffer_load_dwordx4 v[0:1], s[0:3], s5 idxen offen lds
; GFX950-NEXT: s_endpgm
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %vindex, i32 %voffset, i32 %soffset, i32 0, i32 0)
ret void
}
define amdgpu_ps void @buffer_load_lds_dwordx4_vs_imm_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX950-LABEL: buffer_load_lds_dwordx4_vs_imm_offset:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_mov_b32 m0, s4
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: buffer_load_dwordx4 v[0:1], s[0:3], s5 idxen offen offset:2048 lds
; GFX950-NEXT: s_endpgm
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %vindex, i32 %voffset, i32 %soffset, i32 2048, i32 0)
ret void
}
|