1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding | FileCheck -enable-var-scope -check-prefixes=UNPACKED %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=PACKED %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=PACKED %s
; Single-component D16 format load: calls the f16 overload of the
; struct.ptr buffer-load-format intrinsic on %rsrc with all four i32 operands
; set to zero and returns the loaded half unchanged.
; Both sets of checks expect the same sequence: v0 is zeroed and used as the
; VGPR operand, buffer_load_format_d16_x writes v0 with idxen set, and a wait
; on vmcnt(0) precedes the return. Only the tonga checks pin the encodings.
define amdgpu_ps half @buffer_load_format_d16_x(ptr addrspace(8) inreg %rsrc) {
; UNPACKED-LABEL: buffer_load_format_d16_x:
; UNPACKED: ; %bb.0: ; %main_body
; UNPACKED-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; UNPACKED-NEXT: buffer_load_format_d16_x v0, v0, s[0:3], 0 idxen ; encoding: [0x00,0x20,0x20,0xe0,0x00,0x00,0x00,0x80]
; UNPACKED-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; UNPACKED-NEXT: ; return to shader part epilog
;
; PACKED-LABEL: buffer_load_format_d16_x:
; PACKED: ; %bb.0: ; %main_body
; PACKED-NEXT: v_mov_b32_e32 v0, 0
; PACKED-NEXT: buffer_load_format_d16_x v0, v0, s[0:3], 0 idxen
; PACKED-NEXT: s_waitcnt vmcnt(0)
; PACKED-NEXT: ; return to shader part epilog
main_body:
; Scalar half result; no element extraction is needed for the one-component case.
%data = call half @llvm.amdgcn.struct.ptr.buffer.load.format.f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
ret half %data
}
; Two-component D16 format load: calls the v2f16 overload on %rsrc with all
; four i32 operands set to zero and returns element 1 of the result.
; The tonga checks expect a two-register destination v[0:1] and a copy of v1
; into v0. The gfx810/gfx900 checks expect a single-register destination v0
; and a right shift by 16 to bring element 1 into the low bits of v0.
define amdgpu_ps half @buffer_load_format_d16_xy(ptr addrspace(8) inreg %rsrc) {
; UNPACKED-LABEL: buffer_load_format_d16_xy:
; UNPACKED: ; %bb.0: ; %main_body
; UNPACKED-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; UNPACKED-NEXT: buffer_load_format_d16_xy v[0:1], v0, s[0:3], 0 idxen ; encoding: [0x00,0x20,0x24,0xe0,0x00,0x00,0x00,0x80]
; UNPACKED-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; UNPACKED-NEXT: v_mov_b32_e32 v0, v1 ; encoding: [0x01,0x03,0x00,0x7e]
; UNPACKED-NEXT: ; return to shader part epilog
;
; PACKED-LABEL: buffer_load_format_d16_xy:
; PACKED: ; %bb.0: ; %main_body
; PACKED-NEXT: v_mov_b32_e32 v0, 0
; PACKED-NEXT: buffer_load_format_d16_xy v0, v0, s[0:3], 0 idxen
; PACKED-NEXT: s_waitcnt vmcnt(0)
; PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; PACKED-NEXT: ; return to shader part epilog
main_body:
%data = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
; Return the last element (index 1 of 2) so the result depends on the vector layout.
%elt = extractelement <2 x half> %data, i32 1
ret half %elt
}
; Three-component D16 format load: calls the v3f16 overload on %rsrc with all
; four i32 operands set to zero and returns element 2 of the result.
; The tonga checks expect a three-register destination v[0:2] and a copy of
; v2 into v0. The gfx810/gfx900 checks expect a two-register destination
; v[0:1] and a plain copy of v1 into v0, with no shift.
define amdgpu_ps half @buffer_load_format_d16_xyz(ptr addrspace(8) inreg %rsrc) {
; UNPACKED-LABEL: buffer_load_format_d16_xyz:
; UNPACKED: ; %bb.0: ; %main_body
; UNPACKED-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; UNPACKED-NEXT: buffer_load_format_d16_xyz v[0:2], v0, s[0:3], 0 idxen ; encoding: [0x00,0x20,0x28,0xe0,0x00,0x00,0x00,0x80]
; UNPACKED-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; UNPACKED-NEXT: v_mov_b32_e32 v0, v2 ; encoding: [0x02,0x03,0x00,0x7e]
; UNPACKED-NEXT: ; return to shader part epilog
;
; PACKED-LABEL: buffer_load_format_d16_xyz:
; PACKED: ; %bb.0: ; %main_body
; PACKED-NEXT: v_mov_b32_e32 v0, 0
; PACKED-NEXT: buffer_load_format_d16_xyz v[0:1], v0, s[0:3], 0 idxen
; PACKED-NEXT: s_waitcnt vmcnt(0)
; PACKED-NEXT: v_mov_b32_e32 v0, v1
; PACKED-NEXT: ; return to shader part epilog
main_body:
%data = call <3 x half> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
; Return the last element (index 2 of 3).
%elt = extractelement <3 x half> %data, i32 2
ret half %elt
}
; Four-component D16 format load: calls the v4f16 overload on %rsrc with all
; four i32 operands set to zero and returns element 3 of the result.
; The tonga checks expect a four-register destination v[0:3] and a copy of
; v3 into v0. The gfx810/gfx900 checks expect a two-register destination
; v[0:1] and a right shift of v1 by 16 into v0.
define amdgpu_ps half @buffer_load_format_d16_xyzw(ptr addrspace(8) inreg %rsrc) {
; UNPACKED-LABEL: buffer_load_format_d16_xyzw:
; UNPACKED: ; %bb.0: ; %main_body
; UNPACKED-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; UNPACKED-NEXT: buffer_load_format_d16_xyzw v[0:3], v0, s[0:3], 0 idxen ; encoding: [0x00,0x20,0x2c,0xe0,0x00,0x00,0x00,0x80]
; UNPACKED-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; UNPACKED-NEXT: v_mov_b32_e32 v0, v3 ; encoding: [0x03,0x03,0x00,0x7e]
; UNPACKED-NEXT: ; return to shader part epilog
;
; PACKED-LABEL: buffer_load_format_d16_xyzw:
; PACKED: ; %bb.0: ; %main_body
; PACKED-NEXT: v_mov_b32_e32 v0, 0
; PACKED-NEXT: buffer_load_format_d16_xyzw v[0:1], v0, s[0:3], 0 idxen
; PACKED-NEXT: s_waitcnt vmcnt(0)
; PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v1
; PACKED-NEXT: ; return to shader part epilog
main_body:
%data = call <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
; Return the last element (index 3 of 4).
%elt = extractelement <4 x half> %data, i32 3
ret half %elt
}
; Integer variant of the single-component load: calls the i16 overload on
; %rsrc with all four i32 operands set to zero, then bitcasts the i16 result
; to half for the return.
; Both sets of checks expect the same buffer_load_format_d16_x sequence as the
; checks for @buffer_load_format_d16_x, with no extra instruction for the
; bitcast.
define amdgpu_ps half @buffer_load_format_i16_x(ptr addrspace(8) inreg %rsrc) {
; UNPACKED-LABEL: buffer_load_format_i16_x:
; UNPACKED: ; %bb.0: ; %main_body
; UNPACKED-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; UNPACKED-NEXT: buffer_load_format_d16_x v0, v0, s[0:3], 0 idxen ; encoding: [0x00,0x20,0x20,0xe0,0x00,0x00,0x00,0x80]
; UNPACKED-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; UNPACKED-NEXT: ; return to shader part epilog
;
; PACKED-LABEL: buffer_load_format_i16_x:
; PACKED: ; %bb.0: ; %main_body
; PACKED-NEXT: v_mov_b32_e32 v0, 0
; PACKED-NEXT: buffer_load_format_d16_x v0, v0, s[0:3], 0 idxen
; PACKED-NEXT: s_waitcnt vmcnt(0)
; PACKED-NEXT: ; return to shader part epilog
main_body:
%data = call i16 @llvm.amdgcn.struct.ptr.buffer.load.format.i16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
; Reinterpret the 16 loaded bits as a half so the function can return in the same way as the f16 tests.
%fdata = bitcast i16 %data to half
ret half %fdata
}
; Declarations of the struct.ptr buffer-load-format intrinsic overloads called
; by the tests in this file: half, <2 x half>, <3 x half>, <4 x half> and i16
; results. Each takes a ptr addrspace(8) operand followed by four i32 operands.
declare half @llvm.amdgcn.struct.ptr.buffer.load.format.f16(ptr addrspace(8), i32, i32, i32, i32)
declare <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f16(ptr addrspace(8), i32, i32, i32, i32)
declare <3 x half> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f16(ptr addrspace(8), i32, i32, i32, i32)
declare <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f16(ptr addrspace(8), i32, i32, i32, i32)
declare i16 @llvm.amdgcn.struct.ptr.buffer.load.format.i16(ptr addrspace(8), i32, i32, i32, i32)
|