1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
|
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; GCN-LABEL: {{^}}load.f16.1d:
; GFX9: image_load v0, v0, s[0:7] dmask:0x1 unorm a16 d16
; GFX10: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm a16 d16
define amdgpu_ps <4 x half> @load.f16.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
main_body:
%x = extractelement <2 x i16> %coords, i32 0
%v = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i16(i32 1, i16 %x, <8 x i32> %rsrc, i32 0, i32 0)
ret <4 x half> %v
}
; GCN-LABEL: {{^}}load.v2f16.1d:
; GFX9: image_load v0, v0, s[0:7] dmask:0x3 unorm a16 d16
; GFX10: image_load v0, v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm a16 d16
define amdgpu_ps <4 x half> @load.v2f16.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
main_body:
%x = extractelement <2 x i16> %coords, i32 0
%v = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i16(i32 3, i16 %x, <8 x i32> %rsrc, i32 0, i32 0)
ret <4 x half> %v
}
; GCN-LABEL: {{^}}load.v3f16.1d:
; GFX9: image_load v[0:1], v0, s[0:7] dmask:0x7 unorm a16 d16
; GFX10: image_load v[0:1], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm a16 d16
define amdgpu_ps <4 x half> @load.v3f16.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
main_body:
%x = extractelement <2 x i16> %coords, i32 0
%v = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i16(i32 7, i16 %x, <8 x i32> %rsrc, i32 0, i32 0)
ret <4 x half> %v
}
; GCN-LABEL: {{^}}load.v4f16.1d:
; GFX9: image_load v[0:1], v0, s[0:7] dmask:0xf unorm a16 d16
; GFX10: image_load v[0:1], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16 d16
define amdgpu_ps <4 x half> @load.v4f16.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
main_body:
%x = extractelement <2 x i16> %coords, i32 0
%v = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i16(i32 15, i16 %x, <8 x i32> %rsrc, i32 0, i32 0)
ret <4 x half> %v
}
; GCN-LABEL: {{^}}load.f16.2d:
; GFX9: image_load v0, v0, s[0:7] dmask:0x1 unorm a16 d16
; GFX10: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm a16 d16
define amdgpu_ps <4 x half> @load.f16.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
main_body:
%x = extractelement <2 x i16> %coords, i32 0
%y = extractelement <2 x i16> %coords, i32 1
%v = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i16(i32 1, i16 %x, i16 %y, <8 x i32> %rsrc, i32 0, i32 0)
ret <4 x half> %v
}
; GCN-LABEL: {{^}}load.v2f16.2d:
; GFX9: image_load v0, v0, s[0:7] dmask:0x3 unorm a16 d16
; GFX10: image_load v0, v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm a16 d16
define amdgpu_ps <4 x half> @load.v2f16.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
main_body:
%x = extractelement <2 x i16> %coords, i32 0
%y = extractelement <2 x i16> %coords, i32 1
%v = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i16(i32 3, i16 %x, i16 %y, <8 x i32> %rsrc, i32 0, i32 0)
ret <4 x half> %v
}
; GCN-LABEL: {{^}}load.v3f16.2d:
; GFX9: image_load v[0:1], v0, s[0:7] dmask:0x7 unorm a16 d16
; GFX10: image_load v[0:1], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D unorm a16 d16
define amdgpu_ps <4 x half> @load.v3f16.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
main_body:
%x = extractelement <2 x i16> %coords, i32 0
%y = extractelement <2 x i16> %coords, i32 1
%v = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i16(i32 7, i16 %x, i16 %y, <8 x i32> %rsrc, i32 0, i32 0)
ret <4 x half> %v
}
; GCN-LABEL: {{^}}load.v4f16.2d:
; GFX9: image_load v[0:1], v0, s[0:7] dmask:0xf unorm a16 d16
; GFX10: image_load v[0:1], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16 d16
define amdgpu_ps <4 x half> @load.v4f16.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
main_body:
%x = extractelement <2 x i16> %coords, i32 0
%y = extractelement <2 x i16> %coords, i32 1
%v = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i16(i32 15, i16 %x, i16 %y, <8 x i32> %rsrc, i32 0, i32 0)
ret <4 x half> %v
}
; GCN-LABEL: {{^}}load.f16.3d:
; GFX9: image_load v0, v[0:1], s[0:7] dmask:0x1 unorm a16 d16
; GFX10: image_load v0, v[0:1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm a16 d16
define amdgpu_ps <4 x half> @load.f16.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
main_body:
%x = extractelement <2 x i16> %coords_lo, i32 0
%y = extractelement <2 x i16> %coords_lo, i32 1
%z = extractelement <2 x i16> %coords_hi, i32 0
%v = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i16(i32 1, i16 %x, i16 %y, i16 %z, <8 x i32> %rsrc, i32 0, i32 0)
ret <4 x half> %v
}
; GCN-LABEL: {{^}}load.v2f16.3d:
; GFX9: image_load v0, v[0:1], s[0:7] dmask:0x3 unorm a16 d16
; GFX10: image_load v0, v[0:1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm a16 d16
define amdgpu_ps <4 x half> @load.v2f16.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
main_body:
%x = extractelement <2 x i16> %coords_lo, i32 0
%y = extractelement <2 x i16> %coords_lo, i32 1
%z = extractelement <2 x i16> %coords_hi, i32 0
%v = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i16(i32 3, i16 %x, i16 %y, i16 %z, <8 x i32> %rsrc, i32 0, i32 0)
ret <4 x half> %v
}
; GCN-LABEL: {{^}}load.v3f16.3d:
; GFX9: image_load v[0:1], v[0:1], s[0:7] dmask:0x7 unorm a16 d16
; GFX10: image_load v[0:1], v[0:1], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_3D unorm a16 d16
define amdgpu_ps <4 x half> @load.v3f16.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
main_body:
%x = extractelement <2 x i16> %coords_lo, i32 0
%y = extractelement <2 x i16> %coords_lo, i32 1
%z = extractelement <2 x i16> %coords_hi, i32 0
%v = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i16(i32 7, i16 %x, i16 %y, i16 %z, <8 x i32> %rsrc, i32 0, i32 0)
ret <4 x half> %v
}
; GCN-LABEL: {{^}}load.v4f16.3d:
; GFX9: image_load v[0:1], v[0:1], s[0:7] dmask:0xf unorm a16 d16
; GFX10: image_load v[0:1], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16 d16
define amdgpu_ps <4 x half> @load.v4f16.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
main_body:
%x = extractelement <2 x i16> %coords_lo, i32 0
%y = extractelement <2 x i16> %coords_lo, i32 1
%z = extractelement <2 x i16> %coords_hi, i32 0
%v = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i16(i32 15, i16 %x, i16 %y, i16 %z, <8 x i32> %rsrc, i32 0, i32 0)
ret <4 x half> %v
}
declare <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i16(i32, i16, <8 x i32>, i32, i32) #2
declare <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i16(i32, i16, i16, <8 x i32>, i32, i32) #2
declare <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #2
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
|