1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
|
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -passes=amdgpu-attributor -o %t.gfx7.bc %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -passes=amdgpu-attributor -o %t.gfx8.bc %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=amdgpu-attributor -o %t.gfx9.bc %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %t.gfx7.bc | FileCheck --check-prefixes=CHECK,PRE-GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %t.gfx8.bc | FileCheck --check-prefixes=CHECK,PRE-GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %t.gfx9.bc | FileCheck --check-prefixes=CHECK,GFX9 %s
; CHECK: addrspacecast_requires_queue_ptr
; PRE-GFX9: .amdhsa_user_sgpr_queue_ptr 1
; GFX9: .amdhsa_user_sgpr_queue_ptr 0
; CHECK: is_shared_requires_queue_ptr
; PRE-GFX9: .amdhsa_user_sgpr_queue_ptr 1
; GFX9: .amdhsa_user_sgpr_queue_ptr 0
; CHECK: is_private_requires_queue_ptr
; PRE-GFX9: .amdhsa_user_sgpr_queue_ptr 1
; GFX9: .amdhsa_user_sgpr_queue_ptr 0
; CHECK: trap_requires_queue_ptr
; PRE-GFX9: .amdhsa_user_sgpr_queue_ptr 1
; GFX9: .amdhsa_user_sgpr_queue_ptr 0
; CHECK: debugtrap_requires_queue_ptr
; PRE-GFX9: .amdhsa_user_sgpr_queue_ptr 1
; GFX9: .amdhsa_user_sgpr_queue_ptr 0
; CHECK: ubsantrap_requires_queue_ptr
; PRE-GFX9: .amdhsa_user_sgpr_queue_ptr 1
; GFX9: .amdhsa_user_sgpr_queue_ptr 0
; CHECK: amdgcn_queue_ptr_requires_queue_ptr
; CHECK: .amdhsa_user_sgpr_queue_ptr 1
; On gfx8, the queue ptr is required for this addrspacecast.
; CHECK: - .args:
; CHECK-NOT: hidden_queue_ptr
; CHECK-LABEL: .name: addrspacecast_requires_queue_ptr
define amdgpu_kernel void @addrspacecast_requires_queue_ptr(ptr addrspace(5) %ptr.private, ptr addrspace(3) %ptr.local) {
%flat.private = addrspacecast ptr addrspace(5) %ptr.private to ptr
%flat.local = addrspacecast ptr addrspace(3) %ptr.local to ptr
store volatile i32 1, ptr %flat.private
store volatile i32 2, ptr %flat.local
ret void
}
; CHECK: - .args:
; CHECK-NOT: hidden_shared_base
; CHECK-LABEL: .name: is_shared_requires_queue_ptr
define amdgpu_kernel void @is_shared_requires_queue_ptr(ptr %ptr) {
%is.shared = call i1 @llvm.amdgcn.is.shared(ptr %ptr)
%zext = zext i1 %is.shared to i32
store volatile i32 %zext, ptr addrspace(1) poison
ret void
}
; CHECK: - .args:
; CHECK-NOT: hidden_shared_base
; CHECK-NOT: hidden_private_base
; CHECK-NOT: hidden_queue_ptr
; CHECK-LABEL: .name: is_private_requires_queue_ptr
define amdgpu_kernel void @is_private_requires_queue_ptr(ptr %ptr) {
%is.private = call i1 @llvm.amdgcn.is.private(ptr %ptr)
%zext = zext i1 %is.private to i32
store volatile i32 %zext, ptr addrspace(1) poison
ret void
}
; CHECK: - .args:
; CHECK-NOT: hidden_shared_base
; CHECK-NOT: hidden_private_base
; CHECK-NOT: hidden_queue_ptr
; CHECK-LABEL: .name: trap_requires_queue_ptr
define amdgpu_kernel void @trap_requires_queue_ptr() {
call void @llvm.trap()
unreachable
}
; CHECK: - .args:
; CHECK-NOT: hidden_shared_base
; CHECK-NOT: hidden_private_base
; CHECK-NOT: hidden_queue_ptr
; CHECK-LABEL: .name: debugtrap_requires_queue_ptr
define amdgpu_kernel void @debugtrap_requires_queue_ptr() {
call void @llvm.debugtrap()
unreachable
}
; CHECK: - .args:
; CHECK-NOT: hidden_shared_base
; CHECK-NOT: hidden_private_base
; CHECK-NOT: hidden_queue_ptr
; CHECK-LABEL: .name: ubsantrap_requires_queue_ptr
define amdgpu_kernel void @ubsantrap_requires_queue_ptr() {
call void @llvm.ubsantrap(i8 0)
unreachable
}
; CHECK: - .args:
; CHECK-NOT: hidden_queue_ptr
; CHECK-NOT: hidden_shared_base
; CHECK-NOT: hidden_private_base
; CHECK-LABEL: .name: amdgcn_queue_ptr_requires_queue_ptr
define amdgpu_kernel void @amdgcn_queue_ptr_requires_queue_ptr(ptr addrspace(1) %ptr) {
%queue.ptr = call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%dispatch.ptr = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
%dispatch.id = call i64 @llvm.amdgcn.dispatch.id()
%queue.load = load volatile i8, ptr addrspace(4) %queue.ptr
%implicitarg.load = load volatile i8, ptr addrspace(4) %implicitarg.ptr
%dispatch.load = load volatile i8, ptr addrspace(4) %dispatch.ptr
store volatile i64 %dispatch.id, ptr addrspace(1) %ptr
ret void
}
declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr()
declare noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
declare i64 @llvm.amdgcn.dispatch.id()
declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
declare i1 @llvm.amdgcn.is.shared(ptr)
declare i1 @llvm.amdgcn.is.private(ptr)
declare void @llvm.trap()
declare void @llvm.debugtrap()
declare void @llvm.ubsantrap(i8 immarg)
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
|