1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
|
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=amdgpu-attributor < %s | FileCheck -check-prefixes=GFX9 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-attributor < %s | FileCheck -check-prefixes=GFX10 %s
;
; None of these functions should have the attribute amdgpu-no-flat-scratch-init. In these tests
; we manually set the attribute for the functions. The purpose is to test how the amdgpu-attributor pass
; handles this situation.
;
;; tests of addrspacecast
; Non-kernel function that casts its addrspace(5) pointer argument to a flat
; (default address space) pointer with an addrspacecast instruction and then
; does a volatile store through the result. It is tagged with attribute
; group #0, i.e. "amdgpu-no-flat-scratch-init" is pre-set by hand. The
; assertions below expect the body to come out of the pass unchanged.
define void @with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #0 {
; GFX9-LABEL: define void @with_private_to_flat_addrspacecast(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @with_private_to_flat_addrspacecast(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX10-NEXT: ret void
;
%stof = addrspacecast ptr addrspace(5) %ptr to ptr
store volatile i32 0, ptr %stof
ret void
}
; Same body as @with_private_to_flat_addrspacecast (addrspace(5) -> flat cast
; followed by a volatile store), but declared with the amdgpu_kernel calling
; convention. Also tagged with attribute group #0.
define amdgpu_kernel void @with_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) #0 {
; GFX9-LABEL: define amdgpu_kernel void @with_private_to_flat_addrspacecast_cc_kernel(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @with_private_to_flat_addrspacecast_cc_kernel(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX10-NEXT: ret void
;
%stof = addrspacecast ptr addrspace(5) %ptr to ptr
store volatile i32 0, ptr %stof
ret void
}
; Non-kernel function that contains no addrspacecast itself; it only forwards
; its addrspace(5) pointer to @with_private_to_flat_addrspacecast, so the cast
; is reached indirectly through the call. Tagged with attribute group #0.
define void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #0 {
; GFX9-LABEL: define void @call_with_private_to_flat_addrspacecast(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @call_with_private_to_flat_addrspacecast(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX10-NEXT: ret void
;
call void @with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr)
ret void
}
; Kernel (amdgpu_kernel calling convention) variant of the indirect case: the
; only work done is a call to @with_private_to_flat_addrspacecast, which holds
; the addrspace(5) -> flat cast. Tagged with attribute group #0.
define amdgpu_kernel void @call_with_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) #0 {
; GFX9-LABEL: define amdgpu_kernel void @call_with_private_to_flat_addrspacecast_cc_kernel(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @call_with_private_to_flat_addrspacecast_cc_kernel(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX10-NEXT: ret void
;
call void @with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr)
ret void
}
;; tests of addrspacecast in a constant
; Kernel in which the addrspace(5) -> flat cast appears only as a constant
; expression operand of a volatile store (no addrspacecast instruction).
; Tagged with attribute group #0. The parameter uses the current
; captures(none) spelling, matching what the expected output below prints,
; rather than the legacy nocapture keyword.
define amdgpu_kernel void @private_constant_expression_use(ptr addrspace(1) captures(none) %out) #0 {
; GFX9-LABEL: define amdgpu_kernel void @private_constant_expression_use(
; GFX9-SAME: ptr addrspace(1) captures(none) [[OUT:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: store volatile ptr addrspacecast (ptr addrspace(5) inttoptr (i32 123 to ptr addrspace(5)) to ptr), ptr addrspace(1) [[OUT]], align 8
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @private_constant_expression_use(
; GFX10-SAME: ptr addrspace(1) captures(none) [[OUT:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: store volatile ptr addrspacecast (ptr addrspace(5) inttoptr (i32 123 to ptr addrspace(5)) to ptr), ptr addrspace(1) [[OUT]], align 8
; GFX10-NEXT: ret void
;
store volatile ptr addrspacecast (ptr addrspace(5) inttoptr (i32 123 to ptr addrspace(5)) to ptr), ptr addrspace(1) %out, align 8
ret void
}
;; tests of intrinsics
; Kernel that converts its addrspace(3) pointer argument to a flat pointer via
; the @llvm.amdgcn.addrspacecast.nonnull.p0.p3 intrinsic call (instead of an
; addrspacecast instruction) and does a volatile store of 7 through the
; result. Tagged with attribute group #0.
define amdgpu_kernel void @calls_intrin_ascast_cc_kernel(ptr addrspace(3) %ptr) #0 {
; GFX9-LABEL: define amdgpu_kernel void @calls_intrin_ascast_cc_kernel(
; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) [[PTR]])
; GFX9-NEXT: store volatile i32 7, ptr [[TMP1]], align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @calls_intrin_ascast_cc_kernel(
; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) [[PTR]])
; GFX10-NEXT: store volatile i32 7, ptr [[TMP1]], align 4
; GFX10-NEXT: ret void
;
%1 = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) %ptr)
store volatile i32 7, ptr %1, align 4
ret void
}
; Non-kernel counterpart of @calls_intrin_ascast_cc_kernel: calls the
; @llvm.amdgcn.addrspacecast.nonnull.p0.p3 intrinsic on an addrspace(3)
; pointer and does a volatile store of 7 through the returned flat pointer.
; Tagged with attribute group #0.
define void @calls_intrin_ascast(ptr addrspace(3) %ptr) #0 {
; GFX9-LABEL: define void @calls_intrin_ascast(
; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) [[PTR]])
; GFX9-NEXT: store volatile i32 7, ptr [[TMP1]], align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @calls_intrin_ascast(
; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) [[PTR]])
; GFX10-NEXT: store volatile i32 7, ptr [[TMP1]], align 4
; GFX10-NEXT: ret void
;
%1 = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) %ptr)
store volatile i32 7, ptr %1, align 4
ret void
}
; Kernel that reaches the addrspacecast intrinsic only indirectly: its body is
; a single call to @calls_intrin_ascast with the addrspace(3) pointer
; argument. Tagged with attribute group #0.
define amdgpu_kernel void @call_calls_intrin_ascast_cc_kernel(ptr addrspace(3) %ptr) #0 {
; GFX9-LABEL: define amdgpu_kernel void @call_calls_intrin_ascast_cc_kernel(
; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: call void @calls_intrin_ascast(ptr addrspace(3) [[PTR]])
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @call_calls_intrin_ascast_cc_kernel(
; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: call void @calls_intrin_ascast(ptr addrspace(3) [[PTR]])
; GFX10-NEXT: ret void
;
call void @calls_intrin_ascast(ptr addrspace(3) %ptr)
ret void
}
; Attribute group referenced as #0 by the test functions above. It pre-sets
; "amdgpu-no-flat-scratch-init" by hand; the expected attribute lists that
; follow still contain that string after the pass has run.
attributes #0 = { "amdgpu-no-flat-scratch-init" }
;.
; GFX9: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
; GFX9: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx900" }
;.
; GFX10: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="8,20" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
; GFX10: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx1010" }
;.
|