; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
;=========================== begin_copyright_notice ============================
;
; Copyright (C) 2023-2024 Intel Corporation
;
; SPDX-License-Identifier: MIT
;
;============================ end_copyright_notice =============================
; REQUIRES: llvm-14-plus, regkeys
; RUN: igc_opt --typed-pointers -platformbmg -igc-emit-visa -simd-mode 16 -inputrt -regkey ResourceLoopUnrollIteration=4,DumpVISAASMToConsole -S < %s | FileCheck %s
;
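; This test checks the vISA emitted for a ResourceLoop around a sample
; instruction when the loop is unrolled (ResourceLoopUnrollIteration=4).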
; Thread-group dimensions for the kernel below: 64 x 1 x 1.
; NOTE(review): nothing in this file reads these globals directly; presumably
; they are looked up by name inside the compiler - confirm.
@ThreadGroupSize_X = constant i32 64
@ThreadGroupSize_Y = constant i32 1
@ThreadGroupSize_Z = constant i32 1
; Opaque pointee type of the texture/resource pointers passed to the sample
; intrinsic in @test1.
%__2D_DIM_Resource = type opaque
; @test1 - single sampleLptr call wrapped in an unrolled resource loop.
;
; Inputs:
;   %src - <64 x i32>; element 40 (+1280) becomes the texture handle.
;   %dst - global float pointer; receives component 0 of the sampled value.
;
; The sampler handle is built from system value 17 (the expected vISA reads it
; from threadIdInGroupX with SIMD16 moves), while the texture handle is
; produced by a single-lane (M1_NM, 1) add. The directives below therefore
; expect the loop to select on the sampler handle only: pick a lane with fbl,
; read that lane's handle through A0, compare it against all lanes into P6,
; and issue the sample for the matching lanes.
;
; Four sample_lz.R instructions are expected in total, which matches
; ResourceLoopUnrollIteration=4 on the run line of this file.
; NOTE(review): presumably one sample per unrolled iteration - confirm.
define spir_kernel void @test1(<64 x i32> %src, float addrspace(1)* %dst) {
entry:
; CHECK: _main_0:
; Sampler handle: system value 17 widened to i32 and cast to a sampler pointer.
%svn0 = call i16 @llvm.genx.GenISA.DCL.SystemValue.i16(i32 17)
%sampler = zext i16 %svn0 to i32
%NonUniformSampler = inttoptr i32 %sampler to <4 x float> addrspace(2752518)*
; CHECK: mov (M1, 16) svn0(0,0)<1> threadIdInGroupX(0,0)<1;1,0>
; CHECK: mov (M1, 16) sampler(0,0)<1> svn0_0(0,0)<1;1,0>
; Texture handle: element 40 of %src plus 1280 (0x500 in the expected add).
%svn1 = extractelement <64 x i32> %src, i32 40
%texture = add i32 %svn1, 1280
%NonUniformTexture = inttoptr i32 %texture to %__2D_DIM_Resource addrspace(2621450)*
; CHECK: add (M1_NM, 1) texture(0,0)<1> src(2,8)<0;1,0> 0x500:w
; CHECK: mov (M1, 16) V0032(0,0)<1> 0x0:f
; The sample itself: all float operands are zero, the first resource operand
; is undef, and the trailing three i32 operands are zero.
%call = tail call fast <4 x float> @llvm.genx.GenISA.sampleLptr.v4f32.f32.p2621443__2D_DIM_Resource.p2621443__2D_DIM_Resource.p2752518v4f32(float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, %__2D_DIM_Resource addrspace(2621450)* undef, %__2D_DIM_Resource addrspace(2621450)* %NonUniformTexture, <4 x float> addrspace(2752518)* %NonUniformSampler, i32 0, i32 0, i32 0)
; Loop set-up: flag initialisation before the first iteration.
; CHECK: cmp.eq (M1, 16) P5 V0033(0,0)<0;1,0> V0033(0,0)<0;1,0>
; CHECK: mov (M1_NM, 1) V0034(0,0)<1> P5
; CHECK: setp (M1_NM, 16) P7 0x0:ud
; CHECK: cmp.eq (M1, 16) P7 V0035(0,0)<0;1,0> V0035(0,0)<0;1,0>
; CHECK: mov (M1_NM, 1) V0036(0,0)<1> P7
; Lane selection: first set bit, scaled by 4, used as an indirect offset into
; the sampler variable; the value read is compared against every lane.
; CHECK: fbl (M1_NM, 1) V0037(0,0)<1> V0036(0,0)<0;1,0>
; CHECK: shl (M1_NM, 1) V0037(0,0)<1> V0037(0,0)<0;1,0> 0x2:w
; CHECK: addr_add (M1_NM, 1) A0(0)<1> &sampler_0 V0038(0,0)<0;1,0>
; CHECK: mov (M1_NM, 1) V0039(0,0)<1> r[A0(0),0]<0;1,0>:ud
; CHECK: cmp.eq (M1, 16) P6 V0039(0,0)<0;1,0> sampler_0(0,0)<1;1,0>
; First sample: must be predicated on P6 (the lanes whose handle matched).
; CHECK: movs (M1_NM, 1) S31(0) V0039(0,0)<0;1,0>
; CHECK: movs (M1_NM, 1) %bss(0) texture(0,0)<0;1,0>
; CHECK: (P6) sample_lz.R (M1, 16) 0x0:uw S31 %bss call_.0 %null.0 V0032.0
; Bookkeeping: P4 accumulates P6, P5 has the P6 lanes toggled.
; CHECK: or (M1_NM, 16) P4 P4 P6
; CHECK: xor (M1_NM, 16) P5 P5 P6
; CHECK: mov (M1_NM, 1) V0034(0,0)<1> P5
; Remaining three sample sequences of the unrolled loop body.
; CHECK: movs (M1_NM, 1) S31(0) V0039(0,0)<0;1,0>
; CHECK: movs (M1_NM, 1) %bss(0) texture(0,0)<0;1,0>
; CHECK: sample_lz.R (M1, 16) 0x0:uw S31 %bss call_.0 %null.0 V0032.0
; CHECK: movs (M1_NM, 1) S31(0) V0039(0,0)<0;1,0>
; CHECK: movs (M1_NM, 1) %bss(0) texture(0,0)<0;1,0>
; CHECK: sample_lz.R (M1, 16) 0x0:uw S31 %bss call_.0 %null.0 V0032.0
; CHECK: movs (M1_NM, 1) S31(0) V0039(0,0)<0;1,0>
; CHECK: movs (M1_NM, 1) %bss(0) texture(0,0)<0;1,0>
; CHECK: sample_lz.R (M1, 16) 0x0:uw S31 %bss call_.0 %null.0 V0032.0
; Back edge: branch to the resource-loop label for lanes where P4 is not set.
; CHECK: (!P4) goto (M1, 16) _test1_001__opt_resource_loop
; Keep the sampled value live by storing its first component to %dst.
%out = extractelement <4 x float> %call, i32 0
store float %out, float addrspace(1)* %dst, align 4
ret void
}
; Intrinsic declarations.
; Only sampleLptr and DCL.SystemValue are called by @test1 in this file; the
; WaveShuffleIndex, WaveAll, WaveBallot and firstbitLo declarations have no
; call sites in the visible IR.
; NOTE(review): attribute groups #0, #1 and #6 are referenced below, but #4 is
; the only group defined in the visible part of this file - confirm intended.
declare <4 x float> @llvm.genx.GenISA.sampleLptr.v4f32.f32.p2621443__2D_DIM_Resource.p2621443__2D_DIM_Resource.p2752518v4f32(float, float, float, float, float, %__2D_DIM_Resource addrspace(2621450)*, %__2D_DIM_Resource addrspace(2621450)*, <4 x float> addrspace(2752518)*, i32, i32, i32) #4
declare i16 @llvm.genx.GenISA.DCL.SystemValue.i16(i32) #1
declare i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32, i32, i32) #6
declare float @llvm.genx.GenISA.WaveAll.f32(float, i8, i32) #0
declare i32 @llvm.genx.GenISA.WaveBallot(i1, i32)
declare i32 @llvm.genx.GenISA.firstbitLo(i32)
; Attributes applied to the sample intrinsic declaration.
attributes #4 = { argmemonly nounwind readonly }
; Named metadata roots: the module-level IGC metadata tree and the list of
; functions known to IGC.
!IGCMetadata = !{!0}
!igc.functions = !{!21}
; ModuleMD -> FuncMD: a one-entry map keyed by @test1 (!2) whose value (!3)
; carries the resource-allocation data and the argument type qualifiers.
!0 = !{!"ModuleMD", !1}
!1 = !{!"FuncMD", !2, !3}
!2 = !{!"FuncMDMap[0]", void (<64 x i32>, float addrspace(1)*)* @test1}
!3 = !{!"FuncMDValue[0]", !4, !17}
; Resource allocation: six argAllocMDListVec entries. Every entry uses
; type 0 / extensionType -1 / indexType -1 except entry [2], which uses
; type 1 and indexType 0.
; NOTE(review): @test1 has two explicit parameters; the extra entries
; presumably describe implicit arguments - confirm.
!4 = !{!"resAllocMD", !5}
!5 = !{!"argAllocMDList", !6, !10, !11, !14, !15, !16}
!6 = !{!"argAllocMDListVec[0]", !7, !8, !9}
!7 = !{!"type", i32 0}
!8 = !{!"extensionType", i32 -1}
!9 = !{!"indexType", i32 -1}
!10 = !{!"argAllocMDListVec[1]", !7, !8, !9}
!11 = !{!"argAllocMDListVec[2]", !12, !8, !13}
!12 = !{!"type", i32 1}
!13 = !{!"indexType", i32 0}
!14 = !{!"argAllocMDListVec[3]", !7, !8, !9}
!15 = !{!"argAllocMDListVec[4]", !7, !8, !9}
!16 = !{!"argAllocMDListVec[5]", !7, !8, !9}
; Argument type qualifiers: three entries, each an empty string.
!17 = !{!"m_OpenCLArgTypeQualifiers", !18, !19, !20}
!18 = !{!"m_OpenCLArgTypeQualifiersVec[0]", !""}
!19 = !{!"m_OpenCLArgTypeQualifiersVec[1]", !""}
!20 = !{!"m_OpenCLArgTypeQualifiersVec[2]", !""}
; igc.functions entry for @test1 with function_type 0.
; NOTE(review): the meaning of function_type 0 is not shown here - confirm.
!21 = !{void (<64 x i32>, float addrspace(1)*)* @test1, !22}
!22 = !{!23}
!23 = !{!"function_type", i32 0}
|