File: predicated-load-uniform.ll

package info (click to toggle)
intel-graphics-compiler2 2.16.0-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 106,644 kB
  • sloc: cpp: 805,640; lisp: 287,672; ansic: 16,414; python: 3,952; yacc: 2,588; lex: 1,666; pascal: 313; sh: 186; makefile: 35
file content (121 lines) | stat: -rw-r--r-- 6,333 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
;=========================== begin_copyright_notice ============================
;
; Copyright (C) 2025 Intel Corporation
;
; SPDX-License-Identifier: MIT
;
;============================ end_copyright_notice =============================
; REQUIRES: llvm-14-plus, regkeys
;
; RUN: igc_opt --opaque-pointers -platformbmg -igc-emit-visa %s -regkey DumpVISAASMToConsole | FileCheck %s
; ------------------------------------------------
; EmitVISAPass
; ------------------------------------------------

; Verifies the vISA emitted for uniform predicated loads, i.e. GenISA.PredicatedLoad calls
; whose operands are all kernel arguments or values derived from them.

; Kernel under test. It issues six PredicatedLoad calls that all share one i1 predicate (%p)
; and differ in result type, alignment operand and merge value.
;   %in0       - addrspace(1) pointer, declared align 8
;   %in1       - addrspace(1) pointer, declared align 4
;   %in2       - addrspace(1) pointer, declared align 2
;   %predicate - i32; source of %p and also reused as a non-constant merge value
; Each call passes (pointer, i64, predicate, merge value). The comments below treat the i64
; operand as the alignment.
; NOTE(review): operand meaning is inferred from how this test uses them - confirm against
; the intrinsic definition.
define spir_kernel void @test(ptr addrspace(1) align 8 %in0, ptr addrspace(1) align 4 %in1, ptr addrspace(1) align 2 %in2, i32 %predicate) {
entry:
  ; %p is true when 0 < %predicate (signed compare).
  %p = icmp slt i32 0, %predicate

; Expected variable declarations. The patterns are matched in file order, so all of the
; declarations below are expected before the instruction sequences further down.

; An alias of the merge value is expected for this case: dSize == 4 && (vSize > 64 || vSize == 6) && Align >= 8
; CHECK: .decl [[VAR0:.*]] v_type=G type=d num_elts=6 align=wordx32
; CHECK: .decl [[ALS0:.*]] v_type=G type=uq num_elts=3 align=wordx32 alias=<[[VAR0]], 0>
; CHECK: .decl [[RES01:.*]] v_type=G type=uq num_elts=3 align=wordx32

; Destination of the i64 load with an immediate merge value (%res1), declared as 2 x ud.
; CHECK: .decl [[RES11:.*]] v_type=G type=ud num_elts=2 align=wordx32

; An alias of the merge value is expected for this case: dSize == 8 && vSize < 64 && Align == 4
; CHECK: .decl [[VAR1:.*]] v_type=G type=q num_elts=2 align=wordx32
; CHECK: .decl [[ALS1:.*]] v_type=G type=ud num_elts=4 align=wordx32 alias=<[[VAR1]], 0>
; CHECK: .decl [[RES21:.*]] v_type=G type=ud num_elts=4 align=wordx32

; Declarations used by %res3 (non-constant vector merge value): the merge variable, its uq
; alias, and the temporary that receives the load.
; CHECK: .decl [[VAR2:.*]] v_type=G type=d num_elts=6 align=dword
; CHECK: .decl [[ALS2:.*]] v_type=G type=uq num_elts=3 align=dword alias=<[[VAR2]], 0>
; CHECK: .decl [[VAR3:.*]] v_type=G type=uq num_elts=3 align=wordx32

; Destination of the sub-DW aligned scalar load with an immediate merge value (%res4).
; CHECK: .decl [[VAR4:.*]] v_type=G type=d num_elts=1 align=wordx32

; Declarations used by %res5: the merge variable (result of the add) and the load temporary.
; CHECK: .decl [[VAR5:.*]] v_type=G type=d num_elts=1 align=dword
; CHECK: .decl [[VAR6:.*]] v_type=G type=d num_elts=1 align=wordx32

; Verify emitVectorCopy of the merge value for the SIMT1 transposed load.
; The copy out of the temporary after the load needs no predicate, because the merge value
; was already copied into the temporary before the load.
; CHECK: mov (M1_NM, 4) [[VAR0]](0,0)<1> 0x0:d
; CHECK: mov (M1_NM, 2) [[VAR0]](0,4)<1> 0x0:d
; CHECK: mov (M1_NM, 2) [[RES01]](0,0)<1> [[ALS0]](0,0)<1;1,0>
; CHECK: mov (M1_NM, 1) [[RES01]](0,2)<1> [[ALS0]](0,2)<0;1,0>
; CHECK: (P1) lsc_load.ugm (M1_NM, 1)  [[RES01]]:d64x3t  flat[{{.*}}]:a64
; CHECK: mov (M1_NM, 2) {{.*}}(0,0)<1> [[RES01]](0,0)<1;1,0>
; CHECK: mov (M1_NM, 1) {{.*}}(0,2)<1> [[RES01]](0,2)<0;1,0>
  %res0 = call <6 x i32> @llvm.genx.GenISA.PredicatedLoad.v6i32.p1.v6i32(ptr addrspace(1) %in0, i64 8, i1 %p, <6 x i32> zeroinitializer)

; Immediate merge value when dSize == 8 && vSize < 64 && Align == 4: the immediate is created
; with a different type (the pattern expects 0x0:ud for the i64 zero merge value).
; CHECK: mov (M1_NM, 2) [[RES11]](0,0)<1> 0x0:ud
; CHECK: (P1) lsc_load.ugm (M1_NM, 1)  [[RES11]]:d32x2t  flat[{{.*}}]:a64
  %res1 = call i64 @llvm.genx.GenISA.PredicatedLoad.i64.p1.i64(ptr addrspace(1) %in1, i64 4, i1 %p, i64 0)

; Non-immediate merge value when dSize == 8 && vSize < 64 && Align == 4: the merge value is
; first materialized in a q variable and then copied through its ud alias.
; CHECK: mov (M1_NM, 2) [[VAR1]](0,0)<1> 0x0:q
; CHECK: mov (M1_NM, 4) [[RES21]](0,0)<1> [[ALS1]](0,0)<1;1,0>
; CHECK: (P1) lsc_load.ugm (M1_NM, 1)  [[RES21]]:d32x4t  flat[{{.*}}]:a64
  %res2 = call <2 x i64> @llvm.genx.GenISA.PredicatedLoad.v2i64.p1.v2i64(ptr addrspace(1) %in1, i64 4, i1 %p, <2 x i64> zeroinitializer)

; The merge value is used as the destination, so a temporary variable is needed and a
; predicated copy is emitted after the predicated load.
; CHECK: mov (M1_NM, 1) [[VAR2]](0,0)<1> predicate(0,0)<0;1,0>
; CHECK: mov (M1_NM, 1) [[VAR2]](0,1)<1> predicate(0,0)<0;1,0>
; CHECK: mov (M1_NM, 1) [[VAR2]](0,2)<1> predicate(0,0)<0;1,0>
; CHECK: mov (M1_NM, 1) [[VAR2]](0,3)<1> predicate(0,0)<0;1,0>
; CHECK: mov (M1_NM, 1) [[VAR2]](0,4)<1> predicate(0,0)<0;1,0>
; CHECK: mov (M1_NM, 1) [[VAR2]](0,5)<1> predicate(0,0)<0;1,0>
; CHECK: (P1) lsc_load.ugm (M1_NM, 1)  [[VAR3]]:d64x3t  flat[{{.*}}]:a64
; CHECK: (P1) mov (M1_NM, 2) [[ALS2]](0,0)<1> [[VAR3]](0,0)<1;1,0>
; CHECK: (P1) mov (M1_NM, 1) [[ALS2]](0,2)<1> [[VAR3]](0,2)<0;1,0>
  ; Build a <6 x i32> merge value with every lane set to %predicate.
  %mergeV = insertelement <6 x i32> undef, i32 %predicate, i32 0
  %mergeV1 = insertelement <6 x i32> %mergeV, i32 %predicate, i32 1
  %mergeV2 = insertelement <6 x i32> %mergeV1, i32 %predicate, i32 2
  %mergeV3 = insertelement <6 x i32> %mergeV2, i32 %predicate, i32 3
  %mergeV4 = insertelement <6 x i32> %mergeV3, i32 %predicate, i32 4
  %mergeV5 = insertelement <6 x i32> %mergeV4, i32 %predicate, i32 5
  %res3 = call <6 x i32> @llvm.genx.GenISA.PredicatedLoad.v6i32.p1.v6i32(ptr addrspace(1) %in0, i64 8, i1 %p, <6 x i32> %mergeV5)

; Sub-DW aligned load: a 32-bit load whose alignment operand is 2, with an immediate merge value.
; CHECK: mov (M1_NM, 1) [[VAR4]](0,0)<1> 0x0:d
; CHECK: (P1) lsc_load.ugm (M1_NM, 1)  [[VAR4]]:d32  flat[{{.*}}]:a64
; CHECK: mov (M1_NM, 1) {{.*}}(0,0)<1> [[VAR4]](0,0)<0;1,0>
  %res4 = call i32 @llvm.genx.GenISA.PredicatedLoad.i32.p1.i32(ptr addrspace(1) %in2, i64 2, i1 %p, i32 0)

; Sub-DW aligned load with a predicated copy after the load: the merge value is the
; non-constant %predicate + 5, and the loaded temporary is copied into it under the predicate.
; CHECK: add (M1_NM, 1) [[VAR5]](0,0)<1> predicate(0,0)<0;1,0> 0x5:w
; CHECK: (P1) lsc_load.ugm (M1_NM, 1)  [[VAR6]]:d32  flat[{{.*}}]:a64
; CHECK: (P1) mov (M1_NM, 1) [[VAR5]](0,0)<1> [[VAR6]](0,0)<0;1,0>
  %mergeV6 = add i32 %predicate, 5
  %res5 = call i32 @llvm.genx.GenISA.PredicatedLoad.i32.p1.i32(ptr addrspace(1) %in2, i64 2, i1 %p, i32 %mergeV6)

  ret void
}

; Declarations of the PredicatedLoad intrinsic overloads called by @test. Each one takes
; (ptr addrspace(1), i64, i1, T) and returns T, for T = <6 x i32>, i64, <2 x i64> and i32.
declare <6 x i32> @llvm.genx.GenISA.PredicatedLoad.v6i32.p1.v6i32(ptr addrspace(1), i64, i1, <6 x i32>)
declare i64 @llvm.genx.GenISA.PredicatedLoad.i64.p1.i64(ptr addrspace(1), i64, i1, i64)
declare <2 x i64> @llvm.genx.GenISA.PredicatedLoad.v2i64.p1.v2i64(ptr addrspace(1), i64, i1, <2 x i64>)
declare i32 @llvm.genx.GenISA.PredicatedLoad.i32.p1.i32(ptr addrspace(1), i64, i1, i32)

; IGC metadata for @test.
;   !igc.functions -> !1 pairs @test with a "function_type" of 0.
;   !IGCMetadata   -> !0 "ModuleMD" -> !2 "FuncMD", which maps @test (FuncMDMap[0]) to
;                     FuncMDValue[0] -> "resAllocMD" -> "argAllocMDList".
; The argAllocMDList has four entries (argAllocMDListVec[0..3]), the same count as the
; kernel's parameters; all four share the nodes type = 0, extensionType = -1, indexType = -1.
; NOTE(review): presumably one entry per kernel argument, in order - confirm against the
; IGC metadata schema.
!IGCMetadata = !{!0}
!igc.functions = !{!1}

!0 = !{!"ModuleMD", !2}
!1 = !{ptr @test, !3}
!3 = !{!4}
!4 = !{!"function_type", i32 0}
!2 = !{!"FuncMD", !5, !6}
!5 = !{!"FuncMDMap[0]", ptr @test}
!6 = !{!"FuncMDValue[0]", !7}
!7 = !{!"resAllocMD", !8}
!8 = !{!"argAllocMDList", !170, !174, !175, !176}
!170 = !{!"argAllocMDListVec[0]", !171, !172, !173}
!171 = !{!"type", i32 0}
!172 = !{!"extensionType", i32 -1}
!173 = !{!"indexType", i32 -1}
!174 = !{!"argAllocMDListVec[1]", !171, !172, !173}
!175 = !{!"argAllocMDListVec[2]", !171, !172, !173}
!176 = !{!"argAllocMDListVec[3]", !171, !172, !173}