File: barrier_control_flow_pass.ll

package info (click to toggle)
intel-graphics-compiler2 2.28.4-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 792,744 kB
  • sloc: cpp: 5,761,745; ansic: 466,928; lisp: 312,143; python: 114,790; asm: 44,736; pascal: 10,930; sh: 8,033; perl: 7,914; ml: 3,625; awk: 3,523; yacc: 2,747; javascript: 2,667; lex: 1,898; f90: 1,028; cs: 573; xml: 474; makefile: 344; objc: 162
file content (133 lines) | stat: -rw-r--r-- 5,981 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
;=========================== begin_copyright_notice ============================
;
; Copyright (C) 2024 Intel Corporation
;
; SPDX-License-Identifier: MIT
;
;============================ end_copyright_notice =============================
; REQUIRES: llvm-14-plus, regkeys
; RUN: igc_opt --opaque-pointers --platformbmg --regkey EnableBarrierControlFlowOptimizationPass --igc-barrier-control-flow-optimization  -S < %s 2>&1 | FileCheck %s
; ------------------------------------------------
; BarrierControlFlowOptimizationPass:
; ------------------------------------------------

; Checks barrier control flow optimization scenario
; before optimization( fence(ugm/tgm, scope local and op none), group sync(threadgroupbarrier) )
; after optimization ( fence(ugm/tgm, scope gpu and op evict), group sync(threadgroupbarrier) )
define void @test_ugm_tgm_thread_group() {
; Input: two fences whose operands are (0, 1, 0) and (2, 1, 0), followed by a
; single thread-group barrier. Per the scenario description above, scope
; operand 1 is "local" and op operand 0 is "none"; first operands 0 and 2 are
; the ugm/tgm pair.
; Expected output, as encoded by the FileCheck directives below: a barrier
; first, then a branch, then both fences with operands rewritten to
; (scope 3 = gpu, op 1 = evict), another branch, and a second barrier before
; the return.
; CHECK-LABEL: @test_ugm_tgm_thread_group(
; CHECK: call void @llvm.genx.GenISA.threadgroupbarrier()
; CHECK-DAG: br
; CHECK: call void @llvm.genx.GenISA.LSCFence(i32 0, i32 3, i32 1)
; CHECK: call void @llvm.genx.GenISA.LSCFence(i32 2, i32 3, i32 1)
; CHECK-DAG: br
; CHECK: call void @llvm.genx.GenISA.threadgroupbarrier()
; CHECK: ret void
;
  ; Local-scope, no-flush fence pair (first operand 0, then 2).
  call void @llvm.genx.GenISA.LSCFence(i32 0, i32 1, i32 0)
  call void @llvm.genx.GenISA.LSCFence(i32 2, i32 1, i32 0)
  ; The group sync that immediately follows the fences.
  call void @llvm.genx.GenISA.threadgroupbarrier()
  ret void
}

; Checks barrier control flow optimization scenario
; before optimization( fence(slm, scope group and op none)/fence(tgm, scope local and op none), group sync(threadgroupbarrier) )
; after optimization ( fence(slm, scope group and op none)/fence(tgm, scope gpu and op evict), group sync(threadgroupbarrier) )
define void @test_slm_tgm_thread_group() {
; Input: a fence with operands (3, 0, 0) -- slm, scope group, op none per the
; scenario description above -- then a fence with operands (2, 1, 0) -- tgm,
; scope local, op none -- then a single thread-group barrier.
; Expected output, as encoded by the FileCheck directives below: the
; (3, 0, 0) fence is still present with unchanged operands and comes first,
; followed by a barrier, a branch, the first-operand-2 fence rewritten to
; (scope 3 = gpu, op 1 = evict), another branch, and a second barrier before
; the return.
; CHECK-LABEL: @test_slm_tgm_thread_group(
; CHECK: call void @llvm.genx.GenISA.LSCFence(i32 3, i32 0, i32 0)
; CHECK: call void @llvm.genx.GenISA.threadgroupbarrier()
; CHECK-DAG: br
; CHECK: call void @llvm.genx.GenISA.LSCFence(i32 2, i32 3, i32 1)
; CHECK-DAG: br
; CHECK: call void @llvm.genx.GenISA.threadgroupbarrier()
; CHECK: ret void
;
  ; Fence with first operand 3; expected to appear unchanged in the output.
  call void @llvm.genx.GenISA.LSCFence(i32 3, i32 0, i32 0)
  ; Fence with first operand 2; the only fence expected to be rewritten.
  call void @llvm.genx.GenISA.LSCFence(i32 2, i32 1, i32 0)
  call void @llvm.genx.GenISA.threadgroupbarrier()
  ret void
}

; Checks barrier control flow optimization scenario
; before optimization( fence(ugm, scope local and op none)/fence(tgm, scope local and op none)/group sync(threadgroupbarrier), fence(ugm, scope gpu and op evict)/fence(tgm, scope gpu and op evict) )
; after optimization ( group sync(threadgroupbarrier)/fence(ugm, scope gpu and op evict)/fence(tgm, scope gpu and op evict), group sync(threadgroupbarrier)/fence(ugm, scope gpu and op evict)/fence(tgm, scope gpu and op evict) )
define void @test_slm_tgm_bar_into_slm_tgm() {
; Input: the entry block holds a local-scope, no-flush fence pair (first
; operand 0, then 2) plus a barrier and branches to %test2; %test2 holds a
; second fence pair that already carries operands (scope 3, op 1) and has no
; barrier of its own.
; Expected output, as encoded by the FileCheck directives below: a barrier,
; then the fence pair with (scope 3, op 1), then a second barrier, then the
; same fence pair again, with branches between the groups, and finally the
; return.
; NOTE(review): the function name says "slm_tgm", but every fence here uses
; first operand 0 or 2, which the scenario description above labels ugm/tgm.
; No fence with first operand 3 appears in this function -- confirm the name
; is just historical.
; CHECK-LABEL: @test_slm_tgm_bar_into_slm_tgm(
; CHECK:  call void @llvm.genx.GenISA.threadgroupbarrier()
; CHECK-DAG: br
; CHECK: call void @llvm.genx.GenISA.LSCFence(i32 0, i32 3, i32 1)
; CHECK: call void @llvm.genx.GenISA.LSCFence(i32 2, i32 3, i32 1)
; CHECK-DAG: br
; CHECK: call void @llvm.genx.GenISA.threadgroupbarrier()
; CHECK-DAG: br
; CHECK: call void @llvm.genx.GenISA.LSCFence(i32 0, i32 3, i32 1)
; CHECK: call void @llvm.genx.GenISA.LSCFence(i32 2, i32 3, i32 1)
; CHECK-DAG: br
; CHECK: ret void
  ; Entry block: local-scope, no-flush fence pair followed by the barrier.
  call void @llvm.genx.GenISA.LSCFence(i32 0, i32 1, i32 0)
  call void @llvm.genx.GenISA.LSCFence(i32 2, i32 1, i32 0)
  call void @llvm.genx.GenISA.threadgroupbarrier()
  br label %test2
test2:
  ; Second block: fence pair already at (scope 3, op 1); no barrier here.
  call void @llvm.genx.GenISA.LSCFence(i32 0, i32 3, i32 1)
  call void @llvm.genx.GenISA.LSCFence(i32 2, i32 3, i32 1)
  br label %finish
finish:
  ret void
}

; Checks barrier control flow optimization scenario
; before optimization( fence(slm, scope group and op none), group sync(threadgroupbarrier) )
; after optimization ( fence(slm, scope group and op none), group sync(threadgroupbarrier) )
define void @test_no_redundant_barrier() {
; Input layout:
;   entry : fence with operands (3, 0, 0) followed by a barrier
;   %1    : an atomic intrinsic call on an addrspace(3) pointer; no fence and
;           no barrier in this block
;   %6    : fence pair (0, 3, 1) / (2, 3, 1), a barrier, then a load through
;           the same addrspace(3) pointer
; Expected output, as encoded by the FileCheck directives below: the first
; fence and barrier stay in place; no barrier call may appear in the regions
; guarded by the two negative directives (one before the atomic sequence, one
; right after it); the (scope 3, op 1) fence pair precedes the final barrier,
; and the load and its mask come after that barrier.
; The input is written with typed pointers (i32 addrspace(3)*), while the
; directives match the opaque "ptr addrspace(3)" spelling; the test is run
; with --opaque-pointers (see the command line at the top of the file).
; NOTE(review): the load directive expects "!tbaa !0" although the input
; attaches !507 -- presumably the metadata is renumbered when the module is
; printed; confirm if this directive ever becomes flaky.
; CHECK-LABEL: @test_no_redundant_barrier(
; CHECK: call void @llvm.genx.GenISA.LSCFence(i32 3, i32 0, i32 0)
; CHECK: call void @llvm.genx.GenISA.threadgroupbarrier()
; CHECK-DAG: br
; CHECK-DAG: br
; CHECK-NOT: call void @llvm.genx.GenISA.threadgroupbarrier()
; CHECK-DAG: br
; CHECK: [[TMP10:%.*]] = and i32 3, 31
; CHECK: [[TMP11:%.*]] = shl nuw i32 1, [[TMP10]]
; CHECK: [[TMP12:%.*]] = inttoptr i32 16 to ptr addrspace(3)
; CHECK: [[TMP13:%.*]] = call i32 @llvm.genx.GenISA.intatomicrawA64.i32.p3i32.p3i32(ptr addrspace(3) [[TMP12]], ptr addrspace(3) [[TMP12]], i32 [[TMP11]], i32 9)
; CHECK-DAG: br
; CHECK-NOT: call void @llvm.genx.GenISA.threadgroupbarrier()
; CHECK-DAG: br
; CHECK: call void @llvm.genx.GenISA.LSCFence(i32 0, i32 3, i32 1)
; CHECK: call void @llvm.genx.GenISA.LSCFence(i32 2, i32 3, i32 1)
; CHECK-DAG: br
; CHECK: call void @llvm.genx.GenISA.threadgroupbarrier()
; CHECK: [[TMP17:%.*]] = load i32, ptr addrspace(3) [[TMP12]], align 16, !tbaa !0
; CHECK: [[TMP18:%.*]] = and i32 [[TMP17]], 31
; CHECK: ret void
;
  ; Entry block: fence (3, 0, 0) -- the same operands the description of
  ; @test_slm_tgm_thread_group labels slm / scope group / op none -- and the
  ; first barrier.
  call void @llvm.genx.GenISA.LSCFence(i32 3, i32 0, i32 0)
  call void @llvm.genx.GenISA.threadgroupbarrier()
  br label %1
1:
  ; Build a one-bit mask (1 << (3 & 31)) and an addrspace(3) pointer from the
  ; constant address 16, then pass both to the atomic intrinsic.
  %2 = and i32 3, 31
  %3 = shl nuw i32 1, %2
  %4 = inttoptr i32 16 to i32 addrspace(3)*
  ; The same pointer is passed twice; the trailing i32 9 is an opcode-like
  ; selector whose meaning is not shown in this file -- TODO confirm.
  %5 = call i32 @llvm.genx.GenISA.intatomicrawA64.i32.p3i32.p3i32(i32 addrspace(3)* %4, i32 addrspace(3)* %4, i32 %3, i32 9)
  br label %6

6:
  ; Fence pair already at (scope 3, op 1), followed by the second barrier.
  call void @llvm.genx.GenISA.LSCFence(i32 0, i32 3, i32 1)
  call void @llvm.genx.GenISA.LSCFence(i32 2, i32 3, i32 1)
  call void @llvm.genx.GenISA.threadgroupbarrier()
  ; Read back through the pointer created in block %1. %8 has no users in the
  ; function; it exists only so the directives can match it after the barrier.
  %7 = load i32, i32 addrspace(3)* %4, align 16, !tbaa !507
  %8 = and i32 %7, 31

  ret void
}

; Declarations of the GenISA intrinsics called by the tests in this file.
; LSCFence takes three i32 operands; going by the scenario descriptions in
; this file they are, in order, the memory kind (0 and 2 = ugm/tgm, 3 = slm),
; the scope (0 = group, 1 = local, 3 = gpu) and the flush op (0 = none,
; 1 = evict).
declare void @llvm.genx.GenISA.LSCFence(i32, i32, i32)
; Referred to as "group sync" in the scenario descriptions in this file.
declare void @llvm.genx.GenISA.threadgroupbarrier()
; Atomic intrinsic used by @test_no_redundant_barrier: two addrspace(3) i32
; pointers and two i32 operands, returning i32. Operand meanings are not
; shown in this file -- TODO confirm against the intrinsic definition.
declare i32 @llvm.genx.GenISA.intatomicrawA64.i32.p3i32.p3i32(i32 addrspace(3)*, i32 addrspace(3)*, i32, i32)

; TBAA metadata attached to the load in @test_no_redundant_barrier via
; "!tbaa !507": an access tag (!507) whose base and access type are both the
; "int" node (!508), which hangs off "omnipotent char" (!509) under the
; "Simple C/C++ TBAA" root (!510).
!507 = !{!508, !508, i64 0}
!508 = !{!"int", !509, i64 0}
!509 = !{!"omnipotent char", !510, i64 0}
!510 = !{!"Simple C/C++ TBAA"}