File: reduce_opt.ll

package info (click to toggle)
intel-graphics-compiler 1.0.17791.18-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 102,312 kB
  • sloc: cpp: 935,343; lisp: 286,143; ansic: 16,196; python: 3,279; yacc: 2,487; lex: 1,642; pascal: 300; sh: 174; makefile: 27
file content (90 lines) | stat: -rwxr-xr-x 4,226 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
;=========================== begin_copyright_notice ============================
;
; Copyright (C) 2022 Intel Corporation
;
; SPDX-License-Identifier: MIT
;
;============================ end_copyright_notice =============================

; RUN: igc_opt %s -S -o - -opt-reduce-pass | FileCheck %s
; UNSUPPORTED: khronos-translator

; External builtins called by @kernel1 below. The three id/size/offset queries
; take no arguments and return a <3 x i64> vector; @kernel1 extracts the lanes
; it needs with extractelement.
declare spir_func <3 x i64> @__builtin_spirv_BuiltInGlobalSize()
declare spir_func <3 x i64> @__builtin_spirv_BuiltInGlobalOffset()
declare spir_func <3 x i64> @__builtin_spirv_BuiltInGlobalInvocationId()
; Group integer-multiply builtin: three i32 operands, i32 result. @kernel1
; calls it as (2, 0, value).
; NOTE(review): the first two operands presumably encode the execution scope
; and the group operation, following the SPIR-V OpGroup* operand order - confirm.
declare spir_func i32 @__builtin_spirv_OpGroupIMulKHR_i32_i32_i32(i32, i32, i32)

; kernel1: group integer-multiply reduction whose result reaches memory only
; through a select guarded by "linearized index == 0"; for any other index the
; kernel stores 0.
;
; Parameters:
;   %arg0 - addrspace(1) i32 buffer the reduction input is loaded from
;   %arg1 - addrspace(1) i32 buffer the selected result is stored to
;
; Expected transformation: the group-reduce call is replaced by a call to
; @__builtin_IB_WorkGroupReduce_WI0_IMulKHR_i32 taking only the loaded value.
; NOTE(review): "WI0" presumably stands for "result needed by work-item 0
; only" - confirm against the pass implementation.
;
; The label below anchors the patterns in this function to @kernel1's output so
; they cannot be satisfied by IR belonging to another function.
; CHECK-LABEL: @kernel1(
define spir_kernel void @kernel1(i32 addrspace(1)* %arg0, i32 addrspace(1)* %arg1) {
  ; Fetch the global size, invocation id and offset vectors and split them
  ; into the lanes used by the index computation.
  %size = call spir_func <3 x i64> @__builtin_spirv_BuiltInGlobalSize()
  %size1 = extractelement <3 x i64> %size, i32 1
  %size0 = extractelement <3 x i64> %size, i32 0
  %id = call spir_func <3 x i64> @__builtin_spirv_BuiltInGlobalInvocationId()
  %id2 = extractelement <3 x i64> %id, i32 2
  %id1 = extractelement <3 x i64> %id, i32 1
  %id0 = extractelement <3 x i64> %id, i32 0
  %offset = call spir_func <3 x i64> @__builtin_spirv_BuiltInGlobalOffset()
  %offset2 = extractelement <3 x i64> %offset, i32 2
  %offset1 = extractelement <3 x i64> %offset, i32 1
  %offset0 = extractelement <3 x i64> %offset, i32 0
  ; Linearized index:
  ;   %add1 = ((id2 - off2) * size1 + (id1 - off1)) * size0 + (id0 - off0)
  %sub0 = sub i64 %id2, %offset2
  %mul0 = mul i64 %sub0, %size1
  %sub1 = sub i64 %id1, %offset1
  %add0 = add i64 %mul0, %sub1
  %mul1 = mul i64 %add0, %size0
  %sub2 = sub i64 %id0, %offset0
  %add1 = add i64 %mul1, %sub2
  ; Load arg0[%add1] through an addrspace(4) view of the pointer.
  %gep0 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %add1
  %addr0 = addrspacecast i32 addrspace(1)* %gep0 to i32 addrspace(4)*
  %ld = load i32, i32 addrspace(4)* %addr0, align 4
  ; The reduce call under test, followed by the "index == 0" predicate that
  ; gates the only use of its result (see the select below).
  %red = call spir_func i32 @__builtin_spirv_OpGroupIMulKHR_i32_i32_i32(i32 2, i32 0, i32 %ld)
  %cnd = icmp eq i64 %add1, 0
  %tof = sitofp i32 %red to float
  ; The pass is expected to rewrite only the reduce call: the load feeding it
  ; must survive, and the sitofp must now consume the new call's result.
  ;
  ; CHECK:    [[LD:%.*]] = load i32, i32 addrspace(4)* {{.*}}, align 4
  ; CHECK:    [[TMP1:%.*]] = call spir_func i32 @__builtin_IB_WorkGroupReduce_WI0_IMulKHR_i32(i32 [[LD]])
  ; CHECK:    [[TOF:%.*]] = sitofp i32 [[TMP1]] to float
  %fadd0 = fadd float %tof, 1.000000e+02
  %toi = fptosi float %fadd0 to i32
  ; Keep the post-processed reduction only when %cnd holds; otherwise store 0.
  %sel0 = select i1 %cnd, i32 %toi, i32 0
  %gep1 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %add1
  %addr1 = addrspacecast i32 addrspace(1)* %gep1 to i32 addrspace(4)*
  store i32 %sel0, i32 addrspace(4)* %addr1, align 4
  ret void
}


; External builtins called by @kernel2 below.
; Group integer-add builtin: three i32 operands, i32 result. @kernel2 calls it
; as (2, 0, value).
; NOTE(review): the first two operands presumably encode the execution scope
; and the group operation, following the SPIR-V OpGroup* operand order - confirm.
declare spir_func i32 @__builtin_spirv_OpGroupIAdd_i32_i32_i32(i32, i32, i32)
; Argument-less queries returning a <3 x i64> vector; @kernel2 extracts the
; lanes it needs with extractelement.
declare spir_func <3 x i64> @__builtin_spirv_BuiltInLocalInvocationId()
declare spir_func <3 x i64> @__builtin_spirv_BuiltInWorkgroupSize()

; kernel2: group integer-add reduction whose result reaches memory only through
; a select guarded by "linearized local index == 0"; for any other index the
; kernel stores 0.
;
; Parameters:
;   %arg0 - addrspace(1) i32 buffer the reduction input is loaded from
;   %arg1 - addrspace(1) i32 buffer the selected result is stored to
;
; Expected transformation: the group-reduce call is replaced by a call to
; @__builtin_IB_WorkGroupReduce_WI0_IAdd_i32 taking only the loaded value.
; NOTE(review): "WI0" presumably stands for "result needed by work-item 0
; only" - confirm against the pass implementation.
;
; The label below anchors the patterns in this function to @kernel2's output so
; they cannot be satisfied by IR belonging to another function.
; CHECK-LABEL: @kernel2(
define spir_kernel void @kernel2(i32 addrspace(1)* %arg0, i32 addrspace(1)* %arg1) {
  ; Fetch the workgroup size and local invocation id vectors and split them
  ; into the lanes used by the index computation.
  %size = call spir_func <3 x i64> @__builtin_spirv_BuiltInWorkgroupSize()
  %size1 = extractelement <3 x i64> %size, i32 1
  %size0 = extractelement <3 x i64> %size, i32 0
  %lid = call spir_func <3 x i64> @__builtin_spirv_BuiltInLocalInvocationId()
  %lid2 = extractelement <3 x i64> %lid, i32 2
  %lid1 = extractelement <3 x i64> %lid, i32 1
  %lid0 = extractelement <3 x i64> %lid, i32 0
  ; Linearized local index:
  ;   %add1 = (lid2 * size1 + lid1) * size0 + lid0
  %mul0 = mul i64 %lid2, %size1
  %add0 = add i64 %mul0, %lid1
  %mul1 = mul i64 %add0, %size0
  %add1 = add i64 %mul1, %lid0
  ; Load arg0[%add1] through an addrspace(4) view of the pointer.
  %gep0 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %add1
  %addr0 = addrspacecast i32 addrspace(1)* %gep0 to i32 addrspace(4)*
  %ld0 = load i32, i32 addrspace(4)* %addr0, align 4
  ; The reduce call under test, followed by the "index == 0" predicate that
  ; gates the only use of its result (see the select below).
  %red = call spir_func i32 @__builtin_spirv_OpGroupIAdd_i32_i32_i32(i32 2, i32 0, i32 %ld0)
  %cnd = icmp eq i64 %add1, 0
  %add2 = add nsw i32 %red, 100
  ; The pass is expected to rewrite only the reduce call: the load feeding it
  ; must survive, and the add must now consume the new call's result. The add
  ; pattern references the captured call result rather than re-capturing it,
  ; so a mismatch in the operand is actually detected.
  ;
  ; CHECK:    [[LD0:%.*]] = load i32, i32 addrspace(4)* {{.*}}, align 4
  ; CHECK:    [[TMP1:%.*]] = call spir_func i32 @__builtin_IB_WorkGroupReduce_WI0_IAdd_i32(i32 [[LD0]])
  ; CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP1]], 100
  ; Keep the post-processed reduction only when %cnd holds; otherwise store 0.
  %sel = select i1 %cnd, i32 %add2, i32 0
  %gep1 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %add1
  %addr1 = addrspacecast i32 addrspace(1)* %gep1 to i32 addrspace(4)*
  store i32 %sel, i32 addrspace(4)* %addr1, align 4
  ret void
}