File: fold-immediates.ll

package info (click to toggle)
intel-graphics-compiler2 2.16.0-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 106,644 kB
  • sloc: cpp: 805,640; lisp: 287,672; ansic: 16,414; python: 3,952; yacc: 2,588; lex: 1,666; pascal: 313; sh: 186; makefile: 35
file content (96 lines) | stat: -rw-r--r-- 4,731 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
;=========================== begin_copyright_notice ============================
;
; Copyright (C) 2024 Intel Corporation
;
; SPDX-License-Identifier: MIT
;
;============================ end_copyright_notice =============================

; REQUIRES: temporarily-disabled, regkeys
;
; RUN: igc_opt -platformbmg -igc-emit-visa %s -dx12 -inputcs -regkey DumpVISAASMToConsole | FileCheck %s
; ------------------------------------------------
; EmitVISAPass
; ------------------------------------------------
; Module target description.
; Data layout highlights (per the LLVM data-layout string syntax):
;   e            - little-endian
;   p:32:32:32   - pointers in the default address space are 32 bits wide
;   p1 / p2      - pointers in address spaces 1 and 2 are 64 bits wide
;   n8:16:32     - native integer widths are 8, 16 and 32 bits
; NOTE(review): the buffer address spaces used by the kernel in this file are
; not listed here, so they presumably fall back to the default 32-bit pointer
; spec, which matches the inttoptr-from-i32 conversions - confirm.
target datalayout = "e-p:32:32:32-p1:64:64:64-p2:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:32-v128:32:32-a0:0:32-n8:16:32-S32"
target triple = "dxil-ms-dx"

; Constant i32 globals named after the three thread-group dimensions, with
; values X = 1, Y = 1, Z = 16. Nothing in this file reads them directly.
; NOTE(review): presumably consumed by IGC's compute-shader setup to size the
; thread group - confirm against the compute-shader input handling.
@ThreadGroupSize_X = constant i32 1
@ThreadGroupSize_Y = constant i32 1
@ThreadGroupSize_Z = constant i32 16

; Function Attrs: null_pointer_is_valid
;
; CSMain - compute-shader test kernel for folding constant address adds into
; the immediate offset of LSC loads.
;
; Arguments:
;   %runtime_value_0 - i32 converted (inttoptr) into the source buffer pointer.
;   %runtime_value_1 - i32 base offset; the SIMD lane id is added to it.
;   %runtime_value_2 - i32 converted (inttoptr) into the destination pointer.
;
; Four <4 x i32> raw-vector loads read the source buffer at
; %varOffset + {0, 256, 512, 768}. The FileCheck directives interleaved below
; expect the constant part of each address to show up as an immediate on the
; emitted lsc_load ([varOffset+0x100], [varOffset+0x200], ...). The four
; vectors are then summed element-wise, each result lane using the matching
; element of every load, and the sum is stored to the destination at offset 0.
; All four channels of every d32x4 load are therefore live.
;
; The names %read_N, %varOffset and %runtime_value_0 appear literally in the
; expected vISA text, so they must not be renamed.
define void @CSMain(i32 %runtime_value_0, i32 %runtime_value_1, i32 %runtime_value_2) #0 {
  ; Buffer pointers are materialized from the 32-bit runtime values.
  %src = inttoptr i32 %runtime_value_0 to <4 x float> addrspace(2490368)*
  %dst = inttoptr i32 %runtime_value_2 to <4 x float> addrspace(2490369)*

  ; Variable (per-lane) part of the address: runtime offset + lane id.
  %lane = call i16 @llvm.genx.GenISA.simdLaneId()
  %lane32 = zext i16 %lane to i32
  %varOffset = add i32 %runtime_value_1, %lane32

  ; Load 0: no constant offset.
; CHECK: lsc_load.ugm (M1, 32) read_0:d32x4 bss(runtime_value_0)[varOffset]:a32
  %read_0 = call <4 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v4i32.p2490368v4f32(<4 x float> addrspace(2490368)* %src, i32 %varOffset, i32 4, i1 false)
  %ext0_0 = extractelement <4 x i32> %read_0, i32 0
  %ext0_1 = extractelement <4 x i32> %read_0, i32 1
  %ext0_2 = extractelement <4 x i32> %read_0, i32 2
  %ext0_3 = extractelement <4 x i32> %read_0, i32 3

  ; Load 1: +256 (0x100) is expected to be folded into the load's immediate.
  %addr_1 = add i32 %varOffset, 256
; CHECK: lsc_load.ugm (M1, 32) read_1:d32x4 bss(runtime_value_0)[varOffset+0x100]:a32
  %read_1 = call <4 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v4i32.p2490368v4f32(<4 x float> addrspace(2490368)* %src, i32 %addr_1, i32 4, i1 false)
  %ext1_0 = extractelement <4 x i32> %read_1, i32 0
  %ext1_1 = extractelement <4 x i32> %read_1, i32 1
  %ext1_2 = extractelement <4 x i32> %read_1, i32 2
  %ext1_3 = extractelement <4 x i32> %read_1, i32 3

  ; Load 2: +512 (0x200).
  %addr_2 = add i32 %varOffset, 512
; CHECK: lsc_load.ugm (M1, 32) read_2:d32x4 bss(runtime_value_0)[varOffset+0x200]:a32
  %read_2 = call <4 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v4i32.p2490368v4f32(<4 x float> addrspace(2490368)* %src, i32 %addr_2, i32 4, i1 false)
  %ext2_0 = extractelement <4 x i32> %read_2, i32 0
  %ext2_1 = extractelement <4 x i32> %read_2, i32 1
  %ext2_2 = extractelement <4 x i32> %read_2, i32 2
  %ext2_3 = extractelement <4 x i32> %read_2, i32 3

  ; Load 3: +768 (0x300).
  %addr_3 = add i32 %varOffset, 768
; CHECK: lsc_load.ugm (M1, 32) read_3:d32x4 bss(runtime_value_0)[varOffset+0x300]:a32
  %read_3 = call <4 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v4i32.p2490368v4f32(<4 x float> addrspace(2490368)* %src, i32 %addr_3, i32 4, i1 false)
  %ext3_0 = extractelement <4 x i32> %read_3, i32 0
  %ext3_1 = extractelement <4 x i32> %read_3, i32 1
  %ext3_2 = extractelement <4 x i32> %read_3, i32 2
  %ext3_3 = extractelement <4 x i32> %read_3, i32 3

  ; Element-wise reduction: addK = ext0_K + ext1_K + ext2_K + ext3_K.
  %add0_0_1 = add i32 %ext0_0, %ext1_0
  %add0_2_3 = add i32 %ext2_0, %ext3_0
  %add0 = add i32 %add0_0_1, %add0_2_3
  %add1_0_1 = add i32 %ext0_1, %ext1_1
  %add1_2_3 = add i32 %ext2_1, %ext3_1
  %add1 = add i32 %add1_0_1, %add1_2_3
  %add2_0_1 = add i32 %ext0_2, %ext1_2
  %add2_2_3 = add i32 %ext2_2, %ext3_2
  %add2 = add i32 %add2_0_1, %add2_2_3
  %add3_0_1 = add i32 %ext0_3, %ext1_3
  %add3_2_3 = add i32 %ext2_3, %ext3_3
  %add3 = add i32 %add3_0_1, %add3_2_3

  ; Pack the four sums into one vector and write it to the destination.
  %res0 = insertelement <4 x i32> undef, i32 %add0, i64 0
  %res1 = insertelement <4 x i32> %res0, i32 %add1, i64 1
  %res2 = insertelement <4 x i32> %res1, i32 %add2, i64 2
  %res3 = insertelement <4 x i32> %res2, i32 %add3, i64 3
  call void @llvm.genx.GenISA.storerawvector.indexed.p2490369v4f32.v4i32(<4 x float> addrspace(2490369)* %dst, i32 0, <4 x i32> %res3, i32 4, i1 false)
  ret void
}

; Raw-vector load intrinsic: takes a buffer pointer and an i32 offset and
; returns a <4 x i32>. The kernel in this file always passes `i32 4, i1 false`
; for the last two operands.
; NOTE(review): those presumably are the alignment and a volatile flag -
; confirm against the GenISA intrinsic definitions.
declare <4 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v4i32.p2490368v4f32(<4 x float> addrspace(2490368)*, i32, i32, i1) #1

; Raw-vector store intrinsic: takes a buffer pointer, an i32 offset and the
; <4 x i32> value to write. The trailing i32/i1 operands are passed as
; `i32 4, i1 false` by the kernel in this file.
; NOTE(review): presumably alignment and volatile flag, as for the load.
declare void @llvm.genx.GenISA.storerawvector.indexed.p2490369v4f32.v4i32(<4 x float> addrspace(2490369)*, i32, <4 x i32>, i32, i1) #2

; Returns an i16; the kernel zero-extends it and adds it to the base offset.
; NOTE(review): by its name this is the invoking lane's index within the SIMD
; group - confirm.
declare i16 @llvm.genx.GenISA.simdLaneId() #3

; Attribute groups referenced by the definitions and declarations above:
;   #0 - on @CSMain
;   #1 - on the raw-vector load intrinsic (read-only, argument memory only)
;   #2 - on the raw-vector store intrinsic (write-only, argument memory only)
;   #3 - on simdLaneId (accesses no memory)
attributes #0 = { null_pointer_is_valid }
attributes #1 = { argmemonly nounwind readonly }
attributes #2 = { argmemonly nounwind writeonly }
attributes #3 = { nounwind readnone }

; IGC module metadata.
; !igc.functions lists one entry (!0) that pairs @CSMain with a property list
; (!1) holding a single {"function_type", i32 0} node.
; !IGCMetadata points at a "ModuleMD" node whose "FuncMD" child maps index 0 to
; @CSMain ("FuncMDMap[0]") with an empty value node ("FuncMDValue[0]").
; NOTE(review): function_type 0 presumably marks @CSMain as a kernel entry
; point - confirm against IGC's metadata definitions.
!igc.functions = !{!0}
!IGCMetadata = !{!3}

!0 = !{void (i32, i32, i32)* @CSMain, !1}
!1 = !{!2}
!2 = !{!"function_type", i32 0}
!3 = !{!"ModuleMD", !4}
!4 = !{!"FuncMD", !5, !6}
!5 = !{!"FuncMDMap[0]", void (i32, i32, i32)* @CSMain}
!6 = !{!"FuncMDValue[0]"}