File: args_via_stack.ll

package info (click to toggle)
intel-graphics-compiler 1.0.17791.18-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 102,312 kB
  • sloc: cpp: 935,343; lisp: 286,143; ansic: 16,196; python: 3,279; yacc: 2,487; lex: 1,642; pascal: 300; sh: 174; makefile: 27
file content (110 lines) | stat: -rw-r--r-- 6,301 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
;=========================== begin_copyright_notice ============================
;
; Copyright (C) 2021 Intel Corporation
;
; SPDX-License-Identifier: MIT
;
;============================ end_copyright_notice =============================

; RUN: %opt %use_old_pass_manager% -GenXPrologEpilogInsertion -vc-arg-reg-size=32 -vc-ret-reg-size=12 \
; RUN: -mattr=+ocl_runtime -march=genx64 -mcpu=Gen9 -S < %s | FileCheck %s

; COM: This test only pins down the current behavior of the prolog/epilog
; COM: insertion transformation, so some odd-looking things can be observed
; COM: here, such as the absence of an SP increment.

target datalayout = "e-p:64:64-i64:64-n8:16:32:64"
target triple = "spir64-unknown-unknown"

; CHECK-LABEL: foo
; COM: ARG is fully occupied by the first arg.
; CHECK: %[[ARGREAD:[^ ]+]] = call <256 x i32> @llvm.genx.read.predef.reg.v256i32.v256i32(i32 8, <256 x i32> undef)
; CHECK: %[[ARG0:[^ ]+]] = call <256 x i32> @llvm.genx.rdregioni.v256i32.v256i32.i16(<256 x i32> %[[ARGREAD]], i32 0, i32 256, i32 1, i16 0, i32 undef)

; COM: read SP
; CHECK: %[[SPREAD:[^ ]+]] = call <1 x i64> @llvm.genx.read.predef.reg.v1i64.i64(i32 10, i64 undef)
; CHECK: %[[SP:[^ ]+]] = call i64 @llvm.genx.rdregioni.i64.v1i64.i16(<1 x i64> %[[SPREAD]], i32 0, i32 1, i32 1, i16 0, i32 undef)

; COM: Calculate the offset of the arguments on the stack
; CHECK: %[[ARGBEGIN:[^ ]+]] = sub i64 %[[SP]], 1072

; COM: The other args are passed on the stack. Check that simple LLVM loads are generated.
; CHECK: %[[SP1PTR:[^ ]+]] = inttoptr i64 %[[ARGBEGIN]] to i32*
; CHECK: %[[ARG1:[^ ]+]] = load i32, i32* %[[SP1PTR]]

; CHECK: %[[SP2:[^ ]+]] = add i64 %[[ARGBEGIN]], 16
; CHECK: %[[SP2PTR:[^ ]+]] = inttoptr i64 %[[SP2]] to <256 x i32>*
; CHECK: %[[ARG2:[^ ]+]] =  load <256 x i32>, <256 x i32>* %[[SP2PTR]]

; CHECK: %[[SP3:[^ ]+]] = add i64 %[[SP2]], 1024
; CHECK: %[[SP3PTR:[^ ]+]] = inttoptr i64 %[[SP3]] to <31 x i8>*
; CHECK: %[[ARG3:[^ ]+]] =  load <31 x i8>, <31 x i8>* %[[SP3PTR]]

; COM: Callee under test. It takes four arguments and returns the second one
; COM: (%1) unchanged; the other three are not used in the body. Per the COM
; COM: notes above, the first argument is expected to be read from ARG and the
; COM: remaining three to be loaded from the stack.
define internal spir_func i32 @foo(<256 x i32> %0, i32 %1, <256 x i32> %2, <31 x i8> %3) #0 {
entry:
  ret i32 %1
}

; COM: External helpers with no body in this file. @bar calls each of them once
; COM: to obtain the values it then passes to @foo.
declare spir_func <256 x i32> @get_arg()
declare spir_func i32 @get_arg1()
declare spir_func <31 x i8> @get_arg2()

; CHECK-LABEL: bar
; COM: IGC calling conv: tmpFP = FP; FP = SP.
; CHECK: %[[FPREAD:[^ ]+]] = call <1 x i64> @llvm.genx.read.predef.reg.v1i64.i64(i32 11, i64 undef)
; CHECK: %[[FPRDR:[^ ]+]] = call <1 x i64> @llvm.genx.rdregioni.v1i64.v1i64.i16(<1 x i64> %[[FPREAD]], i32 0, i32 1, i32 1, i16 0, i32 undef)
; CHECK: %[[FPCOPY:[^ ]+]] = call <1 x i64> @llvm.genx.wrregioni.v1i64.v1i64.i16.i1(<1 x i64> undef, <1 x i64> %[[FPRDR]], i32 0, i32 1, i32 1, i16 0, i32 undef, i1 true)
; CHECK: %[[TMPFP:[^ ]+]] = call i64 @llvm.genx.rdregioni.i64.v1i64.i16(<1 x i64> %[[FPCOPY]], i32 0, i32 1, i32 1, i16 0, i32 undef)
; CHECK: %[[SPREAD:[^ ]+]] = call <1 x i64> @llvm.genx.read.predef.reg.v1i64.i64(i32 10, i64 undef)
; CHECK: %[[SPRDR:[^ ]+]] = call i64 @llvm.genx.rdregioni.i64.v1i64.i16(<1 x i64> %[[SPREAD]], i32 0, i32 1, i32 1, i16 0, i32 undef)
; CHECK: %[[FPWRR:[^ ]+]] = call i64 @llvm.genx.wrregioni.i64.i64.i16.i1(i64 undef, i64 %[[SPRDR]], i32 0, i32 1, i32 1, i16 0, i32 undef, i1 true)
; CHECK: call i64 @llvm.genx.write.predef.reg.i64.i64(i32 11, i64 %[[FPWRR]])

; COM: The first arg fully occupies ARG
; CHECK: %[[ARGREAD:[^ ]+]] = call <256 x i32> @llvm.genx.read.predef.reg.v256i32.v256i32(i32 8, <256 x i32> undef)
; CHECK: %[[ARG0:[^ ]+]] = call <256 x i32> @llvm.genx.wrregioni.v256i32.v256i32.i16.i1(<256 x i32> %[[ARGREAD]], <256 x i32> %arg1and3, i32 0, i32 256, i32 1, i16 0, i32 undef, i1 true)
; CHECK: %[[ARGWRITE:[^ ]+]] = call <256 x i32> @llvm.genx.write.predef.reg.v256i32.v256i32(i32 8, <256 x i32> %[[ARG0]])

; COM: read SP
; CHECK: %[[SPREAD:[^ ]+]] = call <1 x i64> @llvm.genx.read.predef.reg.v1i64.i64(i32 10, i64 undef)
; CHECK: %[[SPRDR:[^ ]+]] = call <1 x i64> @llvm.genx.rdregioni.v1i64.v1i64.i16(<1 x i64> %[[SPREAD]], i32 0, i32 1, i32 1, i16 0, i32 undef)
; CHECK: %[[SPCOPY:[^ ]+]] = call <1 x i64> @llvm.genx.wrregioni.v1i64.v1i64.i16.i1(<1 x i64> undef, <1 x i64> %[[SPRDR]], i32 0, i32 1, i32 1, i16 0, i32 undef, i1 true)
; CHECK: %[[SP:[^ ]+]] = call i64 @llvm.genx.rdregioni.i64.v1i64.i16(<1 x i64> %[[SPCOPY]], i32 0, i32 1, i32 1, i16 0, i32 undef)

; COM: allocate stack for args
; CHECK: %[[NEWSP:[^ ]+]] = add i64 %[[SP]], 1072
; CHECK: %[[NEWSPWRR:[^ ]+]] = call i64 @llvm.genx.wrregioni.i64.i64.i16.i1(i64 undef, i64 %[[NEWSP]], i32 0, i32 1, i32 1, i16 0, i32 undef, i1 true)
; CHECK: call i64 @llvm.genx.write.predef.reg.i64.i64(i32 10, i64 %[[NEWSPWRR]])

; COM: The other args are passed on the stack. Check that simple LLVM stores are generated.
; CHECK: %[[SP1PTR:[^ ]+]] = inttoptr i64 %[[SP]] to i32*
; CHECK: store i32 %arg2, i32* %[[SP1PTR]]

; CHECK: %[[SP2:[^ ]+]] = add i64 %[[SP]], 16
; CHECK: %[[SP2PTR:[^ ]+]] = inttoptr i64 %[[SP2]] to <256 x i32>*
; CHECK:  store <256 x i32> %arg1and3, <256 x i32>* %[[SP2PTR]]

; CHECK: %[[SP3:[^ ]+]] = add i64 %[[SP2]], 1024
; CHECK: %[[SP3PTR:[^ ]+]] = inttoptr i64 %[[SP3]] to <31 x i8>*
; CHECK: store <31 x i8> %arg4, <31 x i8>* %[[SP3PTR]]

; COM: Restore FP and SP: SP = FP; FP = tmpFP.
; CHECK: %[[FPREAD:[^ ]+]] = call <1 x i64> @llvm.genx.read.predef.reg.v1i64.i64(i32 11, i64 undef)
; CHECK: %[[FPRDR:[^ ]+]]  = call i64 @llvm.genx.rdregioni.i64.v1i64.i16(<1 x i64> %[[FPREAD]], i32 0, i32 1, i32 1, i16 0, i32 undef)
; CHECK: %[[SPWRR:[^ ]+]] = call i64 @llvm.genx.wrregioni.i64.i64.i16.i1(i64 undef, i64 %[[FPRDR]], i32 0, i32 1, i32 1, i16 0, i32 undef, i1 true)
; CHECK: call i64 @llvm.genx.write.predef.reg.i64.i64(i32 10, i64 %[[SPWRR]])
; CHECK: %[[FPWRR:[^ ]+]] = call i64 @llvm.genx.wrregioni.i64.i64.i16.i1(i64 undef, i64 %[[TMPFP]], i32 0, i32 1, i32 1, i16 0, i32 undef, i1 true)
; CHECK: call i64 @llvm.genx.write.predef.reg.i64.i64(i32 11, i64 %[[FPWRR]])

; COM: Caller under test. It obtains three values from the external helpers,
; COM: forwards them to @foo and returns @foo's result.
define internal spir_func i32 @bar() #0 {
entry:
  %arg1and3 = call spir_func <256 x i32> @get_arg()
  %arg2 = call spir_func i32 @get_arg1()
  %arg4 = call spir_func <31 x i8> @get_arg2()
  ; %arg1and3 is passed as both the first and the third argument; the checks
  ; above expect one copy to be written to ARG and the other to be stored to
  ; the stack.
  %res = tail call spir_func i32 @foo(<256 x i32> %arg1and3, i32 %arg2, <256 x i32> %arg1and3, <31 x i8> %arg4)
  ret i32 %res
}

; COM: Negative check: the text readnone "CMStackCall" must not appear in the
; COM: printed attribute group, i.e. readnone is not expected to remain
; COM: directly before "CMStackCall" in the output.
; COM: NOTE(review): the directive used to be misspelled and was therefore
; COM: ignored by FileCheck; confirm that the pass really drops readnone.
; CHECK-NOT: readnone "CMStackCall"
attributes #0 = { noinline nounwind readnone "CMStackCall" "VCFunction" "VCStackCall" }