; File: insert-extracts.ll
;
; package info (click to toggle)
; intel-graphics-compiler2 2.16.0-2
;   • links: PTS, VCS
;   • area: main
;   • in suites: sid
;   • size: 106,644 kB
;   • sloc: cpp: 805,640; lisp: 287,672; ansic: 16,414; python: 3,952; yacc: 2,588; lex: 1,666; pascal: 313; sh: 186; makefile: 35
; file content (175 lines) | stat: -rw-r--r-- 11,605 bytes parent folder | download
;=========================== begin_copyright_notice ============================
;
; Copyright (C) 2025 Intel Corporation
;
; SPDX-License-Identifier: MIT
;
;============================ end_copyright_notice =============================

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; REQUIRES: regkeys
; RUN: igc_opt -S --igc-split-loads -platformpvc --regkey=LS_enableLoadSplitting=1 --regkey=LS_ignoreSplitThreshold=1 --regkey=LS_minSplitSize_GRF=0 --regkey=LS_minSplitSize_E=0 %s | FileCheck %s --check-prefix=MINSPLIT
; RUN: igc_opt -S --igc-split-loads -platformpvc --regkey=LS_enableLoadSplitting=1 --regkey=LS_ignoreSplitThreshold=1 --regkey=LS_minSplitSize_GRF=0 --regkey=LS_minSplitSize_E=16 %s | FileCheck %s --check-prefix=SPLIT16

; External function taking one <8 x i16> argument; it has no body in this
; file. @test_1 calls it once per repacked vector, presumably so that each
; vector has a real use the pass under test must preserve.
declare spir_func void @fun_v8i16(<8 x i16>)

; Block-read intrinsic used as the test input: takes an i64 followed by nine
; i32, two i1 and a final i32 operand, and is declared here as returning
; <32 x i16>.
; NOTE(review): the name suffix ".v16i32" does not match the <32 x i16>
; return type, whereas the calls in the expected output use suffixes that do
; match their return types (.v8i16, .v16i16). Confirm whether the suffix is
; ignored by the pass or should read ".v32i16".
declare spir_func <32 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v16i32(i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i1, i1, i32)

; test_1: a single <32 x i16> block read whose 32 elements are each pulled out
; with a constant-index extractelement and repacked into four <8 x i16>
; vectors (source elements 0-7, 8-15, 16-23 and 24-31), every one of which is
; passed to @fun_v8i16.
;
; What the autogenerated assertions below expect:
;   * With LS_minSplitSize_E=0, the read is replaced by four <8 x i16> reads
;     whose sixth operand is 0, 8, 16 and 24 and whose ninth operand is 8
;     (it was 32); the extract/insert chains disappear and the reads feed the
;     calls directly.
;   * With LS_minSplitSize_E=16, the read is replaced by two <16 x i16> reads
;     (sixth operand 0 and 16, ninth operand 16); each result is still
;     repacked into two <8 x i16> vectors through extract/insert chains.
; NOTE(review): the sixth and ninth operands are presumably the block's Y
; offset and height -- confirm against the intrinsic's definition.
define spir_kernel void @test_1(i64 %ptr) {
; MINSPLIT-LABEL: @test_1(
; MINSPLIT-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v8i16(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 16, i32 16, i32 8, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v8i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 8, i32 16, i32 16, i32 8, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v8i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 16, i32 16, i32 16, i32 8, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP4:%.*]] = call <8 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v8i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 24, i32 16, i32 16, i32 8, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    call void @fun_v8i16(<8 x i16> [[TMP1]])
; MINSPLIT-NEXT:    call void @fun_v8i16(<8 x i16> [[TMP2]])
; MINSPLIT-NEXT:    call void @fun_v8i16(<8 x i16> [[TMP3]])
; MINSPLIT-NEXT:    call void @fun_v8i16(<8 x i16> [[TMP4]])
; MINSPLIT-NEXT:    ret void
;
; SPLIT16-LABEL: @test_1(
; SPLIT16-NEXT:    [[TMP1:%.*]] = call <16 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v16i16(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 16, i32 16, i32 16, i32 1, i1 false, i1 false, i32 0)
; SPLIT16-NEXT:    [[TMP2:%.*]] = extractelement <16 x i16> [[TMP1]], i64 0
; SPLIT16-NEXT:    [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i64 0
; SPLIT16-NEXT:    [[TMP4:%.*]] = extractelement <16 x i16> [[TMP1]], i64 1
; SPLIT16-NEXT:    [[TMP5:%.*]] = insertelement <8 x i16> [[TMP3]], i16 [[TMP4]], i64 1
; SPLIT16-NEXT:    [[TMP6:%.*]] = extractelement <16 x i16> [[TMP1]], i64 2
; SPLIT16-NEXT:    [[TMP7:%.*]] = insertelement <8 x i16> [[TMP5]], i16 [[TMP6]], i64 2
; SPLIT16-NEXT:    [[TMP8:%.*]] = extractelement <16 x i16> [[TMP1]], i64 3
; SPLIT16-NEXT:    [[TMP9:%.*]] = insertelement <8 x i16> [[TMP7]], i16 [[TMP8]], i64 3
; SPLIT16-NEXT:    [[TMP10:%.*]] = extractelement <16 x i16> [[TMP1]], i64 4
; SPLIT16-NEXT:    [[TMP11:%.*]] = insertelement <8 x i16> [[TMP9]], i16 [[TMP10]], i64 4
; SPLIT16-NEXT:    [[TMP12:%.*]] = extractelement <16 x i16> [[TMP1]], i64 5
; SPLIT16-NEXT:    [[TMP13:%.*]] = insertelement <8 x i16> [[TMP11]], i16 [[TMP12]], i64 5
; SPLIT16-NEXT:    [[TMP14:%.*]] = extractelement <16 x i16> [[TMP1]], i64 6
; SPLIT16-NEXT:    [[TMP15:%.*]] = insertelement <8 x i16> [[TMP13]], i16 [[TMP14]], i64 6
; SPLIT16-NEXT:    [[TMP16:%.*]] = extractelement <16 x i16> [[TMP1]], i64 7
; SPLIT16-NEXT:    [[TMP17:%.*]] = insertelement <8 x i16> [[TMP15]], i16 [[TMP16]], i64 7
; SPLIT16-NEXT:    [[TMP18:%.*]] = extractelement <16 x i16> [[TMP1]], i64 8
; SPLIT16-NEXT:    [[TMP19:%.*]] = insertelement <8 x i16> undef, i16 [[TMP18]], i64 0
; SPLIT16-NEXT:    [[TMP20:%.*]] = extractelement <16 x i16> [[TMP1]], i64 9
; SPLIT16-NEXT:    [[TMP21:%.*]] = insertelement <8 x i16> [[TMP19]], i16 [[TMP20]], i64 1
; SPLIT16-NEXT:    [[TMP22:%.*]] = extractelement <16 x i16> [[TMP1]], i64 10
; SPLIT16-NEXT:    [[TMP23:%.*]] = insertelement <8 x i16> [[TMP21]], i16 [[TMP22]], i64 2
; SPLIT16-NEXT:    [[TMP24:%.*]] = extractelement <16 x i16> [[TMP1]], i64 11
; SPLIT16-NEXT:    [[TMP25:%.*]] = insertelement <8 x i16> [[TMP23]], i16 [[TMP24]], i64 3
; SPLIT16-NEXT:    [[TMP26:%.*]] = extractelement <16 x i16> [[TMP1]], i64 12
; SPLIT16-NEXT:    [[TMP27:%.*]] = insertelement <8 x i16> [[TMP25]], i16 [[TMP26]], i64 4
; SPLIT16-NEXT:    [[TMP28:%.*]] = extractelement <16 x i16> [[TMP1]], i64 13
; SPLIT16-NEXT:    [[TMP29:%.*]] = insertelement <8 x i16> [[TMP27]], i16 [[TMP28]], i64 5
; SPLIT16-NEXT:    [[TMP30:%.*]] = extractelement <16 x i16> [[TMP1]], i64 14
; SPLIT16-NEXT:    [[TMP31:%.*]] = insertelement <8 x i16> [[TMP29]], i16 [[TMP30]], i64 6
; SPLIT16-NEXT:    [[TMP32:%.*]] = extractelement <16 x i16> [[TMP1]], i64 15
; SPLIT16-NEXT:    [[TMP33:%.*]] = insertelement <8 x i16> [[TMP31]], i16 [[TMP32]], i64 7
; SPLIT16-NEXT:    [[TMP34:%.*]] = call <16 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v16i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 16, i32 16, i32 16, i32 16, i32 1, i1 false, i1 false, i32 0)
; SPLIT16-NEXT:    [[TMP35:%.*]] = extractelement <16 x i16> [[TMP34]], i64 0
; SPLIT16-NEXT:    [[TMP36:%.*]] = insertelement <8 x i16> undef, i16 [[TMP35]], i64 0
; SPLIT16-NEXT:    [[TMP37:%.*]] = extractelement <16 x i16> [[TMP34]], i64 1
; SPLIT16-NEXT:    [[TMP38:%.*]] = insertelement <8 x i16> [[TMP36]], i16 [[TMP37]], i64 1
; SPLIT16-NEXT:    [[TMP39:%.*]] = extractelement <16 x i16> [[TMP34]], i64 2
; SPLIT16-NEXT:    [[TMP40:%.*]] = insertelement <8 x i16> [[TMP38]], i16 [[TMP39]], i64 2
; SPLIT16-NEXT:    [[TMP41:%.*]] = extractelement <16 x i16> [[TMP34]], i64 3
; SPLIT16-NEXT:    [[TMP42:%.*]] = insertelement <8 x i16> [[TMP40]], i16 [[TMP41]], i64 3
; SPLIT16-NEXT:    [[TMP43:%.*]] = extractelement <16 x i16> [[TMP34]], i64 4
; SPLIT16-NEXT:    [[TMP44:%.*]] = insertelement <8 x i16> [[TMP42]], i16 [[TMP43]], i64 4
; SPLIT16-NEXT:    [[TMP45:%.*]] = extractelement <16 x i16> [[TMP34]], i64 5
; SPLIT16-NEXT:    [[TMP46:%.*]] = insertelement <8 x i16> [[TMP44]], i16 [[TMP45]], i64 5
; SPLIT16-NEXT:    [[TMP47:%.*]] = extractelement <16 x i16> [[TMP34]], i64 6
; SPLIT16-NEXT:    [[TMP48:%.*]] = insertelement <8 x i16> [[TMP46]], i16 [[TMP47]], i64 6
; SPLIT16-NEXT:    [[TMP49:%.*]] = extractelement <16 x i16> [[TMP34]], i64 7
; SPLIT16-NEXT:    [[TMP50:%.*]] = insertelement <8 x i16> [[TMP48]], i16 [[TMP49]], i64 7
; SPLIT16-NEXT:    [[TMP51:%.*]] = extractelement <16 x i16> [[TMP34]], i64 8
; SPLIT16-NEXT:    [[TMP52:%.*]] = insertelement <8 x i16> undef, i16 [[TMP51]], i64 0
; SPLIT16-NEXT:    [[TMP53:%.*]] = extractelement <16 x i16> [[TMP34]], i64 9
; SPLIT16-NEXT:    [[TMP54:%.*]] = insertelement <8 x i16> [[TMP52]], i16 [[TMP53]], i64 1
; SPLIT16-NEXT:    [[TMP55:%.*]] = extractelement <16 x i16> [[TMP34]], i64 10
; SPLIT16-NEXT:    [[TMP56:%.*]] = insertelement <8 x i16> [[TMP54]], i16 [[TMP55]], i64 2
; SPLIT16-NEXT:    [[TMP57:%.*]] = extractelement <16 x i16> [[TMP34]], i64 11
; SPLIT16-NEXT:    [[TMP58:%.*]] = insertelement <8 x i16> [[TMP56]], i16 [[TMP57]], i64 3
; SPLIT16-NEXT:    [[TMP59:%.*]] = extractelement <16 x i16> [[TMP34]], i64 12
; SPLIT16-NEXT:    [[TMP60:%.*]] = insertelement <8 x i16> [[TMP58]], i16 [[TMP59]], i64 4
; SPLIT16-NEXT:    [[TMP61:%.*]] = extractelement <16 x i16> [[TMP34]], i64 13
; SPLIT16-NEXT:    [[TMP62:%.*]] = insertelement <8 x i16> [[TMP60]], i16 [[TMP61]], i64 5
; SPLIT16-NEXT:    [[TMP63:%.*]] = extractelement <16 x i16> [[TMP34]], i64 14
; SPLIT16-NEXT:    [[TMP64:%.*]] = insertelement <8 x i16> [[TMP62]], i16 [[TMP63]], i64 6
; SPLIT16-NEXT:    [[TMP65:%.*]] = extractelement <16 x i16> [[TMP34]], i64 15
; SPLIT16-NEXT:    [[TMP66:%.*]] = insertelement <8 x i16> [[TMP64]], i16 [[TMP65]], i64 7
; SPLIT16-NEXT:    call void @fun_v8i16(<8 x i16> [[TMP17]])
; SPLIT16-NEXT:    call void @fun_v8i16(<8 x i16> [[TMP33]])
; SPLIT16-NEXT:    call void @fun_v8i16(<8 x i16> [[TMP50]])
; SPLIT16-NEXT:    call void @fun_v8i16(<8 x i16> [[TMP66]])
; SPLIT16-NEXT:    ret void
;
  ; Input: the one wide read that the pass is expected to split.
  %1 = call <32 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v16i32(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 16, i32 16, i32 32, i32 1, i1 false, i1 false, i32 0)
  ; Chain 1: source elements 0-7 of %1 -> lanes 0-7 of %17.
  %2 = extractelement <32 x i16> %1, i32 0
  %3 = insertelement <8 x i16> undef, i16 %2, i32 0
  %4 = extractelement <32 x i16> %1, i32 1
  %5 = insertelement <8 x i16> %3, i16 %4, i32 1
  %6 = extractelement <32 x i16> %1, i32 2
  %7 = insertelement <8 x i16> %5, i16 %6, i32 2
  %8 = extractelement <32 x i16> %1, i32 3
  %9 = insertelement <8 x i16> %7, i16 %8, i32 3
  %10 = extractelement <32 x i16> %1, i32 4
  %11 = insertelement <8 x i16> %9, i16 %10, i32 4
  %12 = extractelement <32 x i16> %1, i32 5
  %13 = insertelement <8 x i16> %11, i16 %12, i32 5
  %14 = extractelement <32 x i16> %1, i32 6
  %15 = insertelement <8 x i16> %13, i16 %14, i32 6
  %16 = extractelement <32 x i16> %1, i32 7
  %17 = insertelement <8 x i16> %15, i16 %16, i32 7
  ; Chain 2: source elements 8-15 of %1 -> lanes 0-7 of %33 (fresh undef base).
  %18 = extractelement <32 x i16> %1, i32 8
  %19 = insertelement <8 x i16> undef, i16 %18, i32 0
  %20 = extractelement <32 x i16> %1, i32 9
  %21 = insertelement <8 x i16> %19, i16 %20, i32 1
  %22 = extractelement <32 x i16> %1, i32 10
  %23 = insertelement <8 x i16> %21, i16 %22, i32 2
  %24 = extractelement <32 x i16> %1, i32 11
  %25 = insertelement <8 x i16> %23, i16 %24, i32 3
  %26 = extractelement <32 x i16> %1, i32 12
  %27 = insertelement <8 x i16> %25, i16 %26, i32 4
  %28 = extractelement <32 x i16> %1, i32 13
  %29 = insertelement <8 x i16> %27, i16 %28, i32 5
  %30 = extractelement <32 x i16> %1, i32 14
  %31 = insertelement <8 x i16> %29, i16 %30, i32 6
  %32 = extractelement <32 x i16> %1, i32 15
  %33 = insertelement <8 x i16> %31, i16 %32, i32 7
  ; Chain 3: source elements 16-23 of %1 -> lanes 0-7 of %49.
  %34 = extractelement <32 x i16> %1, i32 16
  %35 = insertelement <8 x i16> undef, i16 %34, i32 0
  %36 = extractelement <32 x i16> %1, i32 17
  %37 = insertelement <8 x i16> %35, i16 %36, i32 1
  %38 = extractelement <32 x i16> %1, i32 18
  %39 = insertelement <8 x i16> %37, i16 %38, i32 2
  %40 = extractelement <32 x i16> %1, i32 19
  %41 = insertelement <8 x i16> %39, i16 %40, i32 3
  %42 = extractelement <32 x i16> %1, i32 20
  %43 = insertelement <8 x i16> %41, i16 %42, i32 4
  %44 = extractelement <32 x i16> %1, i32 21
  %45 = insertelement <8 x i16> %43, i16 %44, i32 5
  %46 = extractelement <32 x i16> %1, i32 22
  %47 = insertelement <8 x i16> %45, i16 %46, i32 6
  %48 = extractelement <32 x i16> %1, i32 23
  %49 = insertelement <8 x i16> %47, i16 %48, i32 7
  ; Chain 4: source elements 24-31 of %1 -> lanes 0-7 of %65.
  %50 = extractelement <32 x i16> %1, i32 24
  %51 = insertelement <8 x i16> undef, i16 %50, i32 0
  %52 = extractelement <32 x i16> %1, i32 25
  %53 = insertelement <8 x i16> %51, i16 %52, i32 1
  %54 = extractelement <32 x i16> %1, i32 26
  %55 = insertelement <8 x i16> %53, i16 %54, i32 2
  %56 = extractelement <32 x i16> %1, i32 27
  %57 = insertelement <8 x i16> %55, i16 %56, i32 3
  %58 = extractelement <32 x i16> %1, i32 28
  %59 = insertelement <8 x i16> %57, i16 %58, i32 4
  %60 = extractelement <32 x i16> %1, i32 29
  %61 = insertelement <8 x i16> %59, i16 %60, i32 5
  %62 = extractelement <32 x i16> %1, i32 30
  %63 = insertelement <8 x i16> %61, i16 %62, i32 6
  %64 = extractelement <32 x i16> %1, i32 31
  %65 = insertelement <8 x i16> %63, i16 %64, i32 7

  ; Only the fully populated vector at the end of each chain is used.
  call void @fun_v8i16(<8 x i16> %17)
  call void @fun_v8i16(<8 x i16> %33)
  call void @fun_v8i16(<8 x i16> %49)
  call void @fun_v8i16(<8 x i16> %65)

  ret void
}