1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175
|
;=========================== begin_copyright_notice ============================
;
; Copyright (C) 2025 Intel Corporation
;
; SPDX-License-Identifier: MIT
;
;============================ end_copyright_notice =============================
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; REQUIRES: regkeys
; RUN: igc_opt -S --igc-split-loads -platformpvc --regkey=LS_enableLoadSplitting=1 --regkey=LS_ignoreSplitThreshold=1 --regkey=LS_minSplitSize_GRF=0 --regkey=LS_minSplitSize_E=0 %s | FileCheck %s --check-prefix=MINSPLIT
; RUN: igc_opt -S --igc-split-loads -platformpvc --regkey=LS_enableLoadSplitting=1 --regkey=LS_ignoreSplitThreshold=1 --regkey=LS_minSplitSize_GRF=0 --regkey=LS_minSplitSize_E=16 %s | FileCheck %s --check-prefix=SPLIT16
; Consumer of an <8 x i16> value. Declared without a body; the test calls it
; once for each <8 x i16> vector it assembles.
declare spir_func void @fun_v8i16(<8 x i16>)
; 2D block read intrinsic used by the test input. As declared here it returns
; <32 x i16> and takes an i64, nine i32, two i1 and a trailing i32 operand.
; NOTE(review): the name suffix is .v16i32 while the declared return type is
; <32 x i16>; the expected output uses suffixes that match the return type
; (.v8i16, .v16i16). Confirm whether this mismatch is intentional.
declare spir_func <32 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v16i32(i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i1, i1, i32)
; test_1: a single 2D block read returning <32 x i16> is unpacked element by
; element into four <8 x i16> vectors (elements 0-7, 8-15, 16-23, 24-31), and
; each vector is passed to @fun_v8i16.
; The two invocations at the top of the file differ only in the
; LS_minSplitSize_E regkey (0 versus 16). The expected output for the first
; shows four <8 x i16> reads feeding the calls directly; the expected output
; for the second shows two <16 x i16> reads, each unpacked into two <8 x i16>
; halves with extractelement/insertelement chains.
; In the expected reads only the sixth and ninth operands differ from the
; input call: the sixth steps through 0, 8, 16, 24 (or 0, 16) and the ninth
; drops from 32 to 8 (or 16). Presumably these are the block Y offset and the
; block height - TODO confirm against the intrinsic definition.
define spir_kernel void @test_1(i64 %ptr) {
; MINSPLIT-LABEL: @test_1(
; MINSPLIT-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v8i16(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 16, i32 16, i32 8, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v8i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 8, i32 16, i32 16, i32 8, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v8i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 16, i32 16, i32 16, i32 8, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT: [[TMP4:%.*]] = call <8 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v8i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 24, i32 16, i32 16, i32 8, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT: call void @fun_v8i16(<8 x i16> [[TMP1]])
; MINSPLIT-NEXT: call void @fun_v8i16(<8 x i16> [[TMP2]])
; MINSPLIT-NEXT: call void @fun_v8i16(<8 x i16> [[TMP3]])
; MINSPLIT-NEXT: call void @fun_v8i16(<8 x i16> [[TMP4]])
; MINSPLIT-NEXT: ret void
;
; SPLIT16-LABEL: @test_1(
; SPLIT16-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v16i16(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 16, i32 16, i32 16, i32 1, i1 false, i1 false, i32 0)
; SPLIT16-NEXT: [[TMP2:%.*]] = extractelement <16 x i16> [[TMP1]], i64 0
; SPLIT16-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i64 0
; SPLIT16-NEXT: [[TMP4:%.*]] = extractelement <16 x i16> [[TMP1]], i64 1
; SPLIT16-NEXT: [[TMP5:%.*]] = insertelement <8 x i16> [[TMP3]], i16 [[TMP4]], i64 1
; SPLIT16-NEXT: [[TMP6:%.*]] = extractelement <16 x i16> [[TMP1]], i64 2
; SPLIT16-NEXT: [[TMP7:%.*]] = insertelement <8 x i16> [[TMP5]], i16 [[TMP6]], i64 2
; SPLIT16-NEXT: [[TMP8:%.*]] = extractelement <16 x i16> [[TMP1]], i64 3
; SPLIT16-NEXT: [[TMP9:%.*]] = insertelement <8 x i16> [[TMP7]], i16 [[TMP8]], i64 3
; SPLIT16-NEXT: [[TMP10:%.*]] = extractelement <16 x i16> [[TMP1]], i64 4
; SPLIT16-NEXT: [[TMP11:%.*]] = insertelement <8 x i16> [[TMP9]], i16 [[TMP10]], i64 4
; SPLIT16-NEXT: [[TMP12:%.*]] = extractelement <16 x i16> [[TMP1]], i64 5
; SPLIT16-NEXT: [[TMP13:%.*]] = insertelement <8 x i16> [[TMP11]], i16 [[TMP12]], i64 5
; SPLIT16-NEXT: [[TMP14:%.*]] = extractelement <16 x i16> [[TMP1]], i64 6
; SPLIT16-NEXT: [[TMP15:%.*]] = insertelement <8 x i16> [[TMP13]], i16 [[TMP14]], i64 6
; SPLIT16-NEXT: [[TMP16:%.*]] = extractelement <16 x i16> [[TMP1]], i64 7
; SPLIT16-NEXT: [[TMP17:%.*]] = insertelement <8 x i16> [[TMP15]], i16 [[TMP16]], i64 7
; SPLIT16-NEXT: [[TMP18:%.*]] = extractelement <16 x i16> [[TMP1]], i64 8
; SPLIT16-NEXT: [[TMP19:%.*]] = insertelement <8 x i16> undef, i16 [[TMP18]], i64 0
; SPLIT16-NEXT: [[TMP20:%.*]] = extractelement <16 x i16> [[TMP1]], i64 9
; SPLIT16-NEXT: [[TMP21:%.*]] = insertelement <8 x i16> [[TMP19]], i16 [[TMP20]], i64 1
; SPLIT16-NEXT: [[TMP22:%.*]] = extractelement <16 x i16> [[TMP1]], i64 10
; SPLIT16-NEXT: [[TMP23:%.*]] = insertelement <8 x i16> [[TMP21]], i16 [[TMP22]], i64 2
; SPLIT16-NEXT: [[TMP24:%.*]] = extractelement <16 x i16> [[TMP1]], i64 11
; SPLIT16-NEXT: [[TMP25:%.*]] = insertelement <8 x i16> [[TMP23]], i16 [[TMP24]], i64 3
; SPLIT16-NEXT: [[TMP26:%.*]] = extractelement <16 x i16> [[TMP1]], i64 12
; SPLIT16-NEXT: [[TMP27:%.*]] = insertelement <8 x i16> [[TMP25]], i16 [[TMP26]], i64 4
; SPLIT16-NEXT: [[TMP28:%.*]] = extractelement <16 x i16> [[TMP1]], i64 13
; SPLIT16-NEXT: [[TMP29:%.*]] = insertelement <8 x i16> [[TMP27]], i16 [[TMP28]], i64 5
; SPLIT16-NEXT: [[TMP30:%.*]] = extractelement <16 x i16> [[TMP1]], i64 14
; SPLIT16-NEXT: [[TMP31:%.*]] = insertelement <8 x i16> [[TMP29]], i16 [[TMP30]], i64 6
; SPLIT16-NEXT: [[TMP32:%.*]] = extractelement <16 x i16> [[TMP1]], i64 15
; SPLIT16-NEXT: [[TMP33:%.*]] = insertelement <8 x i16> [[TMP31]], i16 [[TMP32]], i64 7
; SPLIT16-NEXT: [[TMP34:%.*]] = call <16 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v16i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 16, i32 16, i32 16, i32 16, i32 1, i1 false, i1 false, i32 0)
; SPLIT16-NEXT: [[TMP35:%.*]] = extractelement <16 x i16> [[TMP34]], i64 0
; SPLIT16-NEXT: [[TMP36:%.*]] = insertelement <8 x i16> undef, i16 [[TMP35]], i64 0
; SPLIT16-NEXT: [[TMP37:%.*]] = extractelement <16 x i16> [[TMP34]], i64 1
; SPLIT16-NEXT: [[TMP38:%.*]] = insertelement <8 x i16> [[TMP36]], i16 [[TMP37]], i64 1
; SPLIT16-NEXT: [[TMP39:%.*]] = extractelement <16 x i16> [[TMP34]], i64 2
; SPLIT16-NEXT: [[TMP40:%.*]] = insertelement <8 x i16> [[TMP38]], i16 [[TMP39]], i64 2
; SPLIT16-NEXT: [[TMP41:%.*]] = extractelement <16 x i16> [[TMP34]], i64 3
; SPLIT16-NEXT: [[TMP42:%.*]] = insertelement <8 x i16> [[TMP40]], i16 [[TMP41]], i64 3
; SPLIT16-NEXT: [[TMP43:%.*]] = extractelement <16 x i16> [[TMP34]], i64 4
; SPLIT16-NEXT: [[TMP44:%.*]] = insertelement <8 x i16> [[TMP42]], i16 [[TMP43]], i64 4
; SPLIT16-NEXT: [[TMP45:%.*]] = extractelement <16 x i16> [[TMP34]], i64 5
; SPLIT16-NEXT: [[TMP46:%.*]] = insertelement <8 x i16> [[TMP44]], i16 [[TMP45]], i64 5
; SPLIT16-NEXT: [[TMP47:%.*]] = extractelement <16 x i16> [[TMP34]], i64 6
; SPLIT16-NEXT: [[TMP48:%.*]] = insertelement <8 x i16> [[TMP46]], i16 [[TMP47]], i64 6
; SPLIT16-NEXT: [[TMP49:%.*]] = extractelement <16 x i16> [[TMP34]], i64 7
; SPLIT16-NEXT: [[TMP50:%.*]] = insertelement <8 x i16> [[TMP48]], i16 [[TMP49]], i64 7
; SPLIT16-NEXT: [[TMP51:%.*]] = extractelement <16 x i16> [[TMP34]], i64 8
; SPLIT16-NEXT: [[TMP52:%.*]] = insertelement <8 x i16> undef, i16 [[TMP51]], i64 0
; SPLIT16-NEXT: [[TMP53:%.*]] = extractelement <16 x i16> [[TMP34]], i64 9
; SPLIT16-NEXT: [[TMP54:%.*]] = insertelement <8 x i16> [[TMP52]], i16 [[TMP53]], i64 1
; SPLIT16-NEXT: [[TMP55:%.*]] = extractelement <16 x i16> [[TMP34]], i64 10
; SPLIT16-NEXT: [[TMP56:%.*]] = insertelement <8 x i16> [[TMP54]], i16 [[TMP55]], i64 2
; SPLIT16-NEXT: [[TMP57:%.*]] = extractelement <16 x i16> [[TMP34]], i64 11
; SPLIT16-NEXT: [[TMP58:%.*]] = insertelement <8 x i16> [[TMP56]], i16 [[TMP57]], i64 3
; SPLIT16-NEXT: [[TMP59:%.*]] = extractelement <16 x i16> [[TMP34]], i64 12
; SPLIT16-NEXT: [[TMP60:%.*]] = insertelement <8 x i16> [[TMP58]], i16 [[TMP59]], i64 4
; SPLIT16-NEXT: [[TMP61:%.*]] = extractelement <16 x i16> [[TMP34]], i64 13
; SPLIT16-NEXT: [[TMP62:%.*]] = insertelement <8 x i16> [[TMP60]], i16 [[TMP61]], i64 5
; SPLIT16-NEXT: [[TMP63:%.*]] = extractelement <16 x i16> [[TMP34]], i64 14
; SPLIT16-NEXT: [[TMP64:%.*]] = insertelement <8 x i16> [[TMP62]], i16 [[TMP63]], i64 6
; SPLIT16-NEXT: [[TMP65:%.*]] = extractelement <16 x i16> [[TMP34]], i64 15
; SPLIT16-NEXT: [[TMP66:%.*]] = insertelement <8 x i16> [[TMP64]], i16 [[TMP65]], i64 7
; SPLIT16-NEXT: call void @fun_v8i16(<8 x i16> [[TMP17]])
; SPLIT16-NEXT: call void @fun_v8i16(<8 x i16> [[TMP33]])
; SPLIT16-NEXT: call void @fun_v8i16(<8 x i16> [[TMP50]])
; SPLIT16-NEXT: call void @fun_v8i16(<8 x i16> [[TMP66]])
; SPLIT16-NEXT: ret void
;
; Input IR starts here: the single, unsplit block read (ninth operand is 32).
%1 = call <32 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v16i32(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 16, i32 16, i32 32, i32 1, i1 false, i1 false, i32 0)
; First <8 x i16> vector (%17): elements 0-7 of %1, built up from undef.
%2 = extractelement <32 x i16> %1, i32 0
%3 = insertelement <8 x i16> undef, i16 %2, i32 0
%4 = extractelement <32 x i16> %1, i32 1
%5 = insertelement <8 x i16> %3, i16 %4, i32 1
%6 = extractelement <32 x i16> %1, i32 2
%7 = insertelement <8 x i16> %5, i16 %6, i32 2
%8 = extractelement <32 x i16> %1, i32 3
%9 = insertelement <8 x i16> %7, i16 %8, i32 3
%10 = extractelement <32 x i16> %1, i32 4
%11 = insertelement <8 x i16> %9, i16 %10, i32 4
%12 = extractelement <32 x i16> %1, i32 5
%13 = insertelement <8 x i16> %11, i16 %12, i32 5
%14 = extractelement <32 x i16> %1, i32 6
%15 = insertelement <8 x i16> %13, i16 %14, i32 6
%16 = extractelement <32 x i16> %1, i32 7
%17 = insertelement <8 x i16> %15, i16 %16, i32 7
; Second <8 x i16> vector (%33): elements 8-15 of %1.
%18 = extractelement <32 x i16> %1, i32 8
%19 = insertelement <8 x i16> undef, i16 %18, i32 0
%20 = extractelement <32 x i16> %1, i32 9
%21 = insertelement <8 x i16> %19, i16 %20, i32 1
%22 = extractelement <32 x i16> %1, i32 10
%23 = insertelement <8 x i16> %21, i16 %22, i32 2
%24 = extractelement <32 x i16> %1, i32 11
%25 = insertelement <8 x i16> %23, i16 %24, i32 3
%26 = extractelement <32 x i16> %1, i32 12
%27 = insertelement <8 x i16> %25, i16 %26, i32 4
%28 = extractelement <32 x i16> %1, i32 13
%29 = insertelement <8 x i16> %27, i16 %28, i32 5
%30 = extractelement <32 x i16> %1, i32 14
%31 = insertelement <8 x i16> %29, i16 %30, i32 6
%32 = extractelement <32 x i16> %1, i32 15
%33 = insertelement <8 x i16> %31, i16 %32, i32 7
; Third <8 x i16> vector (%49): elements 16-23 of %1.
%34 = extractelement <32 x i16> %1, i32 16
%35 = insertelement <8 x i16> undef, i16 %34, i32 0
%36 = extractelement <32 x i16> %1, i32 17
%37 = insertelement <8 x i16> %35, i16 %36, i32 1
%38 = extractelement <32 x i16> %1, i32 18
%39 = insertelement <8 x i16> %37, i16 %38, i32 2
%40 = extractelement <32 x i16> %1, i32 19
%41 = insertelement <8 x i16> %39, i16 %40, i32 3
%42 = extractelement <32 x i16> %1, i32 20
%43 = insertelement <8 x i16> %41, i16 %42, i32 4
%44 = extractelement <32 x i16> %1, i32 21
%45 = insertelement <8 x i16> %43, i16 %44, i32 5
%46 = extractelement <32 x i16> %1, i32 22
%47 = insertelement <8 x i16> %45, i16 %46, i32 6
%48 = extractelement <32 x i16> %1, i32 23
%49 = insertelement <8 x i16> %47, i16 %48, i32 7
; Fourth <8 x i16> vector (%65): elements 24-31 of %1.
%50 = extractelement <32 x i16> %1, i32 24
%51 = insertelement <8 x i16> undef, i16 %50, i32 0
%52 = extractelement <32 x i16> %1, i32 25
%53 = insertelement <8 x i16> %51, i16 %52, i32 1
%54 = extractelement <32 x i16> %1, i32 26
%55 = insertelement <8 x i16> %53, i16 %54, i32 2
%56 = extractelement <32 x i16> %1, i32 27
%57 = insertelement <8 x i16> %55, i16 %56, i32 3
%58 = extractelement <32 x i16> %1, i32 28
%59 = insertelement <8 x i16> %57, i16 %58, i32 4
%60 = extractelement <32 x i16> %1, i32 29
%61 = insertelement <8 x i16> %59, i16 %60, i32 5
%62 = extractelement <32 x i16> %1, i32 30
%63 = insertelement <8 x i16> %61, i16 %62, i32 6
%64 = extractelement <32 x i16> %1, i32 31
%65 = insertelement <8 x i16> %63, i16 %64, i32 7
; Each fully assembled vector is consumed by exactly one call, in element order.
call void @fun_v8i16(<8 x i16> %17)
call void @fun_v8i16(<8 x i16> %33)
call void @fun_v8i16(<8 x i16> %49)
call void @fun_v8i16(<8 x i16> %65)
ret void
}
|