File: consolidated_load.ll

package info (click to toggle)
intel-graphics-compiler 1.0.17791.18-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 102,312 kB
  • sloc: cpp: 935,343; lisp: 286,143; ansic: 16,196; python: 3,279; yacc: 2,487; lex: 1,642; pascal: 300; sh: 174; makefile: 27
file content (33 lines) | stat: -rw-r--r-- 2,176 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
;=========================== begin_copyright_notice ============================
;
; Copyright (C) 2020-2021 Intel Corporation
;
; SPDX-License-Identifier: MIT
;
;============================ end_copyright_notice =============================

; RUN: %opt %use_old_pass_manager% -GenXPostLegalization -march=genx64 -mcpu=Gen9 -mtriple=spir64 -S < %s | FileCheck %s

;; Test that constant materialization works as expected for consolidated constants

target datalayout = "e-p:64:64-i64:64-n8:16:32"

;; Splat-with-undefs case: only lane 0 of the constant operand is defined
;; (i8 -1); the other fifteen lanes are undef. The checks below expect the xor
;; to be the first instruction of the function and to keep the vector constant
;; as an inline operand, i.e. no separate constant-loading calls in front of it.
;; The label is anchored on the full function name plus the opening parenthesis
;; so it cannot match any other symbol that merely shares the same prefix.
; CHECK-LABEL: @consolidated_load_splat_with_undefs(
; CHECK-NEXT: xor <16 x i8> %arg, <i8 -1, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>
define <16 x i8> @consolidated_load_splat_with_undefs(<16 x i8> %arg) {
  %val = xor <16 x i8> %arg, <i8 -1, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>
  ret <16 x i8> %val
}

;; Fully-defined case: the sixteen i8 lanes of the constant operand are expected
;; to be built as four packed i32 dwords (little-endian, per the datalayout) and
;; then bitcast back to <16 x i8> before feeding the xor:
;;
;;   lanes 12..15 (100, 120, 121, 123) -> i32 2071558244 (0x7B797864), loaded
;;                                        first and splat over all four dwords
;;   lanes  8..11 (-5, -6, -7, -8)     -> i32 -117835013 (0xF8F9FAFB), offset 8
;;   lanes  4..7  (1, 2, 3, 4)         -> i32 67305985   (0x04030201), offset 4
;;   lanes  0..3  (-1, -2, -3, -4)     -> i32 -50462977  (0xFCFDFEFF), offset 0
;;
;; The i16 operand of each region write equals the byte offset of the dword it
;; overwrites. Every region write is matched with its complete operand list so
;; that the offset, parent width and mask of each write are all verified.
; CHECK-LABEL: @consolidated_load_simple(
; CHECK-NEXT: [[INIT_CONST:%[^ ]+]] = call <1 x i32> @llvm.genx.constanti.v1i32(<1 x i32> <i32 2071558244>)
; CHECK-NEXT: [[RDREG_SPLAT:%[^ ]+]] = call <4 x i32> @llvm.genx.rdregioni.v4i32.v1i32.i16(<1 x i32> [[INIT_CONST]], i32 0, i32 4, i32 0, i16 0, i32 undef)
; CHECK-NEXT: [[C2:%[^ ]+]] = call <4 x i32> @llvm.genx.wrconstregion.v4i32.v1i32.i16.i1(<4 x i32> [[RDREG_SPLAT]], <1 x i32> <i32 -117835013>, i32 1, i32 1, i32 1, i16 8, i32 undef, i1 true)
; CHECK-NEXT: [[C3:%[^ ]+]] = call <4 x i32> @llvm.genx.wrconstregion.v4i32.v1i32.i16.i1(<4 x i32> [[C2]], <1 x i32> <i32 67305985>, i32 1, i32 1, i32 1, i16 4, i32 undef, i1 true)
; CHECK-NEXT: [[C4:%[^ ]+]] = call <4 x i32> @llvm.genx.wrconstregion.v4i32.v1i32.i16.i1(<4 x i32> [[C3]], <1 x i32> <i32 -50462977>, i32 1, i32 1, i32 1, i16 0, i32 undef, i1 true)
; CHECK-NEXT: [[BITCASTED:%[^ ]+]] = bitcast <4 x i32> [[C4]] to <16 x i8>
; CHECK-NEXT: xor <16 x i8> %arg, [[BITCASTED]]
define <16 x i8> @consolidated_load_simple(<16 x i8> %arg) {
  %val = xor <16 x i8> %arg, <i8 -1, i8 -2, i8 -3, i8 -4, i8 1, i8 2, i8 3, i8 4, i8 -5, i8 -6, i8 -7, i8 -8, i8 100, i8 120, i8 121, i8 123>
  ret <16 x i8> %val
}