; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-revec -slp-max-reg-size=1024 -slp-threshold=-100 < %s | FileCheck %s
; Test input: a linear chain of `or` instructions over <4 x i16> values.
; The chain combines ten `add` results (six add %subi to a zero vector, four
; add two zero vectors), then %subi itself, and finally the `or` of two more
; adds that share %subii.
;
; The expected output pinned below (autogenerated) shows the chain rewritten as:
;   * a <16 x i16> add whose operand is a shuffle repeating a 4-element zero
;     vector four times;
;   * four scalar llvm.vector.reduce.or calls on a zero vector, reassembled
;     into a <4 x i16> with insertelement;
;   * a <16 x i16> `or`, from which lanes {k, k+4, k+8, k+12} are extracted for
;     k = 0..3, reduced with llvm.vector.reduce.or, and reassembled;
;   * two final <4 x i16> `or` instructions producing the return value.
; Do not hand-edit the assertion lines; regenerate them with the script named
; in the NOTE at the top of this file.
define <4 x i16> @test() {
; CHECK-LABEL: define <4 x i16> @test() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> zeroinitializer, <4 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP1:%.*]] = add <16 x i16> [[TMP0]], zeroinitializer
; CHECK-NEXT: [[TMP25:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> zeroinitializer)
; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i16> poison, i16 [[TMP25]], i64 0
; CHECK-NEXT: [[TMP28:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> zeroinitializer)
; CHECK-NEXT: [[TMP29:%.*]] = insertelement <4 x i16> [[TMP26]], i16 [[TMP28]], i64 1
; CHECK-NEXT: [[TMP31:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> zeroinitializer)
; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x i16> [[TMP29]], i16 [[TMP31]], i64 2
; CHECK-NEXT: [[TMP34:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> zeroinitializer)
; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x i16> [[TMP32]], i16 [[TMP34]], i64 3
; CHECK-NEXT: [[RDX_OP:%.*]] = or <16 x i16> zeroinitializer, [[TMP1]]
; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <16 x i16> [[RDX_OP]], <16 x i16> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
; CHECK-NEXT: [[TMP37:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP36]])
; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x i16> poison, i16 [[TMP37]], i64 0
; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <16 x i16> [[RDX_OP]], <16 x i16> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
; CHECK-NEXT: [[TMP40:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP39]])
; CHECK-NEXT: [[TMP41:%.*]] = insertelement <4 x i16> [[TMP38]], i16 [[TMP40]], i64 1
; CHECK-NEXT: [[TMP42:%.*]] = shufflevector <16 x i16> [[RDX_OP]], <16 x i16> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
; CHECK-NEXT: [[TMP43:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP42]])
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <4 x i16> [[TMP41]], i16 [[TMP43]], i64 2
; CHECK-NEXT: [[TMP45:%.*]] = shufflevector <16 x i16> [[RDX_OP]], <16 x i16> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
; CHECK-NEXT: [[TMP46:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP45]])
; CHECK-NEXT: [[TMP47:%.*]] = insertelement <4 x i16> [[TMP44]], i16 [[TMP46]], i64 3
; CHECK-NEXT: [[OP_RDX9:%.*]] = or <4 x i16> [[TMP47]], zeroinitializer
; CHECK-NEXT: [[OP_RDX11:%.*]] = or <4 x i16> [[OP_RDX9]], [[TMP35]]
; CHECK-NEXT: ret <4 x i16> [[OP_RDX11]]
;
entry:
; Shared base value: it is an operand of the six adds that follow and is also
; or'd directly into the chain at %or50.i.
%subi = add <4 x i16> zeroinitializer, zeroinitializer
; Six adds that all use %subi as their first operand.
%sub40.i = add <4 x i16> %subi, zeroinitializer
%sub41.i = add <4 x i16> %subi, zeroinitializer
%sub42.i = add <4 x i16> %subi, zeroinitializer
%sub43.i = add <4 x i16> %subi, zeroinitializer
%sub44.i = add <4 x i16> %subi, zeroinitializer
%sub45.i = add <4 x i16> %subi, zeroinitializer
; Four adds of two constant zero vectors; these do not depend on %subi.
%sub46.i = add <4 x i16> zeroinitializer, zeroinitializer
%sub47.i = add <4 x i16> zeroinitializer, zeroinitializer
%sub48.i = add <4 x i16> zeroinitializer, zeroinitializer
%sub49.i = add <4 x i16> zeroinitializer, zeroinitializer
; Linear `or` chain folding the ten adds above in definition order.
%or40.i = or <4 x i16> %sub40.i, %sub41.i
%or41.i = or <4 x i16> %or40.i, %sub42.i
%or42.i = or <4 x i16> %or41.i, %sub43.i
%or43.i = or <4 x i16> %or42.i, %sub44.i
%or44.i = or <4 x i16> %or43.i, %sub45.i
%or45.i = or <4 x i16> %or44.i, %sub46.i
%or46.i = or <4 x i16> %or45.i, %sub47.i
%or47.i = or <4 x i16> %or46.i, %sub48.i
%or48.i = or <4 x i16> %or47.i, %sub49.i
; %subi itself (not only the adds derived from it) joins the chain here.
%or50.i = or <4 x i16> %or48.i, %subi
; Second group: two adds sharing %subii are or'd together, and that result is
; then or'd with the end of the chain above to form the return value.
%subii = add <4 x i16> zeroinitializer, zeroinitializer
%subi16.i = add <4 x i16> %subii, zeroinitializer
%subi17.i = add <4 x i16> %subii, zeroinitializer
%0 = or <4 x i16> %subi16.i, %subi17.i
%1 = or <4 x i16> %0, %or50.i
ret <4 x i16> %1
}
; End of test.