; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -S < %s | FileCheck %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
target triple = "x86_64-unknown-linux-gnu"
; @foo computes eight independent "fmul then fadd 0.0" results and stores them
; to eight consecutive floats at byte offsets 8 .. 36 from %0.
;   * %0 is read and written: four floats are loaded from byte offsets 8, 12,
;     28 and 24, and each loaded value is used by two fmuls.
;   * %1 is only read: eight floats at byte offsets 8 .. 36 are loaded, in an
;     order that does not follow increasing addresses.
; The autogenerated assertions below expect a single <8 x float>
; fmul/fadd/store. The four %0 loads become one masked gather whose result is
; widened to eight lanes by a shufflevector, the eight %1 loads become one
; <8 x float> load from %1+8, and a final shufflevector reorders the lanes into
; store order.
define void @"foo"(ptr addrspace(1) %0, ptr addrspace(1) %1) #0 {
; CHECK-LABEL: @foo(
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr addrspace(1)> poison, ptr addrspace(1) [[TMP0:%.*]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x ptr addrspace(1)> [[TMP3]], <4 x ptr addrspace(1)> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, <4 x ptr addrspace(1)> [[TMP4]], <4 x i64> <i64 8, i64 12, i64 28, i64 24>
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 8
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1:%.*]], i64 8
; CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p1(<4 x ptr addrspace(1)> [[TMP5]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> poison)
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <8 x i32> <i32 0, i32 3, i32 0, i32 3, i32 2, i32 1, i32 2, i32 1>
; CHECK-NEXT: [[TMP10:%.*]] = load <8 x float>, ptr addrspace(1) [[TMP7]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = fmul <8 x float> [[TMP9]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = fadd <8 x float> [[TMP11]], zeroinitializer
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x float> [[TMP12]], <8 x float> poison, <8 x i32> <i32 0, i32 5, i32 2, i32 7, i32 4, i32 1, i32 6, i32 3>
; CHECK-NEXT: store <8 x float> [[TMP13]], ptr addrspace(1) [[TMP6]], align 4
; CHECK-NEXT: ret void
;
; %8 (stored to %0+8): float at %0+8 times float at %1+8.
%3 = getelementptr inbounds i8, ptr addrspace(1) %0, i64 8
%4 = load float, ptr addrspace(1) %3 , align 4
%5 = getelementptr inbounds i8, ptr addrspace(1) %1, i64 8
%6 = load float, ptr addrspace(1) %5 , align 4
%7 = fmul float %4, %6
%8 = fadd float %7, 0.000000e+00
; %14 (stored to %0+12): float at %0+12 times float at %1+28.
%9 = getelementptr inbounds i8, ptr addrspace(1) %0, i64 12
%10 = load float, ptr addrspace(1) %9 , align 4
%11 = getelementptr inbounds i8, ptr addrspace(1) %1, i64 28
%12 = load float, ptr addrspace(1) %11 , align 4
%13 = fmul float %10, %12
%14 = fadd float %13, 0.000000e+00
; %18 (stored to %0+16): reuses %4 (float at %0+8) times float at %1+16.
%15 = getelementptr inbounds i8, ptr addrspace(1) %1, i64 16
%16 = load float, ptr addrspace(1) %15 , align 4
%17 = fmul float %4, %16
%18 = fadd float %17, 0.000000e+00
; %22 (stored to %0+20): reuses %10 (float at %0+12) times float at %1+36.
%19 = getelementptr inbounds i8, ptr addrspace(1) %1, i64 36
%20 = load float, ptr addrspace(1) %19 , align 4
%21 = fmul float %10, %20
%22 = fadd float %21, 0.000000e+00
; %28 (stored to %0+24): float at %0+28 times float at %1+24.
; Note that the value is loaded from offset 28 but later stored at offset 24.
%23 = getelementptr inbounds i8, ptr addrspace(1) %0, i64 28
%24 = load float, ptr addrspace(1) %23 , align 4
%25 = getelementptr inbounds i8, ptr addrspace(1) %1, i64 24
%26 = load float, ptr addrspace(1) %25 , align 4
%27 = fmul float %24, %26
%28 = fadd float %27, 0.000000e+00
; %34 (stored to %0+28): float at %0+24 times float at %1+12.
; Note that the value is loaded from offset 24 but later stored at offset 28.
%29 = getelementptr inbounds i8, ptr addrspace(1) %0, i64 24
%30 = load float, ptr addrspace(1) %29 , align 4
%31 = getelementptr inbounds i8, ptr addrspace(1) %1, i64 12
%32 = load float, ptr addrspace(1) %31 , align 4
%33 = fmul float %30, %32
%34 = fadd float %33, 0.000000e+00
; %38 (stored to %0+32): reuses %24 (float at %0+28) times float at %1+32.
%35 = getelementptr inbounds i8, ptr addrspace(1) %1, i64 32
%36 = load float, ptr addrspace(1) %35 , align 4
%37 = fmul float %24, %36
%38 = fadd float %37, 0.000000e+00
; %42 (stored to %0+36): reuses %30 (float at %0+24) times float at %1+20.
%39 = getelementptr inbounds i8, ptr addrspace(1) %1, i64 20
%40 = load float, ptr addrspace(1) %39 , align 4
%41 = fmul float %30, %40
%42 = fadd float %41, 0.000000e+00
; All loads precede the first store. The stores below are issued in increasing
; address order and cover byte offsets 8, 12, 16, 20, 24, 28, 32 and 36 of %0.
store float %8, ptr addrspace(1) %3 , align 4
store float %14, ptr addrspace(1) %9 , align 4
%43 = getelementptr inbounds i8, ptr addrspace(1) %0, i64 16
store float %18, ptr addrspace(1) %43 , align 4
%44 = getelementptr inbounds i8, ptr addrspace(1) %0, i64 20
store float %22, ptr addrspace(1) %44 , align 4
; %29 is %0+24 and %23 is %0+28, so these two stores reuse the load pointers.
store float %28, ptr addrspace(1) %29 , align 4
store float %34, ptr addrspace(1) %23 , align 4
%45 = getelementptr inbounds i8, ptr addrspace(1) %0, i64 32
store float %38, ptr addrspace(1) %45 , align 4
%46 = getelementptr inbounds i8, ptr addrspace(1) %0, i64 36
store float %42, ptr addrspace(1) %46 , align 4
ret void
}
attributes #0 = { "target-cpu"="skylake" }