; RUN: llc < %s -mtriple=thumbv7-apple-ios7.0.0 -float-abi=hard -mcpu=cortex-a9 -misched-postra -enable-misched -pre-RA-sched=source -scheditins=false | FileCheck %s
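;
; Flag notes (an editor's reading of the options, not asserted by the test
; itself): -enable-misched enables the pre-RA MachineScheduler and
; -misched-postra additionally runs it after register allocation;
; -pre-RA-sched=source keeps the SelectionDAG scheduler in source order;
; -scheditins=false disables itinerary-based latency lookup so the
; per-operand machine model is exercised instead.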
;
; Test MI-Sched support for latency-based stalls on an in-order pipeline
; using the new machine model.
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
; Don't be too strict with the top of the schedule, but most of it
; should be nicely pipelined.
;
; CHECK: saxpy10:
; CHECK: vldr
; CHECK: vldr
; CHECK: vldr
; CHECK: vldr
; CHECK: vldr
; CHECK-NEXT: vldr
; CHECK-NEXT: vmul
; CHECK-NEXT: vadd
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vldr
; CHECK-NEXT: vadd
; CHECK-NEXT: vadd
; CHECK-NEXT: vmul
; CHECK-NEXT: vldr
; CHECK-NEXT: vadd
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vmul
; CHECK-NEXT: vldr
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vmul
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vmul
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vmul
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vmul
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vmul
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vadd
; CHECK-NEXT: vadd
; CHECK-NEXT: vadd
; CHECK-NEXT: vmov
; CHECK-NEXT: bx
;
; This accumulates a sum rather than storing each result.
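;
; A rough C equivalent for reference (a reconstruction assuming the loop
; below was fully unrolled; it is not the original source):
;
;   float saxpy10(const float *data1, const float *data2, float a) {
;     float sum = 0.0f;                 /* matches the fadd with 0.0 below */
;     for (int i = 0; i != 10; ++i)     /* unrolled 10x in the IR */
;       sum += data1[i] * a + data2[i];
;     return sum;
;   }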
define float @saxpy10(float* nocapture readonly %data1, float* nocapture readonly %data2, float %a) {
entry:
%0 = load float, float* %data1, align 4
%mul = fmul float %0, %a
%1 = load float, float* %data2, align 4
%add = fadd float %mul, %1
%add2 = fadd float %add, 0.000000e+00
%arrayidx.1 = getelementptr inbounds float, float* %data1, i32 1
%2 = load float, float* %arrayidx.1, align 4
%mul.1 = fmul float %2, %a
%arrayidx1.1 = getelementptr inbounds float, float* %data2, i32 1
%3 = load float, float* %arrayidx1.1, align 4
%add.1 = fadd float %mul.1, %3
%add2.1 = fadd float %add2, %add.1
%arrayidx.2 = getelementptr inbounds float, float* %data1, i32 2
%4 = load float, float* %arrayidx.2, align 4
%mul.2 = fmul float %4, %a
%arrayidx1.2 = getelementptr inbounds float, float* %data2, i32 2
%5 = load float, float* %arrayidx1.2, align 4
%add.2 = fadd float %mul.2, %5
%add2.2 = fadd float %add2.1, %add.2
%arrayidx.3 = getelementptr inbounds float, float* %data1, i32 3
%6 = load float, float* %arrayidx.3, align 4
%mul.3 = fmul float %6, %a
%arrayidx1.3 = getelementptr inbounds float, float* %data2, i32 3
%7 = load float, float* %arrayidx1.3, align 4
%add.3 = fadd float %mul.3, %7
%add2.3 = fadd float %add2.2, %add.3
%arrayidx.4 = getelementptr inbounds float, float* %data1, i32 4
%8 = load float, float* %arrayidx.4, align 4
%mul.4 = fmul float %8, %a
%arrayidx1.4 = getelementptr inbounds float, float* %data2, i32 4
%9 = load float, float* %arrayidx1.4, align 4
%add.4 = fadd float %mul.4, %9
%add2.4 = fadd float %add2.3, %add.4
%arrayidx.5 = getelementptr inbounds float, float* %data1, i32 5
%10 = load float, float* %arrayidx.5, align 4
%mul.5 = fmul float %10, %a
%arrayidx1.5 = getelementptr inbounds float, float* %data2, i32 5
%11 = load float, float* %arrayidx1.5, align 4
%add.5 = fadd float %mul.5, %11
%add2.5 = fadd float %add2.4, %add.5
%arrayidx.6 = getelementptr inbounds float, float* %data1, i32 6
%12 = load float, float* %arrayidx.6, align 4
%mul.6 = fmul float %12, %a
%arrayidx1.6 = getelementptr inbounds float, float* %data2, i32 6
%13 = load float, float* %arrayidx1.6, align 4
%add.6 = fadd float %mul.6, %13
%add2.6 = fadd float %add2.5, %add.6
%arrayidx.7 = getelementptr inbounds float, float* %data1, i32 7
%14 = load float, float* %arrayidx.7, align 4
%mul.7 = fmul float %14, %a
%arrayidx1.7 = getelementptr inbounds float, float* %data2, i32 7
%15 = load float, float* %arrayidx1.7, align 4
%add.7 = fadd float %mul.7, %15
%add2.7 = fadd float %add2.6, %add.7
%arrayidx.8 = getelementptr inbounds float, float* %data1, i32 8
%16 = load float, float* %arrayidx.8, align 4
%mul.8 = fmul float %16, %a
%arrayidx1.8 = getelementptr inbounds float, float* %data2, i32 8
%17 = load float, float* %arrayidx1.8, align 4
%add.8 = fadd float %mul.8, %17
%add2.8 = fadd float %add2.7, %add.8
%arrayidx.9 = getelementptr inbounds float, float* %data1, i32 9
%18 = load float, float* %arrayidx.9, align 4
%mul.9 = fmul float %18, %a
%arrayidx1.9 = getelementptr inbounds float, float* %data2, i32 9
%19 = load float, float* %arrayidx1.9, align 4
%add.9 = fadd float %mul.9, %19
%add2.9 = fadd float %add2.8, %add.9
ret float %add2.9
}