1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mcpu=pwr9 -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
; void foo(float *data, float d) {
; long i;
; for (i = 0; i < 8000; i++)
; data[i] = d;
; }
;
; The loop above will be unrolled by 96 and vectorized on Power9.
; The icmp between the loop iteration index and the loop trip count (384) has an LSRUse for 'reg({0,+,384})'.
; Make sure that this icmp does not prevent LSR from choosing the best formulae set, which is based on 'reg({(192 + %0),+,384})'.
; @foo(%data, %d): stores the scalar %d into elements 0..7999 of %data, in an
; already vectorized and unrolled form:
;   entry       - builds twelve identical <4 x float> splats of %d.
;   vector.body - 24 vector stores (96 floats = 384 bytes) per iteration;
;                 %index starts at 0 and advances by 96 until it equals 7968.
;   for.body    - 32 scalar stores covering the remaining elements 7968..7999.
; The autogenerated assertions below expect the vector loop to address all 24
; stores through one pointer register (r4 = r3 + 192, bumped by 384 per
; iteration) with displacements -192..176, and to be counted through CTR
; (83 iterations = 7968 / 96).
define void @foo(float* nocapture %data, float %d) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xscvdpspn 0, 1
; CHECK-NEXT: li 5, 83
; CHECK-NEXT: addi 4, 3, 192
; CHECK-NEXT: mtctr 5
; CHECK-NEXT: xxspltw 0, 0, 0
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_1: # %vector.body
; CHECK-NEXT: #
; CHECK-NEXT: stxv 0, -192(4)
; CHECK-NEXT: stxv 0, -176(4)
; CHECK-NEXT: stxv 0, -160(4)
; CHECK-NEXT: stxv 0, -144(4)
; CHECK-NEXT: stxv 0, -128(4)
; CHECK-NEXT: stxv 0, -112(4)
; CHECK-NEXT: stxv 0, -96(4)
; CHECK-NEXT: stxv 0, -80(4)
; CHECK-NEXT: stxv 0, -64(4)
; CHECK-NEXT: stxv 0, -48(4)
; CHECK-NEXT: stxv 0, -32(4)
; CHECK-NEXT: stxv 0, -16(4)
; CHECK-NEXT: stxv 0, 0(4)
; CHECK-NEXT: stxv 0, 16(4)
; CHECK-NEXT: stxv 0, 32(4)
; CHECK-NEXT: stxv 0, 48(4)
; CHECK-NEXT: stxv 0, 64(4)
; CHECK-NEXT: stxv 0, 80(4)
; CHECK-NEXT: stxv 0, 96(4)
; CHECK-NEXT: stxv 0, 112(4)
; CHECK-NEXT: stxv 0, 128(4)
; CHECK-NEXT: stxv 0, 144(4)
; CHECK-NEXT: stxv 0, 160(4)
; CHECK-NEXT: stxv 0, 176(4)
; CHECK-NEXT: addi 4, 4, 384
; CHECK-NEXT: bdnz .LBB0_1
; CHECK-NEXT: # %bb.2: # %for.body
; CHECK-NEXT: stfs 1, 31872(3)
; CHECK-NEXT: stfs 1, 31876(3)
; CHECK-NEXT: stfs 1, 31880(3)
; CHECK-NEXT: stfs 1, 31884(3)
; CHECK-NEXT: stfs 1, 31888(3)
; CHECK-NEXT: stfs 1, 31892(3)
; CHECK-NEXT: stfs 1, 31896(3)
; CHECK-NEXT: stfs 1, 31900(3)
; CHECK-NEXT: stfs 1, 31904(3)
; CHECK-NEXT: stfs 1, 31908(3)
; CHECK-NEXT: stfs 1, 31912(3)
; CHECK-NEXT: stfs 1, 31916(3)
; CHECK-NEXT: stfs 1, 31920(3)
; CHECK-NEXT: stfs 1, 31924(3)
; CHECK-NEXT: stfs 1, 31928(3)
; CHECK-NEXT: stfs 1, 31932(3)
; CHECK-NEXT: stfs 1, 31936(3)
; CHECK-NEXT: stfs 1, 31940(3)
; CHECK-NEXT: stfs 1, 31944(3)
; CHECK-NEXT: stfs 1, 31948(3)
; CHECK-NEXT: stfs 1, 31952(3)
; CHECK-NEXT: stfs 1, 31956(3)
; CHECK-NEXT: stfs 1, 31960(3)
; CHECK-NEXT: stfs 1, 31964(3)
; CHECK-NEXT: stfs 1, 31968(3)
; CHECK-NEXT: stfs 1, 31972(3)
; CHECK-NEXT: stfs 1, 31976(3)
; CHECK-NEXT: stfs 1, 31980(3)
; CHECK-NEXT: stfs 1, 31984(3)
; CHECK-NEXT: stfs 1, 31988(3)
; CHECK-NEXT: stfs 1, 31992(3)
; CHECK-NEXT: stfs 1, 31996(3)
; CHECK-NEXT: blr
; Build twelve identical splats of %d: each pair inserts %d into lane 0 and
; then broadcasts lane 0 to all four lanes with a zeroinitializer shuffle mask.
entry:
%broadcast.splatinsert16 = insertelement <4 x float> undef, float %d, i32 0
%broadcast.splat17 = shufflevector <4 x float> %broadcast.splatinsert16, <4 x float> undef, <4 x i32> zeroinitializer
%broadcast.splatinsert18 = insertelement <4 x float> undef, float %d, i32 0
%broadcast.splat19 = shufflevector <4 x float> %broadcast.splatinsert18, <4 x float> undef, <4 x i32> zeroinitializer
%broadcast.splatinsert20 = insertelement <4 x float> undef, float %d, i32 0
%broadcast.splat21 = shufflevector <4 x float> %broadcast.splatinsert20, <4 x float> undef, <4 x i32> zeroinitializer
%broadcast.splatinsert22 = insertelement <4 x float> undef, float %d, i32 0
%broadcast.splat23 = shufflevector <4 x float> %broadcast.splatinsert22, <4 x float> undef, <4 x i32> zeroinitializer
%broadcast.splatinsert24 = insertelement <4 x float> undef, float %d, i32 0
%broadcast.splat25 = shufflevector <4 x float> %broadcast.splatinsert24, <4 x float> undef, <4 x i32> zeroinitializer
%broadcast.splatinsert26 = insertelement <4 x float> undef, float %d, i32 0
%broadcast.splat27 = shufflevector <4 x float> %broadcast.splatinsert26, <4 x float> undef, <4 x i32> zeroinitializer
%broadcast.splatinsert28 = insertelement <4 x float> undef, float %d, i32 0
%broadcast.splat29 = shufflevector <4 x float> %broadcast.splatinsert28, <4 x float> undef, <4 x i32> zeroinitializer
%broadcast.splatinsert30 = insertelement <4 x float> undef, float %d, i32 0
%broadcast.splat31 = shufflevector <4 x float> %broadcast.splatinsert30, <4 x float> undef, <4 x i32> zeroinitializer
%broadcast.splatinsert32 = insertelement <4 x float> undef, float %d, i32 0
%broadcast.splat33 = shufflevector <4 x float> %broadcast.splatinsert32, <4 x float> undef, <4 x i32> zeroinitializer
%broadcast.splatinsert34 = insertelement <4 x float> undef, float %d, i32 0
%broadcast.splat35 = shufflevector <4 x float> %broadcast.splatinsert34, <4 x float> undef, <4 x i32> zeroinitializer
%broadcast.splatinsert36 = insertelement <4 x float> undef, float %d, i32 0
%broadcast.splat37 = shufflevector <4 x float> %broadcast.splatinsert36, <4 x float> undef, <4 x i32> zeroinitializer
%broadcast.splatinsert38 = insertelement <4 x float> undef, float %d, i32 0
%broadcast.splat39 = shufflevector <4 x float> %broadcast.splatinsert38, <4 x float> undef, <4 x i32> zeroinitializer
br label %vector.body
; Vector loop. %index is the element index of the first float written in this
; iteration; it is 0 on entry and %index.next.1 (= %index + 96) on the back edge.
vector.body: ; preds = %vector.body, %entry
%index = phi i64 [ 0, %entry ], [ %index.next.1, %vector.body ]
; First half: twelve 4-float stores at element offsets 0, 4, ..., 44 from
; %data + %index. The stores are only 4-byte aligned (align 4).
%0 = getelementptr inbounds float, float* %data, i64 %index
%1 = bitcast float* %0 to <4 x float>*
store <4 x float> %broadcast.splat17, <4 x float>* %1, align 4
%2 = getelementptr inbounds float, float* %0, i64 4
%3 = bitcast float* %2 to <4 x float>*
store <4 x float> %broadcast.splat19, <4 x float>* %3, align 4
%4 = getelementptr inbounds float, float* %0, i64 8
%5 = bitcast float* %4 to <4 x float>*
store <4 x float> %broadcast.splat21, <4 x float>* %5, align 4
%6 = getelementptr inbounds float, float* %0, i64 12
%7 = bitcast float* %6 to <4 x float>*
store <4 x float> %broadcast.splat23, <4 x float>* %7, align 4
%8 = getelementptr inbounds float, float* %0, i64 16
%9 = bitcast float* %8 to <4 x float>*
store <4 x float> %broadcast.splat25, <4 x float>* %9, align 4
%10 = getelementptr inbounds float, float* %0, i64 20
%11 = bitcast float* %10 to <4 x float>*
store <4 x float> %broadcast.splat27, <4 x float>* %11, align 4
%12 = getelementptr inbounds float, float* %0, i64 24
%13 = bitcast float* %12 to <4 x float>*
store <4 x float> %broadcast.splat29, <4 x float>* %13, align 4
%14 = getelementptr inbounds float, float* %0, i64 28
%15 = bitcast float* %14 to <4 x float>*
store <4 x float> %broadcast.splat31, <4 x float>* %15, align 4
%16 = getelementptr inbounds float, float* %0, i64 32
%17 = bitcast float* %16 to <4 x float>*
store <4 x float> %broadcast.splat33, <4 x float>* %17, align 4
%18 = getelementptr inbounds float, float* %0, i64 36
%19 = bitcast float* %18 to <4 x float>*
store <4 x float> %broadcast.splat35, <4 x float>* %19, align 4
%20 = getelementptr inbounds float, float* %0, i64 40
%21 = bitcast float* %20 to <4 x float>*
store <4 x float> %broadcast.splat37, <4 x float>* %21, align 4
%22 = getelementptr inbounds float, float* %0, i64 44
%23 = bitcast float* %22 to <4 x float>*
store <4 x float> %broadcast.splat39, <4 x float>* %23, align 4
; Second half: the same twelve stores, based at %data + (%index + 48).
%index.next = add nuw nsw i64 %index, 48
%24 = getelementptr inbounds float, float* %data, i64 %index.next
%25 = bitcast float* %24 to <4 x float>*
store <4 x float> %broadcast.splat17, <4 x float>* %25, align 4
%26 = getelementptr inbounds float, float* %24, i64 4
%27 = bitcast float* %26 to <4 x float>*
store <4 x float> %broadcast.splat19, <4 x float>* %27, align 4
%28 = getelementptr inbounds float, float* %24, i64 8
%29 = bitcast float* %28 to <4 x float>*
store <4 x float> %broadcast.splat21, <4 x float>* %29, align 4
%30 = getelementptr inbounds float, float* %24, i64 12
%31 = bitcast float* %30 to <4 x float>*
store <4 x float> %broadcast.splat23, <4 x float>* %31, align 4
%32 = getelementptr inbounds float, float* %24, i64 16
%33 = bitcast float* %32 to <4 x float>*
store <4 x float> %broadcast.splat25, <4 x float>* %33, align 4
%34 = getelementptr inbounds float, float* %24, i64 20
%35 = bitcast float* %34 to <4 x float>*
store <4 x float> %broadcast.splat27, <4 x float>* %35, align 4
%36 = getelementptr inbounds float, float* %24, i64 24
%37 = bitcast float* %36 to <4 x float>*
store <4 x float> %broadcast.splat29, <4 x float>* %37, align 4
%38 = getelementptr inbounds float, float* %24, i64 28
%39 = bitcast float* %38 to <4 x float>*
store <4 x float> %broadcast.splat31, <4 x float>* %39, align 4
%40 = getelementptr inbounds float, float* %24, i64 32
%41 = bitcast float* %40 to <4 x float>*
store <4 x float> %broadcast.splat33, <4 x float>* %41, align 4
%42 = getelementptr inbounds float, float* %24, i64 36
%43 = bitcast float* %42 to <4 x float>*
store <4 x float> %broadcast.splat35, <4 x float>* %43, align 4
%44 = getelementptr inbounds float, float* %24, i64 40
%45 = bitcast float* %44 to <4 x float>*
store <4 x float> %broadcast.splat37, <4 x float>* %45, align 4
%46 = getelementptr inbounds float, float* %24, i64 44
%47 = bitcast float* %46 to <4 x float>*
store <4 x float> %broadcast.splat39, <4 x float>* %47, align 4
; Advance by 96 elements and leave the loop once the index equals 7968. This
; compare is the icmp whose LSR use the test is about.
%index.next.1 = add nuw nsw i64 %index, 96
%48 = icmp eq i64 %index.next.1, 7968
br i1 %48, label %for.body, label %vector.body
; Scalar tail: 32 individual float stores to elements 7968..7999, each at a
; constant offset from %data (byte offsets 31872..31996 in the expected output).
for.body: ; preds = %vector.body
%arrayidx = getelementptr inbounds float, float* %data, i64 7968
store float %d, float* %arrayidx, align 4
%arrayidx.1 = getelementptr inbounds float, float* %data, i64 7969
store float %d, float* %arrayidx.1, align 4
%arrayidx.2 = getelementptr inbounds float, float* %data, i64 7970
store float %d, float* %arrayidx.2, align 4
%arrayidx.3 = getelementptr inbounds float, float* %data, i64 7971
store float %d, float* %arrayidx.3, align 4
%arrayidx.4 = getelementptr inbounds float, float* %data, i64 7972
store float %d, float* %arrayidx.4, align 4
%arrayidx.5 = getelementptr inbounds float, float* %data, i64 7973
store float %d, float* %arrayidx.5, align 4
%arrayidx.6 = getelementptr inbounds float, float* %data, i64 7974
store float %d, float* %arrayidx.6, align 4
%arrayidx.7 = getelementptr inbounds float, float* %data, i64 7975
store float %d, float* %arrayidx.7, align 4
%arrayidx.8 = getelementptr inbounds float, float* %data, i64 7976
store float %d, float* %arrayidx.8, align 4
%arrayidx.9 = getelementptr inbounds float, float* %data, i64 7977
store float %d, float* %arrayidx.9, align 4
%arrayidx.10 = getelementptr inbounds float, float* %data, i64 7978
store float %d, float* %arrayidx.10, align 4
%arrayidx.11 = getelementptr inbounds float, float* %data, i64 7979
store float %d, float* %arrayidx.11, align 4
%arrayidx.12 = getelementptr inbounds float, float* %data, i64 7980
store float %d, float* %arrayidx.12, align 4
%arrayidx.13 = getelementptr inbounds float, float* %data, i64 7981
store float %d, float* %arrayidx.13, align 4
%arrayidx.14 = getelementptr inbounds float, float* %data, i64 7982
store float %d, float* %arrayidx.14, align 4
%arrayidx.15 = getelementptr inbounds float, float* %data, i64 7983
store float %d, float* %arrayidx.15, align 4
%arrayidx.16 = getelementptr inbounds float, float* %data, i64 7984
store float %d, float* %arrayidx.16, align 4
%arrayidx.17 = getelementptr inbounds float, float* %data, i64 7985
store float %d, float* %arrayidx.17, align 4
%arrayidx.18 = getelementptr inbounds float, float* %data, i64 7986
store float %d, float* %arrayidx.18, align 4
%arrayidx.19 = getelementptr inbounds float, float* %data, i64 7987
store float %d, float* %arrayidx.19, align 4
%arrayidx.20 = getelementptr inbounds float, float* %data, i64 7988
store float %d, float* %arrayidx.20, align 4
%arrayidx.21 = getelementptr inbounds float, float* %data, i64 7989
store float %d, float* %arrayidx.21, align 4
%arrayidx.22 = getelementptr inbounds float, float* %data, i64 7990
store float %d, float* %arrayidx.22, align 4
%arrayidx.23 = getelementptr inbounds float, float* %data, i64 7991
store float %d, float* %arrayidx.23, align 4
%arrayidx.24 = getelementptr inbounds float, float* %data, i64 7992
store float %d, float* %arrayidx.24, align 4
%arrayidx.25 = getelementptr inbounds float, float* %data, i64 7993
store float %d, float* %arrayidx.25, align 4
%arrayidx.26 = getelementptr inbounds float, float* %data, i64 7994
store float %d, float* %arrayidx.26, align 4
%arrayidx.27 = getelementptr inbounds float, float* %data, i64 7995
store float %d, float* %arrayidx.27, align 4
%arrayidx.28 = getelementptr inbounds float, float* %data, i64 7996
store float %d, float* %arrayidx.28, align 4
%arrayidx.29 = getelementptr inbounds float, float* %data, i64 7997
store float %d, float* %arrayidx.29, align 4
%arrayidx.30 = getelementptr inbounds float, float* %data, i64 7998
store float %d, float* %arrayidx.30, align 4
%arrayidx.31 = getelementptr inbounds float, float* %data, i64 7999
store float %d, float* %arrayidx.31, align 4
ret void
}
|