1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
|
; RUN: opt < %s -passes='print<loop-cache-cost>' -disable-output 2>&1 | FileCheck %s
; Module target: 64-bit little-endian PowerPC Linux. The data layout string
; declares little-endian ("e"), ELF name mangling ("m:e"), 64-bit aligned i64
; ("i64:64") and native integer widths of 32 and 64 bits ("n32:64").
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"
; Check that delinearization in loop cache analysis can handle fixed-size arrays.
; The IR is copied from llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheckFixedSize.ll
; CHECK: Loop 'for.body' has cost = 4186116
; CHECK-NEXT: Loop 'for.body4' has cost = 128898
;; #define N 1024
;; #define M 2048
;; void t1(int a[N][M]) {
;;   for (int i = 0; i < N-1; ++i)
;;     for (int j = 2; j < M; ++j)
;;       a[i][j] = a[i+1][j-2];
;; }
; @t1: two-deep loop nest over an array whose rows are [2048 x i32].
;   outer loop (header 'for.body'):  i = 0 .. 1022
;   inner loop (header 'for.body4'): j = 2 .. 2047
; Each iteration loads a[i+1][j-2] and stores it to a[i][j]. The store address
; is built on top of a GEP of %a with a single zero index rather than on %a
; directly. The two header labels are the loop names printed by the analysis,
; so they must keep their spelling.
define void @t1([2048 x i32]* %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.inc11
  %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc11 ]
  br label %for.body4

for.body4:                                        ; preds = %for.body, %for.body4
  %j = phi i64 [ 2, %for.body ], [ %j.next, %for.body4 ]
  ; Source element: a[i+1][j-2].
  %i.plus1 = add nuw nsw i64 %i, 1
  %j.minus2 = add nsw i64 %j, -2
  %src.addr = getelementptr inbounds [2048 x i32], [2048 x i32]* %a, i64 %i.plus1, i64 %j.minus2
  %val = load i32, i32* %src.addr, align 4
  ; Destination element: a[i][j], addressed through a zero-index GEP of %a.
  %a.base = getelementptr inbounds [2048 x i32], [2048 x i32]* %a, i64 0
  %dst.addr = getelementptr inbounds [2048 x i32], [2048 x i32]* %a.base, i64 %i, i64 %j
  store i32 %val, i32* %dst.addr, align 4
  %j.next = add nuw nsw i64 %j, 1
  %inner.continue = icmp ne i64 %j.next, 2048
  br i1 %inner.continue, label %for.body4, label %for.inc11

for.inc11:                                        ; preds = %for.body4
  %i.next = add nuw nsw i64 %i, 1
  %outer.continue = icmp ne i64 %i.next, 1023
  br i1 %outer.continue, label %for.body, label %for.end13

for.end13:                                        ; preds = %for.inc11
  ret void
}
; CHECK: Loop 'for.body' has cost = 4186116
; CHECK-NEXT: Loop 'for.body4' has cost = 128898
; @t2: same loop nest and element accesses as @t1 (load a[i+1][j-2], store to
; a[i][j]), except that the store's base pointer is the result of calling
; @func_with_returned_arg on %a, whose parameter is marked `returned`.
; The expected costs listed just above this function equal those of @t1.
; The header labels 'for.body' and 'for.body4' are the loop names printed by
; the analysis, so they must keep their spelling.
define void @t2([2048 x i32]* %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.inc11
  %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc11 ]
  br label %for.body4

for.body4:                                        ; preds = %for.body, %for.body4
  %j = phi i64 [ 2, %for.body ], [ %j.next, %for.body4 ]
  ; Source element: a[i+1][j-2].
  %i.plus1 = add nuw nsw i64 %i, 1
  %j.minus2 = add nsw i64 %j, -2
  %src.addr = getelementptr inbounds [2048 x i32], [2048 x i32]* %a, i64 %i.plus1, i64 %j.minus2
  %val = load i32, i32* %src.addr, align 4
  ; Destination element: a[i][j], addressed through the call's return value.
  %a.ret = call [2048 x i32]* @func_with_returned_arg([2048 x i32]* returned %a)
  %dst.addr = getelementptr inbounds [2048 x i32], [2048 x i32]* %a.ret, i64 %i, i64 %j
  store i32 %val, i32* %dst.addr, align 4
  %j.next = add nuw nsw i64 %j, 1
  %inner.continue = icmp ne i64 %j.next, 2048
  br i1 %inner.continue, label %for.body4, label %for.inc11

for.inc11:                                        ; preds = %for.body4
  %i.next = add nuw nsw i64 %i, 1
  %outer.continue = icmp ne i64 %i.next, 1023
  br i1 %outer.continue, label %for.body, label %for.end13

for.end13:                                        ; preds = %for.inc11
  ret void
}
; External helper with no body in this file. Its only parameter carries the
; `returned` attribute; @t2 uses the call result as the base pointer of its
; store address.
declare [2048 x i32]* @func_with_returned_arg([2048 x i32]* returned %arg)
; CHECK: Loop 'for.body' has cost = 2112128815104000000
; CHECK-NEXT: Loop 'for.body4' has cost = 16762927104000000
; CHECK-NEXT: Loop 'for.body8' has cost = 130960368000000
; CHECK-NEXT: Loop 'for.body12' has cost = 1047682944000
; CHECK-NEXT: Loop 'for.body16' has cost = 32260032000
;; #define N 128
;; #define M 2048
;; void t3(int a[][N][N][N][M]) {
;;   for (int i1 = 0; i1 < N-1; ++i1)
;;     for (int i2 = 2; i2 < N; ++i2)
;;       for (int i3 = 0; i3 < N; ++i3)
;;         for (int i4 = 3; i4 < N; ++i4)
;;           for (int i5 = 0; i5 < M-2; ++i5)
;;             a[i1][i2][i3][i4][i5] = a[i1+1][i2-2][i3][i4-3][i5+2];
;; }
; @t3: five-deep loop nest over [128 x [128 x [128 x [2048 x i32]]]] elements.
;   'for.body'   : i1 = 0 .. 126
;   'for.body4'  : i2 = 2 .. 127
;   'for.body8'  : i3 = 0 .. 127
;   'for.body12' : i4 = 3 .. 127
;   'for.body16' : i5 = 0 .. 2045
; Each innermost iteration loads a[i1+1][i2-2][i3][i4-3][i5+2] and stores it to
; a[i1][i2][i3][i4][i5]. The five header labels are the loop names printed by
; the analysis, so they must keep their spelling.
define void @t3([128 x [128 x [128 x [2048 x i32]]]]* %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.inc46
  %i1 = phi i64 [ 0, %entry ], [ %i1.next, %for.inc46 ]
  br label %for.body4

for.body4:                                        ; preds = %for.body, %for.inc43
  %i2 = phi i64 [ 2, %for.body ], [ %i2.next, %for.inc43 ]
  br label %for.body8

for.body8:                                        ; preds = %for.body4, %for.inc40
  %i3 = phi i64 [ 0, %for.body4 ], [ %i3.next, %for.inc40 ]
  br label %for.body12

for.body12:                                       ; preds = %for.body8, %for.inc37
  %i4 = phi i64 [ 3, %for.body8 ], [ %i4.next, %for.inc37 ]
  br label %for.body16

for.body16:                                       ; preds = %for.body12, %for.body16
  %i5 = phi i64 [ 0, %for.body12 ], [ %i5.next, %for.body16 ]
  ; Source element: a[i1+1][i2-2][i3][i4-3][i5+2].
  %i1.plus1 = add nuw nsw i64 %i1, 1
  %i2.minus2 = add nsw i64 %i2, -2
  %i4.minus3 = add nsw i64 %i4, -3
  %i5.plus2 = add nuw nsw i64 %i5, 2
  %src.addr = getelementptr inbounds [128 x [128 x [128 x [2048 x i32]]]], [128 x [128 x [128 x [2048 x i32]]]]* %a, i64 %i1.plus1, i64 %i2.minus2, i64 %i3, i64 %i4.minus3, i64 %i5.plus2
  %val = load i32, i32* %src.addr, align 4
  ; Destination element: a[i1][i2][i3][i4][i5].
  %dst.addr = getelementptr inbounds [128 x [128 x [128 x [2048 x i32]]]], [128 x [128 x [128 x [2048 x i32]]]]* %a, i64 %i1, i64 %i2, i64 %i3, i64 %i4, i64 %i5
  store i32 %val, i32* %dst.addr, align 4
  %i5.next = add nuw nsw i64 %i5, 1
  %i5.continue = icmp ne i64 %i5.next, 2046
  br i1 %i5.continue, label %for.body16, label %for.inc37

for.inc37:                                        ; preds = %for.body16
  %i4.next = add nuw nsw i64 %i4, 1
  %i4.continue = icmp ne i64 %i4.next, 128
  br i1 %i4.continue, label %for.body12, label %for.inc40

for.inc40:                                        ; preds = %for.inc37
  %i3.next = add nuw nsw i64 %i3, 1
  %i3.continue = icmp ne i64 %i3.next, 128
  br i1 %i3.continue, label %for.body8, label %for.inc43

for.inc43:                                        ; preds = %for.inc40
  %i2.next = add nuw nsw i64 %i2, 1
  %i2.continue = icmp ne i64 %i2.next, 128
  br i1 %i2.continue, label %for.body4, label %for.inc46

for.inc46:                                        ; preds = %for.inc43
  %i1.next = add nuw nsw i64 %i1, 1
  %i1.continue = icmp ne i64 %i1.next, 127
  br i1 %i1.continue, label %for.body, label %for.end48

for.end48:                                        ; preds = %for.inc46
  ret void
}
|