| 12
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
 100
 101
 102
 103
 104
 
 | ; REQUIRES: asserts
; RUN: opt -mcpu=core-avx2 -passes=loop-unroll -debug-only=loop-unroll -S -unroll-allow-partial < %s 2>&1 | FileCheck %s
target triple = "x86_64-unknown-linux-gnu"
; CHECK: Loop Unroll: F[foo] Loop %loop.header
; CHECK:   Loop Size = 25
; CHECK: UNROLLING loop %loop.header by 2 
; CHECK: Loop Unroll: F[foo] Loop %loop.2.header
; CHECK: Loop Size = 27
; CHECK-NOT: UNROLLING loop %loop.2.header
; @foo holds two back-to-back counted loops that differ mainly in how many
; stores they perform per iteration: the first loop body has 7 stores, the
; second has 8. The FileCheck expectations at the top of this file pin the
; reported size of each loop (25 and 27) and require that only the first one
; is partially unrolled (by 2).
; NOTE(review): presumably the extra store is what keeps the second loop from
; being unrolled on this CPU -- confirm against the X86 unrolling preferences.
;
; %out: pointer that receives one i32 after the first loop and a second i32
;       (at element index 1) after the second loop.
define void @foo(ptr %out) {
entry:
  ; Eight 1024 x i32 stack arrays used as store targets. %x06 is only written
  ; by the second loop.
  %0 = alloca [1024 x i32]
  %x0 = alloca [1024 x i32]
  %x01 = alloca [1024 x i32]
  %x02 = alloca [1024 x i32]
  %x03 = alloca [1024 x i32]
  %x04 = alloca [1024 x i32]
  %x05 = alloca [1024 x i32]
  %x06 = alloca [1024 x i32]
  br label %loop.header
; ---- First loop: %counter = 0, 2, 4, ... ; 7 stores per iteration ----
loop.header:
  %counter = phi i32 [0, %entry], [%inc, %loop.inc]
  br label %loop.body
loop.body:
  ; %0[counter] = counter
  %ptr = getelementptr [1024 x i32], ptr %0, i32 0, i32 %counter
  store i32 %counter, ptr %ptr
  ; %x0[counter] = counter + 5
  %val = add i32 %counter, 5
  %xptr = getelementptr [1024 x i32], ptr %x0, i32 0, i32 %counter
  store i32 %val, ptr %xptr
  ; %x01[counter] = counter + 6
  %val1 = add i32 %counter, 6
  %xptr1 = getelementptr [1024 x i32], ptr %x01, i32 0, i32 %counter
  store i32 %val1, ptr %xptr1
  ; %x02[counter] = counter + 7
  %val2 = add i32 %counter, 7
  %xptr2 = getelementptr [1024 x i32], ptr %x02, i32 0, i32 %counter
  store i32 %val2, ptr %xptr2
  ; %x03[counter] = counter + 8
  %val3 = add i32 %counter, 8
  %xptr3 = getelementptr [1024 x i32], ptr %x03, i32 0, i32 %counter
  store i32 %val3, ptr %xptr3
  ; %x04[counter] = counter + 9
  %val4 = add i32 %counter, 9
  %xptr4 = getelementptr [1024 x i32], ptr %x04, i32 0, i32 %counter
  store i32 %val4, ptr %xptr4
  ; %x05[counter] = counter + 10
  %val5 = add i32 %counter, 10
  %xptr5 = getelementptr [1024 x i32], ptr %x05, i32 0, i32 %counter
  store i32 %val5, ptr %xptr5
  br label %loop.inc
loop.inc:
  ; Step by 2; leave the loop once the next counter value is >= 1023 (signed).
  %inc = add i32 %counter, 2
  %1 = icmp sge i32 %inc, 1023
  br i1 %1, label  %exit.0, label %loop.header
exit.0:
  ; Read back %0[5] and publish it through %out, so the first loop's stores to
  ; %0 have an observable use.
  %2 = getelementptr [1024 x i32], ptr %0, i32 0, i32 5
  %3 = load i32, ptr %2
  store i32 %3, ptr %out
  br label %loop.2.header
; ---- Second loop: same shape as the first, plus an 8th store into %x06 ----
loop.2.header:
  %counter.2 = phi i32 [0, %exit.0], [%inc.2, %loop.2.inc]
  br label %loop.2.body
loop.2.body:
  ; %0[counter.2] = counter.2
  %ptr.2 = getelementptr [1024 x i32], ptr %0, i32 0, i32 %counter.2
  store i32 %counter.2, ptr %ptr.2
  ; %x0[counter.2] = counter.2 + 5
  %val.2 = add i32 %counter.2, 5
  %xptr.2 = getelementptr [1024 x i32], ptr %x0, i32 0, i32 %counter.2
  store i32 %val.2, ptr %xptr.2
  ; NOTE(review): the next two stores write %val1 / %val2, which are defined in
  ; the FIRST loop, so %val1.2 and %val2.2 are computed but never used. This
  ; may be a copy-paste slip or may be deliberate; changing the operands could
  ; alter the loop size the test pins -- confirm before touching.
  %val1.2 = add i32 %counter.2, 6
  %xptr1.2 = getelementptr [1024 x i32], ptr %x01, i32 0, i32 %counter.2
  store i32 %val1, ptr %xptr1.2
  %val2.2 = add i32 %counter.2, 7
  %xptr2.2 = getelementptr [1024 x i32], ptr %x02, i32 0, i32 %counter.2
  store i32 %val2, ptr %xptr2.2
  ; %x03[counter.2] = counter.2 + 8
  %val3.2 = add i32 %counter.2, 8
  %xptr3.2 = getelementptr [1024 x i32], ptr %x03, i32 0, i32 %counter.2
  store i32 %val3.2, ptr %xptr3.2
  ; %x04[counter.2] = counter.2 + 9
  %val4.2 = add i32 %counter.2, 9
  %xptr4.2 = getelementptr [1024 x i32], ptr %x04, i32 0, i32 %counter.2
  store i32 %val4.2, ptr %xptr4.2
  ; %x05[counter.2] = counter.2 + 10
  %val5.2 = add i32 %counter.2, 10
  %xptr5.2 = getelementptr [1024 x i32], ptr %x05, i32 0, i32 %counter.2
  store i32 %val5.2, ptr %xptr5.2
  ; Extra (8th) store that the first loop does not have; it reuses %val5.2.
  %xptr6.2 = getelementptr [1024 x i32], ptr %x06, i32 0, i32 %counter.2
  store i32 %val5.2, ptr %xptr6.2
  br label %loop.2.inc
loop.2.inc:
  ; Same induction step and exit test as the first loop.
  %inc.2 = add i32 %counter.2, 2
  %4 = icmp sge i32 %inc.2, 1023
  br i1 %4, label  %exit.2, label %loop.2.header
exit.2:
  ; Loads %0[6] into %x3, then writes to %out[1].
  ; NOTE(review): the value stored is %3 (loaded in exit.0), not %x3, so %x3 is
  ; unused -- verify whether %x3 was the intended operand.
  %x2 = getelementptr [1024 x i32], ptr %0, i32 0, i32 6
  %x3 = load i32, ptr %x2
  %out2 = getelementptr i32, ptr %out, i32 1
  store i32 %3, ptr %out2
  ret void
}
 |