File: vpinstruction-cost.ll

package info (click to toggle)
llvm-toolchain-20 1%3A20.1.6-1~exp1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 2,111,304 kB
  • sloc: cpp: 7,438,677; ansic: 1,393,822; asm: 1,012,926; python: 241,650; f90: 86,635; objc: 75,479; lisp: 42,144; pascal: 17,286; sh: 10,027; ml: 5,082; perl: 4,730; awk: 3,523; makefile: 3,349; javascript: 2,251; xml: 892; fortran: 672
file content (74 lines) | stat: -rw-r--r-- 4,113 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of"
; RUN: opt -S -passes=loop-vectorize -mcpu=skylake-avx512 -mtriple=x86_64-apple-macosx -debug -disable-output -S %s 2>&1 | FileCheck %s

; REQUIRES: asserts

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"

; Test input: a loop over %iv = 0..31 that loads an i64 from %src[%iv] and,
; when the loaded value is unsigned <= 128, stores %iv + 4 to
; %dst[(%iv + 4) | 1]. The store index is built with `or disjoint`; the
; expected per-recipe cost lines below list that instruction as
; `EMIT ir<%or> = add ir<%iv.4>, ir<1>` with a cost of 1 for both VF 2 and VF 4.
define void @wide_or_replaced_with_add_vpinstruction(ptr %src, ptr noalias %dst) {
; CHECK-LABEL: 'wide_or_replaced_with_add_vpinstruction'
; CHECK:  Cost of 1 for VF 2: induction instruction %iv.next = add nuw nsw i64 %iv, 1
; CHECK:  Cost of 0 for VF 2: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
; CHECK:  Cost of 1 for VF 2: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32
; CHECK:  Cost of 0 for VF 2: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK:  Cost of 0 for VF 2: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<%0>
; CHECK:  Cost of 0 for VF 2: vp<%4> = SCALAR-STEPS vp<%3>, ir<1>
; CHECK:  Cost of 0 for VF 2: CLONE ir<%g.src> = getelementptr inbounds ir<%src>, vp<%4>
; CHECK:  Cost of 0 for VF 2: vp<%5> = vector-pointer ir<%g.src>
; CHECK:  Cost of 1 for VF 2: WIDEN ir<%l> = load vp<%5>
; CHECK:  Cost of 1 for VF 2: WIDEN ir<%iv.4> = add ir<%iv>, ir<4>
; CHECK:  Cost of 1 for VF 2: WIDEN ir<%c> = icmp ule ir<%l>, ir<128>
; CHECK:  Cost of 1 for VF 2: EMIT ir<%or> = add ir<%iv.4>, ir<1>
; CHECK:  Cost of 0 for VF 2: CLONE ir<%g.dst> = getelementptr ir<%dst>, ir<%or>
; CHECK:  Cost of 0 for VF 2: vp<%6> = vector-pointer ir<%g.dst>
; CHECK:  Cost of 1 for VF 2: WIDEN store vp<%6>, ir<%iv.4>, ir<%c>
; CHECK:  Cost of 0 for VF 2: EMIT vp<%index.next> = add nuw vp<%3>, vp<%1>
; CHECK:  Cost of 0 for VF 2: EMIT branch-on-count vp<%index.next>, vp<%2>
; CHECK:  Cost of 0 for VF 2: vector loop backedge
; CHECK:  Cost of 1 for VF 4: induction instruction %iv.next = add nuw nsw i64 %iv, 1
; CHECK:  Cost of 0 for VF 4: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
; CHECK:  Cost of 1 for VF 4: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32
; CHECK:  Cost of 0 for VF 4: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK:  Cost of 0 for VF 4: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<%0>
; CHECK:  Cost of 0 for VF 4: vp<%4> = SCALAR-STEPS vp<%3>, ir<1>
; CHECK:  Cost of 0 for VF 4: CLONE ir<%g.src> = getelementptr inbounds ir<%src>, vp<%4>
; CHECK:  Cost of 0 for VF 4: vp<%5> = vector-pointer ir<%g.src>
; CHECK:  Cost of 1 for VF 4: WIDEN ir<%l> = load vp<%5>
; CHECK:  Cost of 1 for VF 4: WIDEN ir<%iv.4> = add ir<%iv>, ir<4>
; CHECK:  Cost of 1 for VF 4: WIDEN ir<%c> = icmp ule ir<%l>, ir<128>
; CHECK:  Cost of 1 for VF 4: EMIT ir<%or> = add ir<%iv.4>, ir<1>
; CHECK:  Cost of 0 for VF 4: CLONE ir<%g.dst> = getelementptr ir<%dst>, ir<%or>
; CHECK:  Cost of 0 for VF 4: vp<%6> = vector-pointer ir<%g.dst>
; CHECK:  Cost of 1 for VF 4: WIDEN store vp<%6>, ir<%iv.4>, ir<%c>
; CHECK:  Cost of 0 for VF 4: EMIT vp<%index.next> = add nuw vp<%3>, vp<%1>
; CHECK:  Cost of 0 for VF 4: EMIT branch-on-count vp<%index.next>, vp<%2>
; CHECK:  Cost of 0 for VF 4: vector loop backedge
; CHECK:  Cost of 1 for VF 4: induction instruction %iv.next = add nuw nsw i64 %iv, 1
; CHECK:  Cost of 0 for VF 4: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
; CHECK:  Cost of 1 for VF 4: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32
;
entry:
  br label %loop.header

; Loop header: load %src[%iv], compute %iv + 4, and branch on the loaded value.
loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %g.src = getelementptr inbounds i64, ptr %src, i64 %iv
  %l = load i64, ptr %g.src
  %iv.4 = add nuw nsw i64 %iv, 4
  ; Unsigned compare; the store in %loop.then only runs when %l <= 128.
  %c = icmp ule i64 %l, 128
  br i1 %c, label %loop.then, label %loop.latch

; Conditional block: store %iv.4 to %dst at index (%iv.4 | 1).
loop.then:
  ; This `or disjoint` is the instruction the expected output shows as an
  ; `add` (presumably because disjoint operands make or/add equivalent --
  ; confirm against the LangRef / vectorizer source).
  %or = or disjoint i64 %iv.4, 1
  ; NOTE(review): this GEP is `inbounds` here, but the expected CLONE line for
  ; %g.dst above is printed without `inbounds`.
  %g.dst = getelementptr inbounds i64, ptr %dst, i64 %or
  ; The expected WIDEN store line carries ir<%c> as an extra operand
  ; (presumably the mask for this conditional store).
  store i64 %iv.4, ptr %g.dst, align 4
  br label %loop.latch

; Latch: advance the induction variable and exit once %iv.next reaches 32.
loop.latch:
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 32
  br i1 %exitcond, label %exit, label %loop.header

exit:
  ret void
}