1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
|
# This checks that shrink wrapping does not pessimize a CFG pattern where two
# blocks can be proved to have the same execution count but, because of profile
# inaccuracies, we could move saves into the second block. We can prove two
# blocks A and B have the same frequency when B post-dominates A, A dominates B,
# and both are at the same loop nesting level. Moving the saves would be a
# pessimization because shrink wrapping is unlikely to be able to cleanly move
# PUSH instructions, and would insert additional store instructions instead.
# REQUIRES: system-linux
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
# RUN: %s -o %t.o
# RUN: link_fdata %s %t.o %t.fdata
# RUN: llvm-strip --strip-unneeded %t.o
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
# RUN: llvm-bolt -relocs %t.exe -o %t.out -data %t.fdata \
# RUN: -frame-opt=all -equalize-bb-counts | FileCheck %s
# Here we create a CFG pattern with two blocks A and B belonging to the same
# equivalency class as defined by dominance relations and having in theory
# the same frequency. But we tweak edge counts from profile to make block A
# hotter than block B.
# _start -- the only function in this test (x86-64, AT&T syntax).
#
# Control flow:
#   entry    : prologue; ends in the conditional branch at label `b`
#   if_false : fall-through path; ends in the unconditional jump at label `c`
#   end_if_1 : join point reached from both `b` and `c`; body and epilogue
#
# Labels `b` and `c` sit directly on the branch instructions so that the
# profile directives below can refer to them as #b# and #c#.
        .globl  _start
        .type   _start, %function
_start:
        .cfi_startproc

# Hot entry block: frame setup and the saves of RBX and R14.
# The profile below records a count of 10 for entry into _start.
# FDATA: 0 [unknown] 0 1 _start 0 0 10
        pushq   %rbp
        movq    %rsp, %rbp
        pushq   %rbx
        pushq   %r14
        subq    $0x20, %rsp
b:
        je      end_if_1
# Taken edge b -> end_if_1, recorded with count 1.
# FDATA: 1 _start #b# 1 _start #end_if_1# 0 1

if_false:
        # This load exists to create a relocation, so that bolt can be run
        # w/ relocs.
        movq    rel(%rip), %rdi
c:
        jmp     end_if_1
# Edge c -> end_if_1. Its count is deliberately lowered from 9 to 1 to
# simulate an inaccurate profile.
# FDATA: 1 _start #c# 1 _start #end_if_1# 0 1

end_if_1:
        # R14 and RBX are first used here, which makes this block a possible
        # move point for shrink wrapping (SW).
        movq    %r14, %rdi
        movq    %rbx, %rdi
        leaq    -0x20(%rbp), %r14
        movq    -0x20(%rbp), %rdi

        # Epilogue: release the 0x20 bytes of locals, then restore the saved
        # registers in reverse order of the pushes above.
        addq    $0x20, %rsp
        popq    %r14
        popq    %rbx
        popq    %rbp
        retq
        .cfi_endproc
        .size   _start, .-_start
# rel -- one 8-byte data word holding the address of end_if_1. The if_false
# path in _start loads it RIP-relatively; that reference is what creates the
# relocation this test needs.
        .data
rel:
        .quad   end_if_1
# CHECK: BOLT-INFO: Shrink wrapping moved 0 spills inserting load/stores and 0 spills inserting push/pops
|