1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
|
// RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cmov_3.perfscript --binary=%S/Inputs/cmov_3.perfbin --output=%t --skip-symbolization --perf-event=br_inst_retired.near_taken:upp
// RUN: FileCheck %s --input-file %t --check-prefix=CHECK-RAW-PROFILE
// RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cmov_3.perfscript --binary=%S/Inputs/cmov_3.perfbin --output=%t --perf-event=br_inst_retired.near_taken:upp
// RUN: FileCheck %s --input-file %t --check-prefix=CHECK
// RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cmov_3.perfscript --binary=%S/Inputs/cmov_3.perfbin --output=%t --skip-symbolization --perf-event=br_misp_retired.all_branches:upp --leading-ip-only
// RUN: FileCheck %s --input-file %t --check-prefix=UNPRED-RAW-PROFILE
// RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cmov_3.perfscript --binary=%S/Inputs/cmov_3.perfbin --output=%t --perf-event=br_misp_retired.all_branches:upp --leading-ip-only
// RUN: FileCheck %s --input-file %t --check-prefix=UNPRED
// Check that we can use perf event filtering to generate multiple types of
// source-level profiles from a single perf profile. In this case, we generate
// a typical execution frequency profile using br_inst_retired.near_taken LBRs,
// and a branch mispredict profile using br_misp_retired.all_branches sample
// IPs.
// The source example below is based on perfKernelCpp/cmov_3, except that a
// deliberately misleading __builtin_expect hint is used to persuade the
// compiler to emit a conditional branch instead of cmov, which induces branch
// mispredicts.
// CHECK: sel_arr:20229:0
// CHECK: 3.1: 627
// CHECK: 3.2: 627
// CHECK: 4: 615
// CHECK: 5: 627
// UNPRED: sel_arr:18:0
// UNPRED: 3.1: 0
// UNPRED: 3.2: 0
// UNPRED: 4: 9
// UNPRED: 5: 0
// CHECK-RAW-PROFILE: 3
// CHECK-RAW-PROFILE-NEXT: 2f0-2fa:303
// CHECK-RAW-PROFILE-NEXT: 2f0-310:312
// CHECK-RAW-PROFILE-NEXT: 2ff-310:315
// UNPRED-RAW-PROFILE: 1
// UNPRED-RAW-PROFILE-NEXT: 2fa-2fa:9
// Original code, built with:
//   clang -O2 -gline-tables-only -fdebug-info-for-profiling lit.c
#include <stdlib.h>
// Number of int elements in each of the four buffers below; also the trip
// count of the fill loop in init() and of the kernel loop in sel_arr().
#define N 20000
// Number of times main() invokes sel_arr() over the same buffers.
#define ITERS 10000
// Heap buffers allocated in init(): m_s1 holds the values the kernel compares
// against its threshold, m_s2 and m_s3 are the two selectable sources, and
// m_dst receives the selected values.
static int *m_s1, *m_s2, *m_s3, *m_dst;
// Allocates the four N-element int buffers and fills the three inputs:
// m_s1 gets pseudo-random values in [0, N) drawn from a fixed seed, m_s2 is
// all zeros and m_s3 is all ones. m_dst is allocated but not initialized.
// Aborts the program if any allocation fails rather than dereferencing a
// null pointer in the fill loop.
void init(void) {
  m_s1 = malloc(sizeof *m_s1 * N);
  m_s2 = malloc(sizeof *m_s2 * N);
  m_s3 = malloc(sizeof *m_s3 * N);
  m_dst = malloc(sizeof *m_dst * N);
  if (!m_s1 || !m_s2 || !m_s3 || !m_dst)
    abort();

  // Fixed seed: the same rand() sequence is requested on every run.
  srand(42);
  for (int i = 0; i < N; i++) {
    m_s1[i] = rand() % N;
    m_s2[i] = 0;
    m_s3[i] = 1;
  }
}
// Kernel under test: for each of the N elements, stores s2[i] into dst[i]
// when s1[i] < 10035 and s3[i] otherwise. The __builtin_expect(..., 0) hint
// tells the compiler the comparison is expected to be false. The pragmas ask
// the compiler not to unroll, vectorize or interleave the loop, and noinline
// keeps the function from being inlined into its caller.
// NOTE(review): the expected per-line counts earlier in this file appear to
// be offsets from this function's signature line, so documentation is kept
// above the signature and the body layout is left untouched.
void __attribute__((noinline)) sel_arr(int *dst, int *s1, int *s2, int *s3) {
#pragma nounroll
#pragma clang loop vectorize(disable) interleave(disable)
for (int i = 0; i < N; i++) {
int *p = __builtin_expect((s1[i] < 10035), 0) ? &s2[i] : &s3[i];
dst[i] = *p;
}
}
// Program entry point: fills the buffers once via init(), then runs the
// sel_arr() kernel ITERS times over the same inputs. Always returns 0.
int main(void) {
  init();

  int remaining = ITERS;
  while (remaining-- > 0) {
    sel_arr(m_dst, m_s1, m_s2, m_s3);
  }

  return 0;
}
|