// RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cmov_3.perfscript --binary=%S/Inputs/cmov_3.perfbin --output=%t --skip-symbolization --perf-event=br_inst_retired.near_taken:upp --sample-period=1000003
// RUN: FileCheck %s --input-file %t --check-prefix=CHECK-RAW-PROFILE
// RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cmov_3.perfscript --binary=%S/Inputs/cmov_3.perfbin --output=%t --perf-event=br_inst_retired.near_taken:upp --sample-period=1000003
// RUN: FileCheck %s --input-file %t --check-prefix=CHECK
// RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cmov_3.perfscript --binary=%S/Inputs/cmov_3.perfbin --output=%t --skip-symbolization --perf-event=br_misp_retired.all_branches:upp --leading-ip-only --sample-period=1000003
// RUN: FileCheck %s --input-file %t --check-prefix=UNPRED-RAW-PROFILE
// RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cmov_3.perfscript --binary=%S/Inputs/cmov_3.perfbin --output=%t --perf-event=br_misp_retired.all_branches:upp --leading-ip-only --sample-period=1000003
// RUN: FileCheck %s --input-file %t --check-prefix=UNPRED
// Check that we can use perf event filtering to generate multiple types of
// source-level profiles from a single perf profile. In this case, we generate
// a typical execution frequency profile using br_inst_retired.near_taken LBRs,
// and a branch mispredict profile using br_misp_retired.all_branches sample
// IPs.
// Check that we can use --sample-period to compute LBR and IP-based profiles
// which have comparable and absolute magnitudes. For example, in this case the
// branch of interest (at source line offset 4) is in a loop body which is
// executed ~20M times in total, and it's mispredicted about 9M times, yielding
// a mispredict rate of roughly 0.45.
// The source example below is based on perfKernelCpp/cmov_3, except a
// misleading builtin is used to persuade the compiler not to use cmov, which
// induces branch mispredicts.
// CHECK: sel_arr:652547082:0
// CHECK:  3.1: 20225766
// CHECK:  3.2: 20225766
// CHECK:  4: 19838670
// CHECK:  5: 20225766
// UNPRED: sel_arr:18000054:0
// UNPRED:  3.1: 0
// UNPRED:  3.2: 0
// UNPRED:  4: 9000027
// UNPRED:  5: 0
// CHECK-RAW-PROFILE:      3
// CHECK-RAW-PROFILE-NEXT: 2f0-2fa:9774174
// CHECK-RAW-PROFILE-NEXT: 2f0-310:10064496
// CHECK-RAW-PROFILE-NEXT: 2ff-310:10161270
// UNPRED-RAW-PROFILE:      1
// UNPRED-RAW-PROFILE-NEXT: 2fa-2fa:9000027
// original code:
// icx -fprofile-sample-generate lit.c
#include <stdlib.h>
// Number of int elements in each of the four arrays below.
#define N 20000
// Number of times main() calls sel_arr().
#define ITERS 10000
// Heap arrays of N ints each, allocated by init(). main() passes m_s1, m_s2
// and m_s3 to sel_arr() as its s1, s2 and s3 inputs, and m_dst as its dst
// output.
static int *m_s1, *m_s2, *m_s3, *m_dst;
// Allocate the four N-element int arrays and fill the three inputs:
//   m_s1[i] = rand() % N, with the generator seeded with the constant 42,
//   m_s2[i] = 0,
//   m_s3[i] = 1.
// m_dst is allocated but not filled here. The malloc() results are not
// checked for NULL, and nothing in this file frees the arrays.
void init(void) {
    m_s1 = malloc(sizeof(int)*N);
    m_s2 = malloc(sizeof(int)*N);
    m_s3 = malloc(sizeof(int)*N);
    m_dst = malloc(sizeof(int)*N);
    srand(42); // constant seed
    for (int i = 0; i < N; i++) {
        m_s1[i] = rand() % N;
        m_s2[i] = 0;
        m_s3[i] = 1;
    }
}
// Kernel under test. For each i in [0, N), store s2[i] into dst[i] when
// s1[i] < 10035, and s3[i] otherwise.
//
// __builtin_expect marks the s1[i] < 10035 condition as expected to be false.
// As the header comment of this file explains, that hint is deliberately
// misleading: it is there to persuade the compiler to emit a branch rather
// than a cmov. The noinline attribute and the loop pragmas ask the compiler
// not to inline the function and not to unroll, vectorize or interleave the
// loop.
//
// Do not add or remove lines inside this function. The expected profiles at
// the top of this file are keyed by line offsets counted from the function
// header line: the loop header is offset 3, the select is offset 4 and the
// store is offset 5.
void __attribute__((noinline)) sel_arr(int *dst, int *s1, int *s2, int *s3) {
#pragma nounroll
#pragma clang loop vectorize(disable) interleave(disable)
    for (int i = 0; i < N; i++) {
        int *p = __builtin_expect((s1[i] < 10035), 0) ? &s2[i] : &s3[i];
        dst[i] = *p;
    }
}
// Build the input arrays once with init(), then call sel_arr() on them ITERS
// times. Always returns 0.
int main(void) {
  init();
  for(int i=0; i<ITERS; ++i)
    sel_arr(m_dst, m_s1, m_s2, m_s3);
  return 0;
}