File: period-scaling.test

package info (click to toggle)
llvm-toolchain-19 1%3A19.1.7-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,998,520 kB
  • sloc: cpp: 6,951,680; ansic: 1,486,157; asm: 913,598; python: 232,024; f90: 80,126; objc: 75,281; lisp: 37,276; pascal: 16,990; sh: 10,009; ml: 5,058; perl: 4,724; awk: 3,523; makefile: 3,167; javascript: 2,504; xml: 892; fortran: 664; cs: 573
file content (84 lines) | stat: -rw-r--r-- 3,268 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
// RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cmov_3.perfscript --binary=%S/Inputs/cmov_3.perfbin --output=%t --skip-symbolization --perf-event=br_inst_retired.near_taken:upp --sample-period=1000003
// RUN: FileCheck %s --input-file %t --check-prefix=CHECK-RAW-PROFILE
// RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cmov_3.perfscript --binary=%S/Inputs/cmov_3.perfbin --output=%t --perf-event=br_inst_retired.near_taken:upp --sample-period=1000003
// RUN: FileCheck %s --input-file %t --check-prefix=CHECK

// RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cmov_3.perfscript --binary=%S/Inputs/cmov_3.perfbin --output=%t --skip-symbolization --perf-event=br_misp_retired.all_branches:upp --leading-ip-only --sample-period=1000003
// RUN: FileCheck %s --input-file %t --check-prefix=UNPRED-RAW-PROFILE
// RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cmov_3.perfscript --binary=%S/Inputs/cmov_3.perfbin --output=%t --perf-event=br_misp_retired.all_branches:upp --leading-ip-only --sample-period=1000003
// RUN: FileCheck %s --input-file %t --check-prefix=UNPRED

// Check that we can use perf event filtering to generate multiple types of
// source-level profiles from a single perf profile. In this case, we generate
// a typical execution frequency profile using br_inst_retired.near_taken LBRs,
// and a branch mispredict profile using br_misp_retired.all_branches sample
// IPs.

// Check that we can use --sample-period to compute LBR and IP-based profiles
// which have comparable and absolute magnitudes. For example, in this case the
// branch of interest (at source line offset 4) is in a loop body which is
// executed ~20M times in total, and it's mispredicted about 9M times, yielding
// a mispredict rate of roughly 0.45.

// The source example below is based on perfKernelCpp/cmov_3, except that a
// deliberately misleading __builtin_expect hint is used to persuade the
// compiler to emit a conditional branch instead of cmov; that branch is what
// incurs the mispredicts.

// CHECK: sel_arr:652547082:0
// CHECK:  3.1: 20225766
// CHECK:  3.2: 20225766
// CHECK:  4: 19838670
// CHECK:  5: 20225766

// UNPRED: sel_arr:18000054:0
// UNPRED:  3.1: 0
// UNPRED:  3.2: 0
// UNPRED:  4: 9000027
// UNPRED:  5: 0

// CHECK-RAW-PROFILE:      3
// CHECK-RAW-PROFILE-NEXT: 2f0-2fa:9774174
// CHECK-RAW-PROFILE-NEXT: 2f0-310:10064496
// CHECK-RAW-PROFILE-NEXT: 2ff-310:10161270

// UNPRED-RAW-PROFILE:      1
// UNPRED-RAW-PROFILE-NEXT: 2fa-2fa:9000027

// original code:
// icx -fprofile-sample-generate lit.c
#include <stdlib.h>

#define N 20000      // number of int elements in each work array
#define ITERS 10000  // number of sel_arr() passes made by main()

// Work arrays of N ints each, allocated and filled by init(). m_s1 holds the
// values the selection compares against, m_s2 and m_s3 are the two candidate
// sources, and m_dst receives the selected values.
static int *m_s1, *m_s2, *m_s3, *m_dst;

// Allocate the four N-element work arrays and populate the three inputs:
//   m_s1[i] = rand() % N  (seeded with srand(42), so the sequence repeats
//                          from run to run with the same C library)
//   m_s2[i] = 0
//   m_s3[i] = 1
// m_dst is only allocated; its contents are written later by sel_arr().
//
// If any allocation fails the program aborts instead of dereferencing a null
// pointer in the fill loop below.
void init(void) {
    m_s1 = malloc(sizeof *m_s1 * N);
    m_s2 = malloc(sizeof *m_s2 * N);
    m_s3 = malloc(sizeof *m_s3 * N);
    m_dst = malloc(sizeof *m_dst * N);
    if (!m_s1 || !m_s2 || !m_s3 || !m_dst) {
        abort();
    }

    // Seed only after the allocations, exactly as before, so that the
    // pseudo-random contents of m_s1 are unchanged.
    srand(42);

    for (int i = 0; i < N; i++) {
        m_s1[i] = rand() % N;
        m_s2[i] = 0;
        m_s3[i] = 1;
    }
}

// For each of the N elements, copy either s2[i] or s3[i] into dst[i]:
// s2[i] when s1[i] < 10035, otherwise s3[i].
//
// The __builtin_expect(..., 0) hint marks the comparison as unlikely. main()
// passes the m_s1 array that init() fills with rand() % N (N is 20000), so the
// comparison should in fact hold for roughly half of the elements; per the
// notes earlier in this file, the hint is there to steer the compiler away
// from cmov. The noinline attribute and the nounroll / vectorize(disable) /
// interleave(disable) pragmas ask the compiler to keep this as a separate
// function containing a plain scalar loop.
//
// NOTE(review): do not add or remove lines inside this function. The expected
// profiles earlier in this file refer to source line offsets here (the branch
// of interest is described as being at offset 4), which appear to be counted
// from the function's first line.
void __attribute__((noinline)) sel_arr(int *dst, int *s1, int *s2, int *s3) {
#pragma nounroll
#pragma clang loop vectorize(disable) interleave(disable)
    for (int i = 0; i < N; i++) {
        int *p = __builtin_expect((s1[i] < 10035), 0) ? &s2[i] : &s3[i];
        dst[i] = *p;
    }
}

// Program entry point: build the input arrays once with init(), then make
// ITERS passes of sel_arr() over them. Always returns 0.
int main(void) {
  int passes_left = ITERS;

  init();
  while (passes_left-- > 0) {
    sel_arr(m_dst, m_s1, m_s2, m_s3);
  }
  return 0;
}
}