1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
|
#ifndef __EVENT_COUNTER_H
#define __EVENT_COUNTER_H
#include <cctype>
#ifndef _MSC_VER
#include <dirent.h>
#endif
#include <cinttypes>
#include <cstring>
#include <chrono>
#include <vector>
#include "linux-perf-events.h"
#ifdef __linux__
#include <libgen.h>
#endif
#if __APPLE__ && __aarch64__
#include "apple_arm_events.h"
#endif
/// One measurement: an elapsed wall-clock duration plus a vector of raw
/// counter values indexed by `event_counter_types`.
///
/// A default-constructed object holds five zeroed slots (indices 0..4). The
/// two-argument constructor stores whatever vector it is given, so the getters
/// and the arithmetic operators below tolerate vectors of other lengths: a slot
/// that is not present is treated as zero.
///
/// Copy and move operations are deliberately left to the compiler (Rule of
/// Zero). Hand-written copy operations would suppress the implicit moves and
/// force a vector copy wherever a move would do.
struct event_count {
  std::chrono::duration<double> elapsed;
  std::vector<unsigned long long> event_counts;

  /// Zero elapsed time and five zeroed counter slots.
  event_count() : elapsed(0), event_counts{0, 0, 0, 0, 0} {}

  /// Stores the given duration and counter values as-is.
  event_count(const std::chrono::duration<double> _elapsed,
              const std::vector<unsigned long long> _event_counts)
      : elapsed(_elapsed), event_counts(_event_counts) {}

  // The types of counters (so we can read the getter more easily).
  // The values are indices into `event_counts`; index 3 has no name here.
  enum event_counter_types {
    CPU_CYCLES,
    INSTRUCTIONS,
    BRANCH_MISSES = 2,
    BRANCH = 4
  };

  /// Elapsed time in seconds.
  double elapsed_sec() const { return elapsed.count(); }

  /// Elapsed time in nanoseconds.
  double elapsed_ns() const {
    return std::chrono::duration<double, std::nano>(elapsed).count();
  }

  /// Value stored in the CPU_CYCLES slot (0 if the slot is absent).
  double cycles() const { return counter(CPU_CYCLES); }

  /// Value stored in the INSTRUCTIONS slot (0 if the slot is absent).
  double instructions() const { return counter(INSTRUCTIONS); }

  /// Value stored in the BRANCH slot (0 if the slot is absent).
  double branches() const { return counter(BRANCH); }

  /// Value stored in the BRANCH_MISSES slot (0 if the slot is absent).
  double branch_misses() const { return counter(BRANCH_MISSES); }

  /// Returns the element-wise sum of the two measurements; durations are
  /// added as well. The result has as many slots as the longer operand.
  event_count operator+(const event_count& other) const {
    event_count sum(*this);
    sum += other;
    return sum;
  }

  /// Accumulates `other` into this object in place (no temporary vector).
  void operator+=(const event_count& other) {
    elapsed += other.elapsed;
    // Grow first so that every slot of `other` has a destination; new slots
    // start at zero, which makes a missing slot equivalent to a zero count.
    if (event_counts.size() < other.event_counts.size()) {
      event_counts.resize(other.event_counts.size(), 0);
    }
    for (std::size_t i = 0; i < other.event_counts.size(); ++i) {
      event_counts[i] += other.event_counts[i];
    }
  }

 private:
  /// Bounds-checked read of one counter slot, converted to double.
  double counter(std::size_t index) const {
    return index < event_counts.size()
               ? static_cast<double>(event_counts[index])
               : 0.0;
  }
};
/// Running summary over a series of `event_count` samples: the sum of all
/// samples plus the samples with the shortest and longest elapsed time.
///
/// The per-iteration getters divide by `iterations`, so their results are only
/// meaningful after at least one sample has been recorded with `operator<<`.
struct event_aggregate {
  // Not read or written by any member of this struct.
  // NOTE(review): presumably set by the benchmark driver to record whether
  // hardware counters were available -- confirm against the callers.
  bool has_events = false;
  int iterations = 0;   // number of samples recorded so far
  event_count total{};  // element-wise sum of every recorded sample
  event_count best{};   // sample with the smallest elapsed time
  event_count worst{};  // sample with the largest elapsed time

  event_aggregate() = default;

  /// Records one sample. The first sample always becomes both `best` and
  /// `worst`; later samples replace them only on a strictly smaller or
  /// strictly larger elapsed time, respectively.
  void operator<<(const event_count& other) {
    if (iterations == 0 || other.elapsed < best.elapsed) {
      best = other;
    }
    if (iterations == 0 || other.elapsed > worst.elapsed) {
      worst = other;
    }
    iterations++;
    total += other;
  }

  /// Mean elapsed time per recorded sample, in seconds.
  double elapsed_sec() const { return total.elapsed_sec() / iterations; }

  /// Mean elapsed time per recorded sample, in nanoseconds.
  double elapsed_ns() const { return total.elapsed_ns() / iterations; }

  /// Mean CPU_CYCLES counter value per recorded sample.
  double cycles() const { return total.cycles() / iterations; }

  /// Mean INSTRUCTIONS counter value per recorded sample.
  double instructions() const { return total.instructions() / iterations; }

  /// Mean BRANCH counter value per recorded sample.
  double branches() const { return total.branches() / iterations; }

  /// Mean BRANCH_MISSES counter value per recorded sample.
  double branch_misses() const { return total.branch_misses() / iterations; }
};
/// Measures one region of code: call `start()` before it and `end()` after it.
///
/// Wall-clock time is always taken from `std::chrono::steady_clock`. Counter
/// values are filled in by a platform backend when one is compiled in:
/// `LinuxEvents` on Linux, `AppleEvents` on Apple aarch64, none elsewhere.
///
/// All three platform switches below must test the same macro. The Linux
/// member is declared under `__linux__`, so `start()` and `end()` use
/// `__linux__` too; testing a different spelling there could reference an
/// undeclared member or silently skip counter collection.
struct event_collector {
  event_count count{};  // result of the most recent start()/end() pair
  std::chrono::time_point<std::chrono::steady_clock> start_clock{};

#if defined(__linux__)
  LinuxEvents<PERF_TYPE_HARDWARE> linux_events;

  // Requests two hardware events, in this order: CPU cycles, instructions.
  // NOTE(review): no branch events are requested here, so the BRANCH and
  // BRANCH_MISSES slots presumably stay at zero on Linux -- confirm against
  // LinuxEvents::end.
  event_collector()
      : linux_events(std::vector<int>{
            PERF_COUNT_HW_CPU_CYCLES,
            PERF_COUNT_HW_INSTRUCTIONS,
        }) {}

  /// Reports whether the Linux backend says it is working.
  bool has_events() { return linux_events.is_working(); }
#elif __APPLE__ && __aarch64__
  AppleEvents apple_events;
  performance_counters diff;  // start snapshot, then (end - start) after end()

  event_collector() : diff(0) { apple_events.setup_performance_counters(); }

  /// Returns the result of `setup_performance_counters()`, which is invoked on
  /// every call.
  /// NOTE(review): presumably cheap and idempotent after the first call --
  /// confirm in apple_arm_events.h.
  bool has_events() { return apple_events.setup_performance_counters(); }
#else
  event_collector() {}

  /// No counter backend on this platform.
  bool has_events() { return false; }
#endif

  /// Begins a measurement. Counters are started or sampled first and the
  /// clock is read last.
  inline void start() {
#if defined(__linux__)
    linux_events.start();
#elif __APPLE__ && __aarch64__
    if (has_events()) {
      diff = apple_events.get_counters();
    }
#endif
    start_clock = std::chrono::steady_clock::now();
  }

  /// Ends the measurement and returns a reference to the collector's own
  /// `count` member, which is overwritten by the next `end()` call. The clock
  /// is read first, before any counter work.
  inline event_count& end() {
    const auto end_clock = std::chrono::steady_clock::now();
#if defined(__linux__)
    linux_events.end(count.event_counts);
#elif __APPLE__ && __aarch64__
    if (has_events()) {
      performance_counters end = apple_events.get_counters();
      diff = end - diff;
    }
    // Slot layout follows event_count::event_counter_types; slot 3 has no
    // named counter there and is cleared.
    count.event_counts[0] = diff.cycles;
    count.event_counts[1] = diff.instructions;
    count.event_counts[2] = diff.missed_branches;
    count.event_counts[3] = 0;
    count.event_counts[4] = diff.branches;
#endif
    count.elapsed = end_clock - start_clock;
    return count;
  }
};
#endif
|