File: event_counter.h

package info (click to toggle)
simdjson 4.3.1-4
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 31,396 kB
  • sloc: cpp: 195,760; ansic: 20,954; sh: 1,126; python: 885; makefile: 47; ruby: 25; javascript: 13
file content (201 lines) | stat: -rw-r--r-- 5,730 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
#ifndef __EVENT_COUNTER_H
#define __EVENT_COUNTER_H

// SIMDJSON_SIMPLE_PERFORMANCE_COUNTERS selects which set of hardware counters
// this header works with. A value defined before this point is respected;
// otherwise the default is 1 on __aarch64__ and 0 everywhere else.
#ifndef SIMDJSON_SIMPLE_PERFORMANCE_COUNTERS
#ifdef __aarch64__
// on ARM, we use just cycles and instructions
#define SIMDJSON_SIMPLE_PERFORMANCE_COUNTERS 1
#else
// elsewhere, we try to use five counters: cycles, instructions,
// branch misses, cache references and cache misses.
#define SIMDJSON_SIMPLE_PERFORMANCE_COUNTERS 0
#endif
#endif
#include <cassert>
#include <cctype>
#ifndef _MSC_VER
#include <dirent.h>
#endif
#include <unistd.h>
#include <cinttypes>

#include <cstdio>
#include <cstdlib>
#include <cstring>

#include <algorithm>
#include <chrono>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <vector>

#ifdef __linux__
#include "linux-perf-events.h"
#include <libgen.h>
#endif

#if __APPLE__ &&  __aarch64__
#include "apple/apple_arm_events.h"
#endif

#include "simdjson.h"

using std::string;
using std::vector;
using std::chrono::steady_clock;
using std::chrono::time_point;
using std::chrono::duration;

/**
 * A single measurement: an elapsed wall-clock duration together with a
 * vector of raw counter values.
 *
 * Counter slots are addressed through event_counter_types. A
 * default-constructed event_count carries five zeroed slots.
 */
struct event_count {
  duration<double> elapsed;
  vector<unsigned long long> event_counts;

  /** Zero elapsed time and five counter slots, all set to zero. */
  event_count() : elapsed(0), event_counts{0,0,0,0,0} {}

  /**
   * @param _elapsed      elapsed duration of the measurement
   * @param _event_counts counter values, indexed by event_counter_types
   */
  event_count(const duration<double> _elapsed, const vector<unsigned long long>& _event_counts) : elapsed(_elapsed), event_counts(_event_counts) {}

  // Copy and move operations are deliberately left to the compiler: both
  // members already copy and move correctly by themselves.

  // The types of counters (so we can read the getter more easily)
  #if SIMDJSON_SIMPLE_PERFORMANCE_COUNTERS
  enum event_counter_types {
    CPU_CYCLES,
    INSTRUCTIONS
  };
  #else
  enum event_counter_types {
    CPU_CYCLES,
    INSTRUCTIONS,
    BRANCH_MISSES,
    CACHE_REFERENCES,
    CACHE_MISSES
  };
  #endif

  /** Elapsed time expressed in seconds. */
  double elapsed_sec() const { return duration<double>(elapsed).count(); }
  /** Elapsed time expressed in nanoseconds. */
  double elapsed_ns() const { return duration<double, std::nano>(elapsed).count(); }
  /** Value of the CPU_CYCLES slot, as a double. */
  double cycles() const { return static_cast<double>(event_counts[CPU_CYCLES]); }
  /** Value of the INSTRUCTIONS slot, as a double. */
  double instructions() const { return static_cast<double>(event_counts[INSTRUCTIONS]); }
#if !SIMDJSON_SIMPLE_PERFORMANCE_COUNTERS
  /** Value of the BRANCH_MISSES slot, as a double. */
  double branch_misses() const { return static_cast<double>(event_counts[BRANCH_MISSES]); }
  /** Value of the CACHE_REFERENCES slot, as a double. */
  double cache_references() const { return static_cast<double>(event_counts[CACHE_REFERENCES]); }
  /** Value of the CACHE_MISSES slot, as a double. */
  double cache_misses() const { return static_cast<double>(event_counts[CACHE_MISSES]); }
#endif

  /**
   * Returns the slot-wise sum of two measurements (elapsed times are added
   * as well). The result has as many slots as the longer operand.
   */
  event_count operator+(const event_count& other) const {
    event_count sum(*this);
    sum += other;
    return sum;
  }

  /**
   * Adds another measurement into this one, in place.
   *
   * The operands may carry a different number of slots: a missing slot is
   * treated as zero, so no element is ever read or written out of bounds.
   *
   * @return *this, so that additions can be chained.
   */
  event_count& operator+=(const event_count& other) {
    elapsed += other.elapsed;
    const std::size_t incoming = other.event_counts.size();
    if (event_counts.size() < incoming) {
      event_counts.resize(incoming, 0);
    }
    for (std::size_t i = 0; i < incoming; i++) {
      event_counts[i] += other.event_counts[i];
    }
    return *this;
  }
};

/**
 * Running summary over a series of event_count samples: the number of
 * samples fed in, their sum, and the samples with the smallest and the
 * largest elapsed time.
 */
struct event_aggregate {
  int iterations = 0;
  event_count total{};
  event_count best{};
  event_count worst{};

  event_aggregate() {}

  /** Feeds one more sample into the summary. */
  void operator<<(const event_count& other) {
    // The very first sample is both the best and the worst seen so far.
    const bool first_sample = (iterations == 0);
    if (first_sample || other.elapsed < best.elapsed) { best = other; }
    if (first_sample || other.elapsed > worst.elapsed) { worst = other; }
    ++iterations;
    total += other;
  }

  /** Sum of the elapsed time of all samples, in nanoseconds. */
  double total_elapsed_ns() const { return total.elapsed_ns(); }

  // Every accessor below is the corresponding total divided by `iterations`.
  double elapsed_sec() const { return per_iteration(total.elapsed_sec()); }
  double elapsed_ns() const { return per_iteration(total.elapsed_ns()); }
  double cycles() const { return per_iteration(total.cycles()); }
  double instructions() const { return per_iteration(total.instructions()); }
#if !SIMDJSON_SIMPLE_PERFORMANCE_COUNTERS
  double branch_misses() const { return per_iteration(total.branch_misses()); }
  double cache_references() const { return per_iteration(total.cache_references()); }
  double cache_misses() const { return per_iteration(total.cache_misses()); }
#endif

private:
  /** Divides an accumulated value by the number of samples recorded. */
  double per_iteration(double accumulated) const { return accumulated / iterations; }
};

/**
 * Measures a region of code delimited by start() and end(): always the
 * elapsed steady_clock time and, depending on the platform selected at
 * compile time, hardware counter values as well.
 *
 * The preprocessor picks one of three variants:
 *  - __linux__: counters come from LinuxEvents<PERF_TYPE_HARDWARE>;
 *  - __APPLE__ on __aarch64__: counters come from AppleEvents;
 *  - anything else: only the elapsed time is recorded.
 */
struct event_collector {
  event_count count{};
  time_point<steady_clock> start_clock{};

#if defined(__linux__)
  LinuxEvents<PERF_TYPE_HARDWARE> linux_events;
  // The order of the events below matches event_count::event_counter_types.
  event_collector() : linux_events(vector<int>{
  #if SIMDJSON_SIMPLE_PERFORMANCE_COUNTERS
    PERF_COUNT_HW_CPU_CYCLES,
    PERF_COUNT_HW_INSTRUCTIONS,
  #else
    PERF_COUNT_HW_CPU_CYCLES,
    PERF_COUNT_HW_INSTRUCTIONS,
    PERF_COUNT_HW_BRANCH_MISSES,
    PERF_COUNT_HW_CACHE_REFERENCES,
    PERF_COUNT_HW_CACHE_MISSES
  #endif
  }) {}
  /** @return whatever linux_events.is_working() reports. */
  bool has_events() {
    return linux_events.is_working();
  }
#elif __APPLE__ &&  __aarch64__
  AppleEvents apple_events;
  // Counter snapshot taken by start(); end() replaces it with the difference.
  performance_counters diff;
  event_collector() : diff(0) {
    apple_events.setup_performance_counters();
  }
  /** @return the result of apple_events.setup_performance_counters(). */
  bool has_events() {
    return apple_events.setup_performance_counters();
  }
#else
  event_collector() {}
  /** No hardware counters on this platform. */
  bool has_events() {
    return false;
  }
#endif

  /** Begins a measurement; the clock is read last. */
  simdjson_inline void start() {
    // Use the same __linux__ test that guards the declaration of
    // linux_events, so the member and its uses are always enabled together.
#if defined(__linux__)
    linux_events.start();
#elif __APPLE__ &&  __aarch64__
    if(has_events()) { diff = apple_events.get_counters(); }
#endif
    start_clock = steady_clock::now();
  }

  /**
   * Ends the measurement started by start(); the clock is read first.
   *
   * @return a reference to `count`, holding the elapsed time and, where
   *         available, the counter values of the measured region.
   */
  simdjson_inline event_count& end() {
    time_point<steady_clock> end_clock = steady_clock::now();
#if defined(__linux__)
    linux_events.end(count.event_counts);
#elif __APPLE__ &&  __aarch64__
    if(has_events()) {
      performance_counters end = apple_events.get_counters();
      diff = end - diff;
    }
    count.event_counts[0] = diff.cycles;
    count.event_counts[1] = diff.instructions;
    count.event_counts[2] = diff.missed_branches;
    // Slots 3 and 4 are not filled from the Apple counters.
    count.event_counts[3] = 0;
    count.event_counts[4] = 0;
#endif
    count.elapsed = end_clock - start_clock;
    return count;
  }
};

#endif