/*
 * benchmark.h
 *
 * Timing macros for benchmarks: CLOBBER_MEMORY, RDTSC_START, RDTSC_FINAL,
 * BEST_TIME and BEST_TIME_PRE_ARRAY.
 */
#ifndef BENCHMARKS_INCLUDE_BENCHMARK_H_
#define BENCHMARKS_INCLUDE_BENCHMARK_H_
#include <roaring/portability.h>
#include <time.h>
/*
 * CLOBBER_MEMORY
 *
 * When ROARING_INLINE_ASM is defined, expands to an empty inline-assembly
 * statement that lists "memory" as clobbered, so the compiler must assume
 * memory may have changed at that point. Without inline assembly it expands
 * to nothing.
 */
#ifndef ROARING_INLINE_ASM
#define CLOBBER_MEMORY
#else
#define CLOBBER_MEMORY __asm volatile("" : : : "memory")
#endif
/*
 * RDTSC_START(cycles) / RDTSC_FINAL(cycles)
 *
 * Store a timestamp into the lvalue `cycles` at the beginning and at the end
 * of a measured region; the caller subtracts the two values. The unit depends
 * on which branch below is selected:
 *   - IS_X64 with ROARING_INLINE_ASM: the counter read by rdtsc / rdtscp;
 *   - Linux with glibc and clock_gettime clock ids: nanoseconds;
 *   - everything else: clock() ticks.
 *
 * Within every branch both macros read the SAME clock, otherwise the
 * difference between the two timestamps would be meaningless.
 */
#if defined(IS_X64) && defined(ROARING_INLINE_ASM)
/* cpuid is issued before rdtsc; the counter halves come back in edx:eax. */
#define RDTSC_START(cycles) \
    do { \
        register unsigned cyc_high, cyc_low; \
        __asm volatile( \
            "cpuid\n\t" \
            "rdtsc\n\t" \
            "mov %%edx, %0\n\t" \
            "mov %%eax, %1\n\t" \
            : "=r"(cyc_high), "=r"(cyc_low)::"%rax", "%rbx", "%rcx", "%rdx"); \
        (cycles) = ((uint64_t)cyc_high << 32) | cyc_low; \
    } while (0)
/* rdtscp reads the counter, the halves are saved, then cpuid is issued. */
#define RDTSC_FINAL(cycles) \
    do { \
        register unsigned cyc_high, cyc_low; \
        __asm volatile( \
            "rdtscp\n\t" \
            "mov %%edx, %0\n\t" \
            "mov %%eax, %1\n\t" \
            "cpuid\n\t" \
            : "=r"(cyc_high), "=r"(cyc_low)::"%rax", "%rbx", "%rcx", "%rdx"); \
        (cycles) = ((uint64_t)cyc_high << 32) | cyc_low; \
    } while (0)
#elif defined(__linux__) && defined(__GLIBC__)
#include <time.h>
#ifdef CLOCK_THREAD_CPUTIME_ID
/* Per-thread CPU time in nanoseconds. */
#define RDTSC_START(cycles) \
    do { \
        struct timespec ts; \
        clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts); \
        (cycles) = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \
    } while (0)
/*
 * Must use the same clock as RDTSC_START: mixing CLOCK_THREAD_CPUTIME_ID
 * with CLOCK_REALTIME would subtract values taken from unrelated clocks.
 */
#define RDTSC_FINAL(cycles) \
    do { \
        struct timespec ts; \
        clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts); \
        (cycles) = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \
    } while (0)
#elif defined(CLOCK_REALTIME) /* no CLOCK_THREAD_CPUTIME_ID */
/* Wall-clock time in nanoseconds. */
#define RDTSC_START(cycles) \
    do { \
        struct timespec ts; \
        clock_gettime(CLOCK_REALTIME, &ts); \
        (cycles) = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \
    } while (0)
#define RDTSC_FINAL(cycles) \
    do { \
        struct timespec ts; \
        clock_gettime(CLOCK_REALTIME, &ts); \
        (cycles) = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \
    } while (0)
#else /* neither clock id is available */
#define RDTSC_START(cycles) \
    do { \
        (cycles) = clock(); \
    } while (0)
#define RDTSC_FINAL(cycles) \
    do { \
        (cycles) = clock(); \
    } while (0)
#endif /* CLOCK_THREAD_CPUTIME_ID / CLOCK_REALTIME */
#else
/*
 * Neither the rdtsc path nor the Linux/glibc path applies: fall back to the
 * standard clock() function.
 */
#include <time.h>
#define RDTSC_START(cycles) \
    do { \
        (cycles) = clock(); \
    } while (0)
#define RDTSC_FINAL(cycles) \
    do { \
        (cycles) = clock(); \
    } while (0)
#endif
/*
 * BEST_TIME(test, answer, repeat, size)
 *
 * Evaluates the expression `test` `repeat` times, timing every evaluation
 * with RDTSC_START / RDTSC_FINAL, and prints the smallest elapsed count
 * divided by `size` as "cycles per operation".
 *
 *   test    expression to time (typically a function call); its text is
 *           printed as the label
 *   answer  value `test` is expected to produce; " [ERROR]" is appended to
 *           the output if any evaluation compares unequal to it
 *   repeat  number of timed evaluations
 *   size    number of operations represented by one evaluation of `test`
 *
 * All arguments are parenthesized in the expansion, and the macro's locals
 * carry a bt_ prefix so that they do not capture identifiers (such as `i`)
 * used inside the caller's `test` expression.
 */
#define BEST_TIME(test, answer, repeat, size) \
    do { \
        printf("%s: ", #test); \
        fflush(NULL); \
        uint64_t bt_start_ = 0, bt_final_ = 0, bt_diff_; \
        uint64_t bt_best_ = (uint64_t)-1; \
        int bt_wrong_ = 0; \
        for (int bt_iter_ = 0; bt_iter_ < (repeat); bt_iter_++) { \
            CLOBBER_MEMORY; \
            RDTSC_START(bt_start_); \
            if ((test) != (answer)) bt_wrong_ = 1; \
            RDTSC_FINAL(bt_final_); \
            bt_diff_ = bt_final_ - bt_start_; \
            if (bt_diff_ < bt_best_) bt_best_ = bt_diff_; \
        } \
        uint64_t bt_ops_ = (uint64_t)(size); \
        /* double keeps far more of a 64-bit count than float would */ \
        double bt_per_op_ = (double)bt_best_ / (double)bt_ops_; \
        printf(" %.2f cycles per operation", bt_per_op_); \
        if (bt_wrong_) printf(" [ERROR]"); \
        printf("\n"); \
        fflush(NULL); \
    } while (0)
/*
 * BEST_TIME_PRE_ARRAY(base, test, pre, testvalues, nbrtestvalues)
 *
 * For each of the `nbrtestvalues` entries of `testvalues`, calls pre(base)
 * (untimed) and then times test(base, testvalues[j]) with RDTSC_START /
 * RDTSC_FINAL. Unlike BEST_TIME, which reports the minimum, this prints the
 * average elapsed count per call as "cycles per operation".
 *
 *   base           first argument passed to both `pre` and `test`
 *   test           function (or function-like macro) being timed
 *   pre            function (or function-like macro) run before each call
 *   testvalues     array of second arguments for `test`
 *   nbrtestvalues  number of entries of `testvalues` to use
 *
 * The elapsed counts are accumulated in a uint64_t: an int accumulator would
 * truncate each 64-bit difference and could overflow.
 */
#define BEST_TIME_PRE_ARRAY(base, test, pre, testvalues, nbrtestvalues) \
    do { \
        printf("%s %s: ", #test, #pre); \
        fflush(NULL); \
        uint64_t bt_start_ = 0, bt_final_ = 0; \
        uint64_t bt_total_ = 0; \
        for (size_t bt_idx_ = 0; bt_idx_ < (nbrtestvalues); bt_idx_++) { \
            pre(base); \
            CLOBBER_MEMORY; \
            RDTSC_START(bt_start_); \
            test(base, (testvalues)[bt_idx_]); \
            RDTSC_FINAL(bt_final_); \
            bt_total_ += bt_final_ - bt_start_; \
        } \
        uint64_t bt_count_ = (uint64_t)(nbrtestvalues); \
        double bt_per_op_ = (double)bt_total_ / (double)bt_count_; \
        printf(" %.2f cycles per operation", bt_per_op_); \
        printf("\n"); \
        fflush(NULL); \
    } while (0)
#endif /* BENCHMARKS_INCLUDE_BENCHMARK_H_ */