File: benchmark.h

package info (click to toggle)
croaring 0.2.66%2Bds-2.2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 2,164 kB
  • sloc: ansic: 25,557; cpp: 1,426; sh: 403; python: 81; makefile: 11
file content (161 lines) | stat: -rw-r--r-- 6,802 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
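/* benchmark.h
 *
 * Timing macros (RDTSC_START / RDTSC_FINAL) and the BEST_TIME reporting
 * helpers built on top of them.
 */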

#ifndef BENCHMARKS_INCLUDE_BENCHMARK_H_
#define BENCHMARKS_INCLUDE_BENCHMARK_H_
#include <roaring/portability.h>
#include <time.h>

/*
 * CLOBBER_MEMORY: used as a statement before each timed region.
 * When ROARING_INLINE_ASM is defined it expands to an empty asm statement
 * that names "memory" in its clobber list; otherwise it expands to nothing.
 */
#ifndef ROARING_INLINE_ASM
#define CLOBBER_MEMORY
#else
#define CLOBBER_MEMORY __asm volatile("" : : : "memory")
#endif

/*
 * RDTSC_START(cycles) / RDTSC_FINAL(cycles)
 *
 * Store a timestamp into the integer lvalue `cycles` (the macros in this
 * file pass a uint64_t). Use RDTSC_START before the measured code and
 * RDTSC_FINAL after it; the difference of the two values is the cost.
 *
 * The unit depends on which branch below is selected:
 *   - IS_X64 with ROARING_INLINE_ASM: time-stamp counter ticks;
 *   - Linux + glibc with clock_gettime(): nanoseconds;
 *   - anything else: clock() ticks.
 *
 * Within every branch both macros read the SAME clock, otherwise the
 * difference between the two readings would be meaningless.
 */
#if defined(IS_X64) && defined(ROARING_INLINE_ASM)
/*
 * cpuid is issued before rdtsc and after rdtscp -- presumably as a
 * serializing instruction so that the measured code cannot be reordered
 * across the counter reads (see the Intel benchmarking white paper).
 * The counter halves are copied out of edx:eax into compiler-chosen
 * registers because rax/rbx/rcx/rdx are declared clobbered.
 */
#define RDTSC_START(cycles)                                                   \
    do {                                                                      \
        unsigned cyc_high, cyc_low;                                           \
        __asm volatile(                                                       \
            "cpuid\n\t"                                                       \
            "rdtsc\n\t"                                                       \
            "mov %%edx, %0\n\t"                                               \
            "mov %%eax, %1\n\t"                                               \
            : "=r"(cyc_high), "=r"(cyc_low)::"%rax", "%rbx", "%rcx", "%rdx"); \
        (cycles) = ((uint64_t)cyc_high << 32) | cyc_low;                      \
    } while (0)

#define RDTSC_FINAL(cycles)                                                   \
    do {                                                                      \
        unsigned cyc_high, cyc_low;                                           \
        __asm volatile(                                                       \
            "rdtscp\n\t"                                                      \
            "mov %%edx, %0\n\t"                                               \
            "mov %%eax, %1\n\t"                                               \
            "cpuid\n\t"                                                       \
            : "=r"(cyc_high), "=r"(cyc_low)::"%rax", "%rbx", "%rcx", "%rdx"); \
        (cycles) = ((uint64_t)cyc_high << 32) | cyc_low;                      \
    } while (0)

#elif defined(__linux__) && defined(__GLIBC__)

#include <time.h>
#ifdef CLOCK_THREAD_CPUTIME_ID
/* Preferred: CPU time consumed by the calling thread, in nanoseconds. */
#define RDTSC_START(cycles)                                       \
    do {                                                          \
        struct timespec ts;                                       \
        clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts);              \
        (cycles) = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \
    } while (0)

/* Must read the same clock as RDTSC_START above. */
#define RDTSC_FINAL(cycles)                                       \
    do {                                                          \
        struct timespec ts;                                       \
        clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts);              \
        (cycles) = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \
    } while (0)

#elif defined(CLOCK_REALTIME) /* no CLOCK_THREAD_CPUTIME_ID */
/* Fallback: wall-clock time, in nanoseconds. */
#define RDTSC_START(cycles)                                       \
    do {                                                          \
        struct timespec ts;                                       \
        clock_gettime(CLOCK_REALTIME, &ts);                       \
        (cycles) = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \
    } while (0)

#define RDTSC_FINAL(cycles)                                       \
    do {                                                          \
        struct timespec ts;                                       \
        clock_gettime(CLOCK_REALTIME, &ts);                       \
        (cycles) = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \
    } while (0)

#else /* neither clock id is available */
#define RDTSC_START(cycles)   \
    do {                      \
        (cycles) = clock();   \
    } while (0)

#define RDTSC_FINAL(cycles)   \
    do {                      \
        (cycles) = clock();   \
    } while (0)

#endif /* CLOCK_THREAD_CPUTIME_ID / CLOCK_REALTIME */

#else

/*
 * No rdtsc path and no glibc clock_gettime path on this platform:
 * fall back to the standard clock() function.
 */
#include <time.h>

#define RDTSC_START(cycles)   \
    do {                      \
        (cycles) = clock();   \
    } while (0)

#define RDTSC_FINAL(cycles)   \
    do {                      \
        (cycles) = clock();   \
    } while (0)

#endif

/*
 * BEST_TIME(test, answer, repeat, size)
 *
 * Evaluates the expression `test` `repeat` times, timing each evaluation
 * with RDTSC_START / RDTSC_FINAL, and prints the smallest elapsed count
 * divided by `size`, i.e. the best observed cost per operation (lower is
 * better). The unit is whatever RDTSC_START / RDTSC_FINAL measure.
 *
 *   test   - expression to time (typically a function call); its source
 *            text is printed as the line label.
 *   answer - value `test` is expected to produce; " [ERROR]" is appended
 *            to the output line if any evaluation differs.
 *   repeat - number of timed evaluations (re-evaluated on each iteration).
 *   size   - number of operations represented by one evaluation of `test`.
 *
 * Every argument is parenthesized at its point of use so that compound
 * expressions (e.g. `flag ? 3 : 0`, `a | b`, `n + 1`) keep their meaning.
 */
#define BEST_TIME(test, answer, repeat, size)                   \
    do {                                                        \
        printf("%s: ", #test);                                  \
        fflush(NULL);                                           \
        uint64_t cycles_start, cycles_final, cycles_diff;       \
        uint64_t min_diff = (uint64_t)-1;                       \
        int wrong_answer = 0;                                   \
        for (int i = 0; i < (repeat); i++) {                    \
            CLOBBER_MEMORY;                                     \
            RDTSC_START(cycles_start);                          \
            if ((test) != (answer)) wrong_answer = 1;           \
            RDTSC_FINAL(cycles_final);                          \
            cycles_diff = (cycles_final - cycles_start);        \
            if (cycles_diff < min_diff) min_diff = cycles_diff; \
        }                                                       \
        uint64_t S = (uint64_t)(size);                          \
        double cycle_per_op = (double)min_diff / (double)S;     \
        printf(" %.2f cycles per operation", cycle_per_op);     \
        if (wrong_answer) printf(" [ERROR]");                   \
        printf("\n");                                           \
        fflush(NULL);                                           \
    } while (0)

/*
 * BEST_TIME_PRE_ARRAY(base, test, pre, testvalues, nbrtestvalues)
 *
 * For each j in [0, nbrtestvalues): calls pre(base) outside the timed
 * region, then times test(base, testvalues[j]) with RDTSC_START /
 * RDTSC_FINAL. Prints the sum of the elapsed counts divided by
 * nbrtestvalues, i.e. the average cost per call (unlike BEST_TIME, which
 * reports the minimum).
 *
 *   base          - first argument passed to both `pre` and `test`.
 *   test          - callable to time; its name is printed in the label.
 *   pre           - callable run on `base` before every timed call; its
 *                   name is printed in the label.
 *   testvalues    - indexable collection of second arguments for `test`.
 *   nbrtestvalues - number of entries of `testvalues` to use.
 *
 * The running total is kept in a uint64_t: the elapsed counts are uint64_t
 * and would not fit an int once the total exceeds INT_MAX.
 */
#define BEST_TIME_PRE_ARRAY(base, test, pre, testvalues, nbrtestvalues) \
    do {                                                                \
        printf("%s %s: ", #test, #pre);                                 \
        fflush(NULL);                                                   \
        uint64_t cycles_start, cycles_final, cycles_diff;               \
        uint64_t sum = 0;                                               \
        for (size_t j = 0; j < (nbrtestvalues); j++) {                  \
            pre(base);                                                  \
            CLOBBER_MEMORY;                                             \
            RDTSC_START(cycles_start);                                  \
            test(base, (testvalues)[j]);                                \
            RDTSC_FINAL(cycles_final);                                  \
            cycles_diff = (cycles_final - cycles_start);                \
            sum += cycles_diff;                                         \
        }                                                               \
        uint64_t S = (uint64_t)(nbrtestvalues);                         \
        double cycle_per_op = (double)sum / (double)S;                  \
        printf(" %.2f cycles per operation", cycle_per_op);             \
        printf("\n");                                                   \
        fflush(NULL);                                                   \
    } while (0)

#endif /* BENCHMARKS_INCLUDE_BENCHMARK_H_ */