/*
 * Micro-benchmark: measures the latency of creating a thread with
 * myth_create() (as seen by the child and by the parent) and of joining
 * it with myth_join().
 */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <myth/myth.h>
/*
 * Timestamp type used throughout the benchmark.
 * The unit depends on the time source selected below: raw counter ticks
 * when a hardware counter is read directly, nanoseconds otherwise.
 */
typedef unsigned long long ts_t;

/* Read the CPU counter directly on architectures where we know how to. */
#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \
    (defined(__sparc__) && defined(__arch64__))
#define USE_RDTSC 1
#endif

#if USE_RDTSC
/*
 * Return the current value of the architecture's counter register:
 *   - x86 / x86_64 : RDTSC (EDX:EAX combined into 64 bits)
 *   - aarch64      : CNTVCT_EL0
 *   - sparc64      : %tick
 *
 * __asm__ / __volatile__ are used instead of the bare asm keyword because
 * the latter is a GNU extension that is unavailable under strict
 * -std=c99 / -std=c11, where this would otherwise fail to compile.
 */
static inline ts_t rdtsc(void) {
  ts_t u;
#if defined(__i386__) || defined(__x86_64__)
  /* RDTSC leaves the low half in EAX and the high half in EDX on both
     32-bit and 64-bit x86, so one code path serves both. */
  unsigned int lo, hi;
  __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
  u = ((ts_t)hi << 32) | lo;
#elif defined(__aarch64__)
  __asm__ __volatile__("mrs %0, cntvct_el0" : "=r"(u));
#elif defined(__sparc__) && defined(__arch64__)
  /* TODO : make it run on Sparc */
  __asm__ __volatile__("rd %%tick, %0" : "=r"(u));
#else
#error "USE_RDTSC is set but no counter read is implemented for this architecture"
#endif
  return u;
}

/* Current timestamp, in hardware counter ticks. */
static inline ts_t cur_time(void) {
  return rdtsc();
}
#else
/*
 * Portable fallback: current timestamp in nanoseconds.
 *
 * Callers only ever subtract two timestamps, so a monotonic clock is
 * preferred when the platform offers one (CLOCK_REALTIME may jump when
 * the wall clock is adjusted). Returns 0 if the clock cannot be read.
 */
ts_t cur_time(void) {
  struct timespec ts;
#if defined(CLOCK_MONOTONIC)
  const clockid_t clk = CLOCK_MONOTONIC;
#else
  const clockid_t clk = CLOCK_REALTIME;
#endif
  if (clock_gettime(clk, &ts) != 0) {
    return 0;
  }
  /* Widen before multiplying: tv_sec * 10^9 overflows a 32-bit long. */
  return (ts_t)ts.tv_sec * 1000000000ULL + (ts_t)ts.tv_nsec;
}
#endif
/* Size in bytes of the slot reserved for each shared field of arg_t. */
enum { ARG_SLOT_BYTES = 4096 };

/*
 * State shared between the benchmarking parent and the child thread.
 * Each field lives in its own anonymous union together with an
 * ARG_SLOT_BYTES-byte pad, so every field occupies a separate slot
 * (presumably to keep the parent's and child's writes from sharing a
 * cache line -- confirm against the original intent).
 */
typedef struct {
  /* Timestamp taken by the child as soon as it starts running. */
  union {
    ts_t child_started;
    char pad0[ARG_SLOT_BYTES];
  };
  /* Set to 1 by the parent once myth_create() has returned. */
  union {
    volatile int parent_resumed;
    char pad1[ARG_SLOT_BYTES];
  };
  /* Set to 1 by the child right before it returns. */
  union {
    volatile int child_almost_finished;
    char pad2[ARG_SLOT_BYTES];
  };
} arg_t;
/*
 * Child thread body.
 *
 * arg_ points to the arg_t shared with the parent. The child records the
 * time at which it began running, spins until the parent signals that it
 * has resumed, then flags that it is about to finish.
 * Always returns a null pointer.
 */
void * f(void * arg_) {
  arg_t * const shared = (arg_t *)arg_;

  /* Record the start time first; the parent measures against this. */
  shared->child_started = cur_time();

  /* Busy-wait until the parent has returned from myth_create(). */
  while (shared->parent_resumed == 0) {
    /* spin */
  }

  shared->child_almost_finished = 1;
  return NULL;
}
/*
 * Busy-wait until at least 1000 units of cur_time() have elapsed since
 * the call began.
 */
void wait_a_while() {
  enum { SPIN_UNITS = 1000 };
  const ts_t start = cur_time();

  for (;;) {
    const ts_t elapsed = cur_time() - start;
    if (elapsed >= SPIN_UNITS) {
      break;
    }
  }
}
/*
 * Run n create/join rounds and print three averages:
 *   - child latency : from just before myth_create() until the child's
 *                     first timestamp,
 *   - parent latency: time spent by the parent inside myth_create(),
 *   - join latency  : time spent inside myth_join() once the child has
 *                     signalled that it is about to finish.
 *
 * NOTE(review): parent and child both busy-wait on each other's flag, so
 * this presumably needs the runtime to run them concurrently -- confirm
 * the required worker count.
 */
void bench(long n) {
  ts_t total_child = 0;
  ts_t total_parent = 0;
  ts_t total_join = 0;
  arg_t shared;
  arg_t * const a = &shared;
  long iter;

  for (iter = 0; iter < n; ++iter) {
    /* Reset the handshake state for this round. */
    a->child_started = 0;
    a->parent_resumed = 0;
    a->child_almost_finished = 0;

    const ts_t before_create = cur_time();
    myth_thread_t child = myth_create(f, a);
    const ts_t after_create = cur_time();

    /* Release the child, then wait until it is about to return. */
    a->parent_resumed = 1;
    while (a->child_almost_finished == 0) {
      /* spin */
    }
    /* Pause briefly between the child's signal and timing the join. */
    wait_a_while();

    const ts_t before_join = cur_time();
    myth_join(child, 0);
    const ts_t after_join = cur_time();

    total_child += (a->child_started - before_create);
    total_parent += (after_create - before_create);
    total_join += (after_join - before_join);
  }

  const double rounds = (double)n;
  printf("avg child latency = %.9f clocks\n", total_child / rounds);
  printf("avg parent latency = %.9f clocks\n", total_parent / rounds);
  printf("avg join latency = %.9f clocks\n", total_join / rounds);
}
/*
 * Entry point.
 *
 * Usage: prog [iterations]
 *   iterations  number of create/join rounds per benchmark run
 *               (default 2000, must be a positive decimal integer).
 *
 * The benchmark is run three times in a row with the same iteration
 * count. Returns 0 on success, EXIT_FAILURE if the argument is invalid.
 */
int main(int argc, char ** argv) {
  enum { DEFAULT_ITERATIONS = 2000, REPEATS = 3 };
  long n = DEFAULT_ITERATIONS;

  if (argc > 1) {
    /* strtol instead of atol: reject garbage and non-positive counts,
       which would otherwise yield NaN averages or an empty run. */
    char *end = NULL;
    n = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || n <= 0) {
      fprintf(stderr, "usage: %s [iterations > 0]\n", argv[0]);
      return EXIT_FAILURE;
    }
  }

  for (int run = 0; run < REPEATS; run++) {
    bench(n);
  }
  return 0;
}