File: measure_latency.c

package info (click to toggle)
massivethreads 1.02-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 13,924 kB
  • sloc: ansic: 27,814; sh: 4,559; cpp: 3,334; javascript: 1,799; makefile: 1,745; python: 523; asm: 373; perl: 118; lisp: 9
file content (113 lines) | stat: -rw-r--r-- 2,550 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113

#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#include <myth/myth.h>

/* Timestamp type used for every measurement in this file. */
typedef unsigned long long ts_t;

/* Targets for which rdtsc() below knows how to read a hardware counter. */
#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \
	(defined(__sparc__) && defined(__arch64__))
#define USE_RDTSC 1
#endif

#if USE_RDTSC
/*
 * Read the hardware counter of the current CPU and return it as a ts_t.
 *
 *   x86 / x86_64 : the rdtsc instruction; the two 32-bit halves it leaves
 *                  in EDX:EAX are combined here in C.
 *   aarch64      : the cntvct_el0 system register.
 *   64-bit SPARC : the %tick register.
 *
 * The unit of the returned value therefore differs between architectures.
 * TODO: SPARC builds that do not define __arch64__ are not handled here;
 * they use the clock_gettime() fallback below.
 *
 * The reserved spelling __asm__ __volatile__ is used throughout so the file
 * also compiles in strict ISO modes (-std=c99 / -std=c11), where the plain
 * `asm` keyword is not available.
 */
static inline ts_t rdtsc(void) {
  ts_t u;
#if defined(__i386__) || defined(__x86_64__)
  unsigned int lo, hi;
  __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
  u = ((ts_t)hi << 32) | lo;
#elif defined(__aarch64__)
  __asm__ __volatile__("mrs %0, cntvct_el0" : "=r" (u));
#elif defined(__sparc__) && defined(__arch64__)
  __asm__ __volatile__("rd %%tick, %0" : "=r" (u));
#else
#error "USE_RDTSC is set but no counter-reading code exists for this target"
#endif
  return u;
}

/* Current time as a raw hardware counter value (see rdtsc()). */
static inline ts_t cur_time(void) {
  return rdtsc();
}

#else
/*
 * Fallback for targets without a counter reader: current time in
 * nanoseconds.  A monotonic clock is preferred when the platform offers
 * one, because callers only ever subtract two readings and a wall clock
 * may be stepped between them.  A failing clock_gettime() aborts the
 * program rather than returning an indeterminate timestamp.
 */
ts_t cur_time(void) {
  struct timespec ts;
#ifdef CLOCK_MONOTONIC
  const clockid_t clk = CLOCK_MONOTONIC;
#else
  const clockid_t clk = CLOCK_REALTIME;
#endif
  if (clock_gettime(clk, &ts) != 0) {
    perror("clock_gettime");
    abort();
  }
  /* widen before multiplying so the product cannot overflow a 32-bit long */
  return (ts_t)ts.tv_sec * 1000000000ULL + (ts_t)ts.tv_nsec;
}
#endif

/*
 * Size in bytes of each slot of arg_t.  Every shared field lives in its own
 * union together with a char array of this size, so the three fields sit at
 * offsets 0, ARG_SLOT_BYTES and 2 * ARG_SLOT_BYTES.
 * (Presumably this keeps the fields on separate cache lines/pages so the
 * spinning threads do not disturb each other -- confirm with the author.)
 */
enum { ARG_SLOT_BYTES = 4096 };

/* State shared between bench() (the parent) and f() (the child). */
typedef struct {
  /* timestamp written by the child as its first action */
  union {
    ts_t child_started;
    char pad0[ARG_SLOT_BYTES];
  };
  /* set to 1 by the parent once myth_create() has returned to it */
  union {
    volatile int parent_resumed;
    char pad1[ARG_SLOT_BYTES];
  };
  /* set to 1 by the child right before it returns */
  union {
    volatile int child_almost_finished;
    char pad2[ARG_SLOT_BYTES];
  };
} arg_t;
 
/*
 * Thread body handed to myth_create() by bench().
 *
 * arg_ points to the arg_t shared with the parent.  The child records the
 * time at which it started running, spins until the parent reports that it
 * has resumed, announces that it is about to finish, and returns a null
 * pointer.
 */
void * f(void * arg_) {
  arg_t * const shared = arg_;

  shared->child_started = cur_time();

  /* busy-wait: the parent sets this flag after myth_create() returns */
  while (shared->parent_resumed == 0) {
    continue;
  }

  shared->child_almost_finished = 1;
  return NULL;
}

/*
 * Busy-wait until cur_time() has advanced by at least 1000 units past the
 * value it had on entry.  The unit is whatever cur_time() returns.
 */
void wait_a_while() {
  const ts_t begin = cur_time();

  for (;;) {
    const ts_t elapsed = cur_time() - begin;
    if (elapsed >= 1000) {
      break;
    }
  }
}

/*
 * Create and join a thread n times and print three averages, in units of
 * cur_time():
 *
 *   child latency  : from just before myth_create() until the child's
 *                    first timestamp (arg->child_started)
 *   parent latency : from just before myth_create() until just after it
 *                    returns in the parent
 *   join latency   : time spent in myth_join(), entered after the child has
 *                    flagged that it is about to return and after an extra
 *                    wait_a_while() delay
 *
 * n must be positive.  For n <= 0 the averages are undefined (0/0 gives
 * NaN), so a message is written to stderr and nothing is measured.
 *
 * NOTE(review): both sides busy-wait on flags in arg, so this appears to
 * need parent and child to be able to make progress at the same time --
 * confirm against the runtime's scheduling configuration.
 */
void bench(long n) {
  if (n <= 0) {
    fprintf(stderr, "bench: iteration count must be positive (got %ld)\n", n);
    return;
  }

  ts_t child_latency_sum = 0;
  ts_t parent_latency_sum = 0;
  ts_t join_latency_sum = 0;
  arg_t arg[1];

  for (long i = 0; i < n; i++) {
    /* reset the shared state before each round */
    arg->child_started = 0;
    arg->parent_resumed = 0;
    arg->child_almost_finished = 0;

    /* --- measured sequence: keep the statement order exactly as is --- */
    ts_t t0 = cur_time();
    myth_thread_t c = myth_create(f, arg);
    ts_t t1 = cur_time();
    arg->parent_resumed = 1;                 /* releases the child's spin loop */
    while (! arg->child_almost_finished) { } /* child is about to return */
    wait_a_while();                          /* extra delay before timing the join */
    ts_t t2 = cur_time();
    myth_join(c, 0);
    ts_t t3 = cur_time();

    child_latency_sum += (arg->child_started - t0);
    parent_latency_sum += (t1 - t0);
    join_latency_sum += (t3 - t2);
  }

  printf("avg child latency = %.9f clocks\n", child_latency_sum / (double)n);
  printf("avg parent latency = %.9f clocks\n", parent_latency_sum / (double)n);
  printf("avg join latency = %.9f clocks\n", join_latency_sum / (double)n);

}

/*
 * Usage: measure_latency [iterations]
 *
 * Runs bench() three times with the given iteration count (default 2000).
 * The optional argument must be a positive decimal integer that fits in a
 * long; anything else prints a usage message and exits with EXIT_FAILURE
 * instead of silently benchmarking zero iterations.
 */
int main(int argc, char ** argv) {
  enum { DEFAULT_ITERATIONS = 2000, ROUNDS = 3 };
  long n = DEFAULT_ITERATIONS;

  if (argc > 1) {
    char *end = NULL;
    errno = 0;
    n = strtol(argv[1], &end, 10);
    /* reject overflow, empty/non-numeric input, trailing junk and n <= 0 */
    if (errno != 0 || end == argv[1] || *end != '\0' || n <= 0) {
      fprintf(stderr, "usage: %s [iterations > 0]\n", argv[0]);
      return EXIT_FAILURE;
    }
  }

  for (int round = 0; round < ROUNDS; round++) {
    bench(n);
  }
  return 0;
}