File: time_boxmuller.cpp

package info (click to toggle)
python-bumps 0.9.0-3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 10,776 kB
  • sloc: python: 24,586; ansic: 4,973; cpp: 4,849; javascript: 639; xml: 493; makefile: 143; perl: 108; sh: 94
file content (132 lines) | stat: -rw-r--r-- 3,969 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
// Test for boxmuller.h on CPU
#include <Random123/philox.h>
#include <Random123/threefry.h>
#include "boxmuller.hpp"
#include "util.h"   // for timer()

typedef r123::Philox4x32 CBRNGF;
typedef r123::Threefry2x64 CBRNGD;

const char *progname = "time_boxmuller";

// Each call to boxmuller() returns a pair of values in the .x and .y
// members, which we add up into sum just to avoid being optimized away.
template <typename CBRNG, typename F, typename F2>
F timedloop(typename CBRNG::ukey_type k, size_t Ntry){
    F sum = 0.f;
    typename CBRNG::ctr_type ctr = {{}};
    const size_t csize = sizeof(ctr)/sizeof(ctr[0]);
    CBRNG rng;

    for(size_t i=0; i<Ntry; i+=csize){
        ctr.incr();
        typename CBRNG::ctr_type u = rng(ctr, k);
	F2 f2;
	switch(csize) {
	case 8: f2 = r123::boxmuller(u[6], u[7]); sum += f2.x + f2.y;
                f2 = r123::boxmuller(u[4], u[5]); sum += f2.x + f2.y;
	case 4: f2 = r123::boxmuller(u[2], u[3]); sum += f2.x + f2.y;
	case 2: f2 = r123::boxmuller(u[0], u[1]); sum += f2.x + f2.y;
                break;
	default:
	        R123_ASSERT(0);
	}
    }
    return sum;
}

template <typename CBRNG, typename F2>
void dumploop(FILE *fp, typename CBRNG::ukey_type k, size_t Ntry){
    typename CBRNG::ctr_type ctr = {{}};
    const size_t csize = sizeof(ctr)/sizeof(ctr[0]);
    CBRNG rng;

    for(size_t i=0; i<Ntry; i+=csize){
        ctr.incr();
        typename CBRNG::ctr_type u = rng(ctr, k);
	F2 f2;
	switch(csize) {
	case 8: f2 = r123::boxmuller(u[6], u[7]); fprintf(fp, "%g\n%g\n", f2.x, f2.y);
                f2 = r123::boxmuller(u[4], u[5]); fprintf(fp, "%g\n%g\n", f2.x, f2.y);
	case 4: f2 = r123::boxmuller(u[2], u[3]); fprintf(fp, "%g\n%g\n", f2.x, f2.y);
	case 2: f2 = r123::boxmuller(u[0], u[1]); fprintf(fp, "%g\n%g\n", f2.x, f2.y); break;
	default:
	    R123_ASSERT(0);
	}
    }
}

#define NREPEAT 20

template <typename CBRNG, typename F, typename F2>
void timedcall(const char *tname, typename CBRNG::ukey_type k, size_t Ntry, char *out_fname) {
    double cur_time, dt;
    F sums[NREPEAT];
    int i;
    FILE *fp;
    char *fname;
    if (out_fname) {
	fname = (char *) malloc(strlen(out_fname) + strlen(tname) + 2);
	CHECKNOTZERO(fname);
	sprintf(fname, "%s-%s", out_fname, tname);
	fp = fopen(fname, "w");
	CHECKNOTZERO(fp);
    } else {
	fname = NULL;
	fp = NULL;
    }
    (void) timer(&cur_time);
    /*
     * we call timedloop NREPEAT times so that it is easy to keep
     * Ntry the same for boxmuller.cu and boxmuller.cpp, so sum[0]
     * can be checked.
     */
    for (i = 0; i < NREPEAT; i++) {
	k.v[sizeof(k)/sizeof(k.v[0])-1] = i;
	if (fp)
	    dumploop<CBRNG, F2>(fp, k, Ntry);
	else
	    sums[i] = timedloop<CBRNG, F, F2>(k, Ntry);
    }
    dt = timer(&cur_time);
    if (fp) {
	printf("%s %lu written to %s in %g sec: %gM/sec\n", tname, (unsigned long)(Ntry*NREPEAT), fname, dt, Ntry*NREPEAT*1.e-6/dt);
	fclose(fp);
	free(fname);
    } else {
	printf("%s %lu in %g sec: %gM/sec, sum = %g\n", tname, (unsigned long)(Ntry*NREPEAT), dt, Ntry*NREPEAT*1.e-6/dt, sums[0]);
	for (i = 1; i < NREPEAT; i++) {
	    printf(" %g", sums[i]);
	}
	printf("\n");
    }
}

const size_t DEF_N = 200000;

int main(int argc, char **argv){
    CBRNGF::ukey_type keyf = {{}};
    CBRNGD::ukey_type keyd = {{}};
    size_t Ntry = DEF_N;
    char *dumpfname;
    
    dumpfname = getenv("BOXMULLER_DUMPFILE");
    if(argc>1) {
	if (argv[1][0] == '-') {
	    fprintf(stderr, "Usage: %s [iterations_per_thread [key0 [key1]]]\n", argv[0]);
	    exit(1);
	}
        Ntry = atol(argv[1]);
    }
    for (int i = 0; i < (int)(sizeof(keyf)/sizeof(keyf[0])-1) && 2+i < argc; i++) {
	keyf.v[i] = atol(argv[2+i]);
    }
    for (int i = 0; i < (int)(sizeof(keyd)/sizeof(keyd[0])-1) && 2+i < argc; i++) {
	keyd.v[i] = atol(argv[2+i]);
    }

    timedcall<CBRNGF,float,r123::float2>("float", keyf, Ntry, dumpfname);
    timedcall<CBRNGD,double,r123::double2>("double", keyd, Ntry, dumpfname);
    return 0;
}