File: mixmax.icc

package info (click to toggle)
bornagain 1.18.0-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 118,800 kB
  • sloc: cpp: 469,684; python: 38,920; xml: 805; awk: 630; sh: 286; ansic: 37; makefile: 25
file content (495 lines) | stat: -rw-r--r-- 16,861 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
/*
 *  mixmax.c
 *  A Pseudo-Random Number Generator
 *
 *  Created by Konstantin Savvidy.
 *
 *  The code is released under GNU Lesser General Public License v3
 *
 *	G.K.Savvidy and N.G.Ter-Arutyunian, 
 *  On the Monte Carlo simulation of physical systems,
 *	J.Comput.Phys. 97, 566 (1991);
 *  Preprint EPI-865-16-86, Yerevan, Jan. 1986
 *
 *  K.Savvidy
 *  The MIXMAX random number generator
 *  Comp. Phys. Commun. 196 (2015), pp 161–165
 *  http://dx.doi.org/10.1016/j.cpc.2015.06.003
 *
 *  K.Savvidy and G.Savvidy
 *  Spectrum and Entropy of C-systems. MIXMAX random number generator
 *  Chaos, Solitons & Fractals, Volume 91, (2016) pp. 33–38
 *  http://dx.doi.org/10.1016/j.chaos.2016.05.003
 *
 */

#ifdef __MIXMAX_C
  #error "You should not define __MIXMAX_C. Please #include mixmax.h"
#endif

#define __MIXMAX_C

#include "mixmax.h"

int iterate(rng_state_t* X){
	X->sumtot = iterate_raw_vec(X->V, X->sumtot);
	return 0;
}

#if (SPECIALMUL!=0)
inline uint64_t MULWU (uint64_t k){ return (( (k)<<(SPECIALMUL) & M61) | ( (k) >> (BITS-SPECIALMUL))  )  ;}
#elif (SPECIALMUL==0)
inline uint64_t MULWU (uint64_t /* k */ ){ return 0;}
#else
#error SPECIALMUL not defined
#endif

myuint iterate_raw_vec(myuint* Y, myuint sumtotOld){
	// operates with a raw vector, uses known sum of elements of Y
	int i;
#if (SPECIAL!=0)
    myuint temp2 = Y[1];
#endif
	myuint  tempP, tempV;
    Y[0] = ( tempV = sumtotOld);
    myuint sumtot = Y[0], ovflow = 0; // will keep a running sum of all new elements
	tempP = 0;              // will keep a partial sum of all old elements
	for (i=1; i<N; i++){
#if (SPECIALMUL!=0)
        myuint tempPO = MULWU(tempP);
        tempP = modadd(tempP,Y[i]);
        tempV = MOD_MERSENNE(tempV + tempP + tempPO); // edge cases ?
#else
        tempP = modadd(tempP , Y[i]);
        tempV = modadd(tempV , tempP);
#endif
        Y[i] = tempV;
		sumtot += tempV; if (sumtot < tempV) {ovflow++;}
	}
#if (SPECIAL!=0)
    temp2 = MOD_MULSPEC(temp2);
    Y[2] = modadd( Y[2] , temp2 );
    sumtot += temp2; if (sumtot < temp2) {ovflow++;}
#endif
	return MOD_MERSENNE(MOD_MERSENNE(sumtot) + (ovflow <<3 ));
}

myuint get_next(rng_state_t* X) {
    return GET_BY_MACRO(X);
}

double get_next_float(rng_state_t* X){
    return get_next_float_BY_MACRO(X);
}

void fill_array(rng_state_t* X, unsigned int n, double *array)
{
    // Return an array of n random numbers uniformly distributed in (0,1]
    unsigned int i,j;
    const int M=N-1;
    for (i=0; i<(n/M); i++){
        iterate_and_fill_array(X, array+i*M);
    }
    unsigned int rem=(n % M);
    if (rem) {
        iterate(X);
        for (j=0; j< (rem); j++){
            array[M*i+j] = (int64_t)X->V[j] * (double)(INV_MERSBASE);
        }
        X->counter = j; // needed to continue with single fetches from the exact spot, but if you only use fill_array to get numbers then it is not necessary
    }else{
        X->counter = N;
    }
}

void iterate_and_fill_array(rng_state_t* X, double *array){
    myuint* Y=X->V;
    int i;
    myuint  tempP, tempV;
#if (SPECIAL != 0)
    myuint temp2 = Y[1];
#endif
    Y[0] = (tempV = X->sumtot);
    //array[0] = (double)tempV * (double)(INV_MERSBASE);
    myuint sumtot = Y[0], ovflow = 0; // will keep a running sum of all new elements
    tempP = 0;             // will keep a partial sum of all old elements
    for (i=1; i<N; i++){
#if (SPECIALMUL!=0)
        myuint tempPO = MULWU(tempP);
        tempP = modadd(tempP,Y[i]);
        tempV = MOD_MERSENNE(tempV + tempP + tempPO); // edge cases ?
#else
        tempP = MOD_MERSENNE(tempP + Y[i]);
        tempV = MOD_MERSENNE(tempV + tempP);
#endif
        Y[i] = tempV;
        sumtot += tempV; if (sumtot < tempV) {ovflow++;}
        array[i-1] = (int64_t)tempV * (double)(INV_MERSBASE);
    }
#if (SPECIAL!=0)
    temp2 = MOD_MULSPEC(temp2);
    Y[2] = modadd( Y[2] , temp2 );
    sumtot += temp2; if (sumtot < temp2) {ovflow++;}
#endif
    X->sumtot = MOD_MERSENNE(MOD_MERSENNE(sumtot) + (ovflow <<3 ));
}

myuint modadd(myuint foo, myuint bar){
#if (defined(__x86_64__) || defined(__i386__)) &&  defined(__GNUC__) && defined(USE_INLINE_ASM)
//#warning Using assembler routine in modadd
    myuint out;
    /* Assembler trick suggested by Andrzej Görlich     */
    __asm__ ("addq %2, %0; "
             "btrq $61, %0; "
             "adcq $0, %0; "
             :"=r"(out)
             :"0"(foo), "r"(bar)
             );
    return out;
#else
    return MOD_MERSENNE(foo+bar);
#endif
}

rng_state_t* rng_alloc()
{
/* allocate the state */
	rng_state_t  *p = (rng_state_t*)malloc(sizeof(rng_state_t)); 
	p->fh=NULL; // by default, set the output file handle to stdout  
	return p;
}

int rng_free(rng_state_t* X) /* free the memory occupied by the state */
{
	free(X);
	return 0;
}

rng_state_t*  rng_copy(myuint *Y)
{
	/* copy the vector stored at Y, and return pointer to the newly allocated and initialized state.  
	 It is the user's responsibility  to make sure that Y is properly allocated with rng_alloc, 
	 then pass Y->V or it can also be an array -- such as myuint Y[N+1] and Y[1]...Y[N] have been set to legal values [0 .. MERSBASE-1]
	 Partial sums on this new state are recalculated, and counter set to zero, so that when get_next is called, 
	 it will output the initial vector before any new numbers are produced, call iterate(X) if you want to advance right away */
	rng_state_t* X = rng_alloc();
    myuint sumtot=0,ovflow=0;
	X->counter = 2;
    int i;
	for ( i=0; i < N; i++){
		X->V[i] = Y[i];
        sumtot += X->V[(i)]; if (sumtot < X->V[(i)]) {ovflow++;}

	}
	X->sumtot = MOD_MERSENNE(MOD_MERSENNE(sumtot) + (ovflow <<3 ));
	return X;
}

void seed_vielbein(rng_state_t* X, unsigned int index)	
{
int i;
	if (index<N){
		for (i=0; i < N; i++){
			X->V[i] = 0;
		}
		X->V[index] = 1;
	}else{
		fprintf(stderr, "Out of bounds index, is not ( 0 <= index < N  )\n"); exit(ARRAY_INDEX_OUT_OF_BOUNDS);
	}
	X->counter = N;  // set the counter to N if iteration should happen right away
	//precalc(X);
    X->sumtot = 1; //(index ? 1:0);
	if (X->fh==NULL){X->fh=stdout;}	
}

void seed_spbox(rng_state_t* X, myuint seed)
{ // a 64-bit LCG from Knuth line 26, in combination with a bit swap is used to seed
	const myuint MULT64=6364136223846793005ULL; 
	int i;
    myuint sumtot=0,ovflow=0;
	if (seed == 0){
		fprintf(stderr, " try seeding with nonzero seed next time!\n");
		exit(SEED_WAS_ZERO);
	}
	
	myuint l = seed;

	//X->V[0] = l & MERSBASE;
	if (X->fh==NULL){X->fh=stdout;} // if the filehandle is not yet set, make it stdout
	for (i=0; i < N; i++){
		l*=MULT64; l = (l << 32) ^ (l>>32);
		X->V[i] = l & MERSBASE;
        sumtot += X->V[(i)]; if (sumtot < X->V[(i)]) {ovflow++;}
	}
	X->counter = N;  // set the counter to N if iteration should happen right away
    X->sumtot = MOD_MERSENNE(MOD_MERSENNE(sumtot) + (ovflow <<3 ));
}

myuint precalc(rng_state_t* X){
	int i;
	myuint temp;
	temp = 0;
	for (i=0; i < N; i++){
		temp = MOD_MERSENNE(temp + X->V[i]);
	}	
	X->sumtot = temp; 
	return temp;
}


int rng_get_N(void){return N;}

#if defined(__x86_64__)
inline myuint mod128(__uint128_t s){
    myuint s1;
    s1 = ( (  ((myuint)s)&MERSBASE )    + (  ((myuint)(s>>64)) * 8 )  + ( ((myuint)s) >>BITS) );
    return	MOD_MERSENNE(s1);
}

inline myuint fmodmulM61(myuint cum, myuint a, myuint b){
	__uint128_t temp;
	temp = (__uint128_t)a*(__uint128_t)b + cum;
	return mod128(temp);
}

#else // on all other platforms, including 32-bit linux, PPC and PPC64 and all Windows
#define MASK32 0xFFFFFFFFULL

inline myuint fmodmulM61(myuint cum, myuint s, myuint a)
{
    register myuint o,ph,pl,ah,al;
    o=(s)*a;
    ph = ((s)>>32);
    pl = (s) & MASK32;
    ah = a>>32;
    al = a & MASK32;
    o = (o & M61) + ((ph*ah)<<3) + ((ah*pl+al*ph + ((al*pl)>>32))>>29) ;
    o += cum;
    o = (o & M61) + ((o>>61));
    return o;
}
#endif

void print_state(rng_state_t* X){
    int j;
    fprintf(X->fh, "mixmax state, file version 1.0\n" );
    fprintf(X->fh, "N=%u; V[N]={", rng_get_N() );
	for (j=0; (j< (rng_get_N()-1) ); j++) {
		fprintf(X->fh, "%llu, ", X->V[j] );
	}
	fprintf(X->fh, "%llu", X->V[rng_get_N()-1] );
    fprintf(X->fh, "}; " );
    fprintf(X->fh, "counter=%u; ", X->counter );
    fprintf(X->fh, "sumtot=%llu;\n", X->sumtot );
}

void read_state(rng_state_t* X, const char filename[] ){
    // a function for reading the state from a file, after J. Apostolakis
    FILE* fin;
    if(  ( fin = fopen(filename, "r") ) ){
        int l=0;
        while ( l != '{' ) { // 0x7B = "{"
            l=fgetc(fin); // proceed until hitting opening bracket
        }
        ungetc(' ', fin);
    }else{
        fprintf(stderr, "mixmax -> read_state: error reading file %s\n", filename);
        exit(ERROR_READING_STATE_FILE);
    }
    
    myuint vecVal;
    //printf("mixmax -> read_state: starting to read state from file\n");
    if (!fscanf(fin, "%llu", &X->V[0]) ) {fprintf(stderr, "mixmax -> read_state: error reading file %s\n", filename); exit(ERROR_READING_STATE_FILE);}
     //printf("V[%d] = %llu\n",0, X->V[0]);
    int i;
    for( i = 1; i < rng_get_N(); i++){
        if (!fscanf(fin, ", %llu", &vecVal) ) {fprintf(stderr, "mixmax -> read_state: error reading vector component i=%d from file %s\n", i, filename); exit(ERROR_READING_STATE_FILE);}
        //printf("V[%d] = %llu\n",i, vecVal);
        if(  vecVal <= MERSBASE ){
            X->V[i] = vecVal;
        }else{
            fprintf(stderr, "mixmax -> read_state: Invalid state vector value= %llu"
                    " ( must be less than %llu ) "
                    " obtained from reading file %s\n"
                    , vecVal, MERSBASE, filename);
            
        }
    }
    
    unsigned int counter;
    if (!fscanf( fin, "}; counter=%u; ", &counter)){fprintf(stderr, "mixmax -> read_state: error reading counter from file %s\n", filename); exit(ERROR_READING_STATE_FILE);}
    if( counter <= N ) {
        X->counter= counter;
    }else{
        fprintf(stderr, "mixmax -> read_state: Invalid counter = %d"
                "  Must be 0 <= counter < %u\n" , counter, N);
        print_state(X);
        exit(ERROR_READING_STATE_COUNTER);
    }
    precalc(X);
    myuint sumtot;
    if (!fscanf( fin, "sumtot=%llu\n", &sumtot)){fprintf(stderr, "mixmax -> read_state: error reading checksum from file %s\n", filename); exit(ERROR_READING_STATE_FILE);}
    
    if (X->sumtot != sumtot) {
        fprintf(stderr, "mixmax -> checksum error while reading state from file %s - corrupted?\n", filename);
        exit(ERROR_READING_STATE_CHECKSUM);
    }
    else{fprintf(stderr, "mixmax -> read_state: checksum ok: %llu == %llu\n",X->sumtot,  sumtot);}
    fclose(fin);
}


#define FUSEDMODMULVEC \
{ for (i =0; i<N; i++){         \
cum[i] =  fmodmulM61( cum[i], coeff ,  Y[i] ) ; \
} }


#define SKIPISON 1
#define OLDSKIP 0

#if ( ( (N==8) || (N==17) || (N==240) ||(N==120) || (N==256) ) && BITS==61 && SKIPISON!=0)
#if (OLDSKIP==1)
#warning Compiling with normal skip
void seed_uniquestream( rng_state_t* Xin, myID_t clusterID, myID_t machineID, myID_t runID, myID_t  streamID ){
		seed_vielbein(Xin,0);
		Xin->sumtot = apply_bigskip(Xin->V, Xin->V,  clusterID,  machineID,  runID,   streamID );
	if (Xin->fh==NULL){Xin->fh=stdout;} // if the filehandle is not yet set, make it stdout
  Xin->counter = 1;
}
#else
void seed_uniquestream( rng_state_t* Xin, myID_t clusterID, myID_t machineID, myID_t runID, myID_t  streamID ){
//#warning Compiling with cached skip
    
    /*  Kostas Savvidis - 2016-04-27
     A caching implementation of seed_uniquestream which
     will keep the previous vector and update it in accordance with increment of the seed,
     so that if the user increments the seed vector by 1, the whole
     vector can be reused next time and skip just by 1 unit, see below.
     This is believed to be thread safe due to the __thread declaration of the cached variables.
     */
    static __thread myuint Vcache[N]={1};
    static __thread myuint sumtmp=0;
    static __thread myID_t ID1cached=0, ID2cached=0, ID3cached=0, ID4cached=0;
    
    if ( ID1cached <=  clusterID &&  ID2cached <= machineID && ID3cached <= runID && ID4cached <= streamID){ // proceed only if the the cached seed value is less than the new seed value
        
        sumtmp = apply_bigskip(Vcache, Vcache,  clusterID - ID1cached,  machineID - ID2cached,  runID - ID3cached,   streamID - ID4cached );
        
        ID1cached =  clusterID;  ID2cached = machineID ; ID3cached = runID ; ID4cached = streamID;  // increment the cached seed value
        for (int i=0; i<N; i++) { Xin->V[i] = Vcache[i] ; }         // copy to destination
        Xin->sumtot = sumtmp;
    }else{
        seed_vielbein(Xin,0);
        Xin->sumtot = apply_bigskip(Xin->V, Xin->V,  clusterID,  machineID,  runID,   streamID );
        for (int i=0; i<N; i++) { Vcache[i]=Xin->V[i]; }
        ID1cached =  clusterID;  ID2cached = machineID ; ID3cached = runID ; ID4cached = streamID;
    }
    Xin->counter = 1;
}


#endif

void branch_inplace( rng_state_t* Xin, myID_t* IDvec ){
	Xin->sumtot = apply_bigskip(Xin->V, Xin->V,  IDvec[3],  IDvec[2],  IDvec[1],   IDvec[0] );
}

myuint apply_bigskip(myuint* Vout, myuint* Vin, myID_t clusterID, myID_t machineID, myID_t runID, myID_t  streamID ){
	/*
	 makes a derived state vector, Vout, from the mother state vector Vin
	 by skipping a large number of steps, determined by the given seeding ID's
	 
	 it is mathematically guaranteed that the substreams derived in this way from the SAME (!!!) Vin will not collide provided
	 1) at least one bit of ID is different
	 2) less than 10^100 numbers are drawn from the stream 
	 (this is good enough : a single CPU will not exceed this in the lifetime of the universe, 10^19 sec, 
	 even if it had a clock cycle of Planch time, 10^44 Hz )
	 
	 Caution: never apply this to a derived vector, just choose some mother vector Vin, for example the unit vector by seed_vielbein(X,0), 
	 and use it in all your runs, just change runID to get completely nonoverlapping streams of random numbers on a different day.
	 
	 clusterID and machineID are provided for the benefit of large organizations who wish to ensure that a simulation 
	 which is running in parallel on a large number of  clusters and machines will have non-colliding source of random numbers.
	 
	 did i repeat it enough times? the non-collision guarantee is absolute, not probabilistic
	 
	 */
	
	
	const	myuint skipMat[128][N] = // to make this file, delete all except some chosen 128 rows of the coefficients table
#if (N==240)
#include "mixmax_skip_N240.icc"
#elif (N==120)
#include "mixmax_skip_N120.icc"

#elif (N==256)
#if (SPECIAL==-1)
#include "mixmax_skip_N256.oldS.icc"
#else
#include "mixmax_skip_N256.icc"
#endif

#elif (N==8)
#include "mixmax_skip_N8.icc"
#elif (N==17)
#include "mixmax_skip_N17.icc"
#endif
	;
	
	myID_t IDvec[4] = {streamID, runID, machineID, clusterID};
	int r,i,j,  IDindex;
	myID_t id;
	myuint Y[N], cum[N];
	myuint coeff;
	myuint* rowPtr;
    myuint sumtot=0;
	

    for (i=0; i<N; i++) { Y[i] = Vin[i]; sumtot = modadd( sumtot, Vin[i]); } ;
	for (IDindex=0; IDindex<4; IDindex++) { // go from lower order to higher order ID
		id=IDvec[IDindex];
		//printf("now doing ID at level %d, with ID = %d\n", IDindex, id);     
		r = 0;
		while (id){
			if (id & 1) { 
				rowPtr = (myuint*)skipMat[r + IDindex*8*sizeof(myID_t)];
				//printf("free coeff for row %d is %llu\n", r, rowPtr[0]);
				for (i=0; i<N; i++){ cum[i] = 0; }    
				for (j=0; j<N; j++){              // j is lag, enumerates terms of the poly
					// for zero lag Y is already given
					coeff = rowPtr[j]; // same coeff for all i
                    for (i =0; i<N; i++){
                        cum[i] =  fmodmulM61( cum[i], coeff ,  Y[i] ) ;
                    }
					sumtot = iterate_raw_vec(Y, sumtot); 
				}
                sumtot=0;
                for (i=0; i<N; i++){ Y[i] = cum[i]; sumtot = modadd( sumtot, cum[i]); } ;
			}
		id = (id >> 1); r++; // bring up the r-th bit in the ID		
		}		
	}
    sumtot=0;
	for (i=0; i<N; i++){ Vout[i] = Y[i]; sumtot = modadd( sumtot, Y[i]); } ;  // returns sumtot, and copy the vector over to Vout
	return (sumtot) ;
}
#else
#warning For this N, we dont have the skipping coefficients yet, using alternative method to seed

void seed_uniquestream( rng_state_t* Xin, myID_t clusterID, myID_t machineID, myID_t runID, myID_t  streamID ){
    Xin->V[0] = (myuint)clusterID;
    Xin->V[1] = (myuint)machineID;
    Xin->V[2] = (myuint)runID;
    Xin->V[3] = (myuint)streamID;
    Xin->V[4] = (myuint)clusterID << 5;
    Xin->V[5] = (myuint)machineID << 7;
    Xin->V[6] = (myuint)runID     << 11;
    Xin->V[7] = (myuint)streamID  << 13;
    precalc(Xin);
    Xin->sumtot = iterate_raw_vec(Xin->V, Xin->sumtot);
    Xin->sumtot = iterate_raw_vec(Xin->V, Xin->sumtot);
}
#endif // SKIPISON