/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- */
/*
 * This code has been contributed by the DARPA HPCS program.  Contact
 * David Koester <dkoester@mitre.org> or Bob Lucas <rflucas@isi.edu>
 * if you have questions.
 *
 * GUPS (Giga UPdates per Second) is a measurement that profiles the memory
 * architecture of a system and is a measure of performance similar to MFLOPS.
 * The HPCS HPCchallenge RandomAccess benchmark is intended to exercise the
 * GUPS capability of a system, much like the LINPACK benchmark is intended to
 * exercise the MFLOPS capability of a computer.  In each case, we would
 * expect these benchmarks to achieve close to the "peak" capability of the
 * memory system.  The extent of the similarity between RandomAccess and
 * LINPACK is limited to both benchmarks attempting to calculate a peak system
 * capability.
 *
 * GUPS is calculated by identifying the number of memory locations that can be
 * randomly updated in one second, divided by 1 billion (1e9).  The term
 * "randomly" means that there is little relationship between one address to be
 * updated and the next, except that they occur in the space of one half the
 * total system memory.  An update is a read-modify-write operation on a table
 * of 64-bit words: an address is generated, the value at that address is read
 * from memory, modified by an integer operation (add, and, or, xor) with a
 * literal value, and the new value is written back to memory.
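 *
 * As an illustrative sketch (the helper name next_random64 is hypothetical,
 * not part of this file), a single xor-flavored update of a table of
 * 2^logTableSize words would look like:
 *
 *   u64Int ran  = next_random64();             /* pseudo-random 64-bit value */
 *   u64Int addr = ran >> (64 - logTableSize);  /* high bits index the table  */
 *   Table[addr] ^= ran;                        /* read-modify-write update   */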
 *
 * We are interested in knowing the GUPS performance of both entire systems and
 * system subcomponents --- e.g., the GUPS rating of a distributed memory
 * multiprocessor, the GUPS rating of an SMP node, and the GUPS rating of a
 * single processor.  While there is typically a scaling of FLOPS with processor
 * count, a similar phenomenon may not always occur for GUPS.
 *
 * For additional information on the GUPS metric, the HPCchallenge RandomAccess
 * Benchmark, and the rules to run RandomAccess or modify it to optimize
 * performance -- see http://icl.cs.utk.edu/hpcc/
 *
 */
/*
 * This file contains the computational core of the single cpu version
 * of GUPS.  The inner loop should easily be vectorized by compilers
 * with such support.
 *
 * This core is used by both the single_cpu and star_single_cpu tests.
 */
#include <hpcc.h>
#include "RandomAccess.h"
/* Number of updates to table (suggested: 4x number of table entries) */
#define NUPDATE (4 * TableSize)
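/* (note: NUPDATE expands against whatever TableSize is in scope at its point
   of use, e.g. the TableSize parameter of the functions below) */
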
static void
RandomAccessUpdate_LCG(u64Int TableSize, u64Int *Table) {
  u64Int i;
  u64Int ran[128];              /* Current random numbers */
  int j, logTableSize;

  /* Perform updates to main table.  The scalar equivalent is:
   *
   *   u64Int ran;
   *   ran = 1;
   *   for (i=0; i<NUPDATE; i++) {
   *     ran = LCG_MUL64 * ran + LCG_ADD64;
   *     Table[ran >> (64 - logTableSize)] ^= ran;
   *   }
   */
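  /* Seed 128 interleaved streams: HPCC_starts_LCG(n) jumps the generator to
     element n of the LCG sequence, so stream j resumes the scalar stream at
     update (NUPDATE/128)*j and the streams jointly cover all NUPDATE updates */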
  for (j=0; j<128; j++)
    ran[j] = HPCC_starts_LCG((NUPDATE/128) * j);

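  /* recover logTableSize = log2(TableSize); TableSize is a power of 2 */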
  logTableSize = 0;
  for (i = 1; i < TableSize; i <<= 1)
    logTableSize += 1;

  for (i=0; i<NUPDATE/128; i++) {
    /* #pragma ivdep */
#ifdef _OPENMP
#pragma omp parallel for
#endif
    for (j=0; j<128; j++) {
      ran[j] = LCG_MUL64 * ran[j] + LCG_ADD64;
      Table[ran[j] >> (64 - logTableSize)] ^= ran[j];
    }
  }
}
int
HPCC_RandomAccess_LCG(HPCC_Params *params, int doIO, double *GUPs, int *failure) {
  u64Int i;
  u64Int temp;
  double cputime;               /* CPU time to update table */
  double realtime;              /* Real time to update table */
  double totalMem;
  u64Int *Table;
  u64Int logTableSize, TableSize;
  FILE *outFile = NULL;

  if (doIO) {
    outFile = fopen( params->outFname, "a" );
    if (! outFile) {
      outFile = stderr;
      fprintf( outFile, "Cannot open output file.\n" );
      return 1;
    }
  }

  /* calculate local memory per node for the update table */
  totalMem = params->HPLMaxProcMem;
  totalMem /= sizeof(u64Int);

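  /* (the sizing loop below selects the largest power of 2 that fits in half
     of totalMem words, leaving logTableSize = log2(TableSize)) */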
  /* calculate the size of update array (must be a power of 2) */
  for (totalMem *= 0.5, logTableSize = 0, TableSize = 1;
       totalMem >= 1.0;
       totalMem *= 0.5, logTableSize++, TableSize <<= 1)
    ; /* EMPTY */

  Table = HPCC_XMALLOC( u64Int, TableSize );
  if (! Table) {
    if (doIO) {
      fprintf( outFile, "Failed to allocate memory for the update table (" FSTR64 ").\n", TableSize);
      fclose( outFile );
    }
    return 1;
  }
  params->RandomAccess_LCG_N = (s64Int)TableSize;

  /* Print parameters for run */
  if (doIO) {
    fprintf( outFile, "Main table size = 2^" FSTR64 " = " FSTR64 " words\n", logTableSize, TableSize );
    fprintf( outFile, "Number of updates = " FSTR64 "\n", NUPDATE);
  }

  /* Initialize main table */
  for (i=0; i<TableSize; i++) Table[i] = i;
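  /* (identity fill: verification later re-applies every update, and since
     xor is its own inverse the table should return to Table[i] == i) */
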
  /* Begin timing here */
  cputime = -CPUSEC();
  realtime = -RTSEC();

  RandomAccessUpdate_LCG( TableSize, Table );

  /* End timed section */
  cputime += CPUSEC();
  realtime += RTSEC();

  /* make sure no division by zero */
  *GUPs = (realtime > 0.0 ? 1.0 / realtime : -1.0);
  *GUPs *= 1e-9*NUPDATE;
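  /* i.e. GUP/s = (NUPDATE updates / realtime seconds) / 1e9 */
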
  /* Print timing results */
  if (doIO) {
    fprintf( outFile, "CPU time used = %.6f seconds\n", cputime);
    fprintf( outFile, "Real time used = %.6f seconds\n", realtime);
    fprintf( outFile, "%.9f Billion(10^9) Updates per second [GUP/s]\n", *GUPs );
  }

  /* Verification of results (in serial or "safe" mode; optional) */
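  /* Re-running the identical update stream makes each xor cancel its earlier
     partner, so every entry should return to Table[i] == i; mismatches count
     updates lost, e.g. to unsynchronized concurrent writes under OpenMP */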
  temp = 0x1;
  for (i=0; i<NUPDATE; i++) {
    temp = LCG_MUL64 * temp + LCG_ADD64;
    Table[temp >> (64 - (int)logTableSize)] ^= temp;
  }

  temp = 0;
  for (i=0; i<TableSize; i++)
    if (Table[i] != i)
      temp++;

  if (doIO) {
    fprintf( outFile, "Found " FSTR64 " errors in " FSTR64 " locations (%s).\n",
             temp, TableSize, (temp <= 0.01*TableSize) ? "passed" : "failed");
  }
  if (temp <= 0.01*TableSize) *failure = 0;
  else *failure = 1;

  HPCC_free( Table );

  if (doIO) {
    fflush( outFile );
    fclose( outFile );
  }

  return 0;
}