File: diffusion.cpp

package info (click to toggle)
zfp 1.0.1-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 4,744 kB
  • sloc: cpp: 20,656; ansic: 18,871; pascal: 1,231; f90: 907; python: 255; makefile: 183; sh: 79; fortran: 70
file content (478 lines) | stat: -rw-r--r-- 15,169 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
// forward Euler finite difference solution to the heat equation on a 2D grid

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <sstream>
#include "zfp/array2.hpp"
#include "zfp/constarray2.hpp"
#include "zfp/codec/gencodec.hpp"
#include "array2d.hpp"

// add half precision if compiler supports it
#define __STDC_WANT_IEC_60559_TYPES_EXT__
#include <cfloat>
#ifdef FLT16_MAX
  #define WITH_HALF 1
#else
  #undef WITH_HALF
#endif

#ifdef _OPENMP
#include <omp.h>
#endif

// uncompressed tiled arrays based on zfp generic codec
namespace tiled {
#if WITH_HALF
  typedef zfp::array2< double, zfp::codec::generic2<double, _Float16> > array2h;
#endif
  typedef zfp::array2< double, zfp::codec::generic2<double, float> > array2f;
  typedef zfp::array2< double, zfp::codec::generic2<double, double> > array2d;
}

// enumeration of uncompressed storage types
enum storage_type {
  type_none = 0,
  type_half = 1,
  type_float = 2,
  type_double = 3
};

// constants used in the solution
class Constants {
public:
  Constants(size_t nx, size_t ny, size_t nt) :
    nx(nx),
    ny(ny),
    nt(nt),
    x0((nx - 1) / 2),
    y0((ny - 1) / 2),
    k(0.04),
    dx(2.0 / (std::max(nx, ny) - 1)),
    dy(2.0 / (std::max(nx, ny) - 1)),
    dt(0.5 * (dx * dx + dy * dy) / (8 * k)),
    tfinal(nt ? nt * dt : 1.0),
    pi(3.14159265358979323846)
  {}

  size_t nx;     // grid points in x
  size_t ny;     // grid points in y
  size_t nt;     // number of time steps (0 for default)
  size_t x0;     // x location of heat source
  size_t y0;     // y location of heat source
  double k;      // diffusion constant
  double dx;     // grid spacing in x
  double dy;     // grid spacing in y
  double dt;     // time step
  double tfinal; // minimum time to run solution to
  double pi;     // 3.141...
};

// compute Laplacian uxx + uyy at (x, y)
template <class array2d>
inline double
laplacian(const array2d& u, size_t x, size_t y, const Constants& c)
{
  double uxx = (u(x - 1, y) - 2 * u(x, y) + u(x + 1, y)) / (c.dx * c.dx);
  double uyy = (u(x, y - 1) - 2 * u(x, y) + u(x, y + 1)) / (c.dy * c.dy);
  return uxx + uyy;
}

template <class state, class scratch>
inline void
time_step_parallel(state& u, scratch& v, const Constants& c);

#ifdef _OPENMP
// advance solution in parallel via thread-safe views
template <>
inline void
time_step_parallel(zfp::array2d& u, zfp::array2d& du, const Constants& c)
{
  // flush shared cache to ensure cache consistency across threads
  u.flush_cache();
  // zero-initialize du
  du.set(0);
  // compute du/dt in parallel
  #pragma omp parallel
  {
    // create read-only private view of entire array u
    zfp::array2d::private_const_view myu(&u);
    // create read-write private view into rectangular subset of du
    zfp::array2d::private_view mydu(&du);
    mydu.partition(omp_get_thread_num(), omp_get_num_threads());
    // process rectangular region owned by this thread
    for (size_t j = 0; j < mydu.size_y(); j++) {
      size_t y = mydu.global_y(j);
      if (1 <= y && y <= c.ny - 2)
        for (size_t i = 0; i < mydu.size_x(); i++) {
          size_t x = mydu.global_x(i);
          if (1 <= x && x <= c.nx - 2)
            mydu(i, j) = c.dt * c.k * laplacian(myu, x, y, c);
        }
    }
    // compress all private cached blocks to shared storage
    mydu.flush_cache();
  }
  // take forward Euler step in serial
  for (size_t i = 0; i < u.size(); i++)
    u[i] += du[i];
}
#else
// dummy template instantiation when OpenMP support is not available
template <>
inline void time_step_parallel(zfp::array2d&, zfp::array2d&, const Constants&) {}
#endif

// dummy template instantiations; never executed
template <>
inline void time_step_parallel(zfp::const_array2d&, raw::array2d&, const Constants&) {}
template <>
inline void time_step_parallel(raw::array2d&, raw::array2d&, const Constants&) {}
template <>
inline void time_step_parallel(tiled::array2d&, tiled::array2d&, const Constants&) {}
template <>
inline void time_step_parallel(tiled::array2f&, tiled::array2f&, const Constants&) {}
#if WITH_HALF
template <>
inline void time_step_parallel(tiled::array2h&, tiled::array2h&, const Constants&) {}
#endif

// advance solution using integer array indices (generic implementation)
template <class state, class scratch>
inline void
time_step_indexed(state& u, scratch& du, const Constants& c)
{
  // compute du/dt
  for (size_t y = 1; y < c.ny - 1; y++)
    for (size_t x = 1; x < c.nx - 1; x++)
      du(x, y) = c.dt * c.k * laplacian(u, x, y, c);
  // take forward Euler step
  for (uint i = 0; i < u.size(); i++)
    u[i] += du[i];
}

// advance solution using integer array indices (read-only arrays)
template <>
inline void
time_step_indexed(zfp::const_array2d& u, raw::array2d& v, const Constants& c)
{
  // initialize v as uncompressed copy of u
  u.get(&v[0]);
  // take forward Euler step v += (du/dt) dt
  for (size_t y = 1; y < c.ny - 1; y++)
    for (size_t x = 1; x < c.nx - 1; x++)
      v(x, y) += c.dt * c.k * laplacian(u, x, y, c);
  // update u with uncompressed copy v
  u.set(&v[0]);
}

// advance solution using array iterators (generic implementation)
template <class state, class scratch>
inline void
time_step_iterated(state& u, scratch& du, const Constants& c)
{
  // compute du/dt
  for (typename scratch::iterator q = du.begin(); q != du.end(); q++) {
    size_t x = q.i();
    size_t y = q.j();
    if (1 <= x && x <= c.nx - 2 &&
        1 <= y && y <= c.ny - 2)
      *q = c.dt * c.k * laplacian(u, x, y, c);
  }
  // take forward Euler step
  for (typename state::iterator p = u.begin(); p != u.end(); p++)
    *p += du(p.i(), p.j());
}

// advance solution using array iterators (read-only arrays)
template <>
inline void
time_step_iterated(zfp::const_array2d& u, raw::array2d& v, const Constants& c)
{
  // initialize v as uncompressed copy of u
  u.get(&v[0]);
  // take forward Euler step v += (du/dt) dt
  for (raw::array2d::iterator q = v.begin(); q != v.end(); q++) {
    size_t x = q.i();
    size_t y = q.j();
    if (1 <= x && x <= c.nx - 2 &&
        1 <= y && y <= c.ny - 2)
      *q += c.dt * c.k * laplacian(u, x, y, c);
  }
  // update u with uncompressed copy v
  u.set(&v[0]);
}

// set initial conditions with a point heat source (u is assumed zero-initialized)
template <class state, class scratch>
inline void
initialize(state& u, scratch&, const Constants& c)
{
  u(c.x0, c.y0) = 1;
}

// set initial conditions for const_array; requires updating the whole array
template <>
inline void
initialize(zfp::const_array2d& u, raw::array2d& v, const Constants& c)
{
  v(c.x0, c.y0) = 1;
  u.set(&v[0]);
}

// solve heat equation
template <class state, class scratch>
inline double
solve(state& u, scratch& v, const Constants& c, bool iterator, bool parallel)
{
  // initialize u with point heat source
  initialize(u, v, c);

  // iterate until final time
  double t;
  for (t = 0; t < c.tfinal; t += c.dt) {
    // print time and effective rate
    double rate = double(u.size_bytes(ZFP_DATA_PAYLOAD)) * CHAR_BIT / u.size();
    double rest = double(u.size_bytes(ZFP_DATA_ALL ^ ZFP_DATA_PAYLOAD) * CHAR_BIT / u.size());
    std::cerr << "time=" << std::setprecision(6) << std::fixed << t << " ";
    std::cerr << "rate=" << std::setprecision(3) << std::fixed << rate << " (+" << rest << ")" << std::endl;
    // advance solution one time step
    if (parallel)
      time_step_parallel(u, v, c);
    else if (iterator)
      time_step_iterated(u, v, c);
    else
      time_step_indexed(u, v, c);
  }

  return t;
}

// compute sum of array values
template <class state>
inline double
total(const state& u)
{
  double s = 0;
  const size_t nx = u.size_x();
  const size_t ny = u.size_y();
  for (size_t y = 1; y < ny - 1; y++)
    for (size_t x = 1; x < nx - 1; x++)
      s += u(x, y);
  return s;
}

// compute root mean square error with respect to exact solution
template <class state>
inline double
error(const state& u, const Constants& c, double t)
{
  double e = 0;
  for (size_t y = 1; y < c.ny - 1; y++) {
    double py = c.dy * ((int)y - (int)c.y0);
    for (size_t x = 1; x < c.nx - 1; x++) {
      double px = c.dx * ((int)x - (int)c.x0);
      double f = u(x, y);
      double g = c.dx * c.dy * std::exp(-(px * px + py * py) / (4 * c.k * t)) / (4 * c.pi * c.k * t);
      e += (f - g) * (f - g);
    }
  }
  return std::sqrt(e / ((c.nx - 2) * (c.ny - 2)));
}

// execute solver and evaluate error
template <class state, class scratch>
inline void
execute(state& u, scratch& v, size_t nt, bool iterator, bool parallel)
{
  Constants c(u.size_x(), u.size_y(), nt);
  double t = solve(u, v, c, iterator, parallel);
  double sum = total(u);
  double err = error(u, c, t);
  std::cerr.unsetf(std::ios::fixed);
  std::cerr << "sum=" << std::setprecision(6) << std::fixed << sum << " error=" << std::setprecision(6) << std::scientific << err << std::endl;
}

// print usage information
inline int
usage()
{
  std::cerr << "Usage: diffusion [options]" << std::endl;
  std::cerr << "Options:" << std::endl;
  std::cerr << "-a <tolerance> : use compressed arrays with given absolute error tolerance" << std::endl;
  std::cerr << "-b <blocks> : use 'blocks' 4x4 blocks of cache" << std::endl;
  std::cerr << "-c : use read-only compressed arrays" << std::endl;
  std::cerr << "-d : use double-precision tiled arrays" << std::endl;
  std::cerr << "-f : use single-precision tiled arrays" << std::endl;
#if WITH_HALF
  std::cerr << "-h : use half-precision tiled arrays" << std::endl;
#endif
  std::cerr << "-i : traverse arrays using iterators" << std::endl;
#ifdef _OPENMP
  std::cerr << "-j : use multithreading (only with compressed arrays)" << std::endl;
#endif
  std::cerr << "-n <nx> <ny> : number of grid points" << std::endl;
  std::cerr << "-p <precision> : use compressed arrays with given precision" << std::endl;
  std::cerr << "-r <rate> : use compressed arrays with given compressed bits/value" << std::endl;
  std::cerr << "-R : use compressed arrays with lossless compression" << std::endl;
  std::cerr << "-t <nt> : number of time steps" << std::endl;
  return EXIT_FAILURE;
}

int main(int argc, char* argv[])
{
  size_t nx = 128;
  size_t ny = 128;
  size_t nt = 0;
  size_t cache_size = 0;
  zfp_config config = zfp_config_none();
  bool iterator = false;
  bool parallel = false;
  bool writable = true;
  storage_type type = type_none;

  // parse command-line options
  for (int i = 1; i < argc; i++)
    if (std::string(argv[i]) == "-a") {
      double tolerance;
      if (++i == argc || sscanf(argv[i], "%lf", &tolerance) != 1)
        return usage();
      config = zfp_config_accuracy(tolerance);
    }
    else if (std::string(argv[i]) == "-b") {
      if (++i == argc || (std::istringstream(argv[i]) >> cache_size).fail())
        return usage();
      cache_size *= 4 * 4 * sizeof(double);
    }
    else if (std::string(argv[i]) == "-c")
      writable = false;
    else if (std::string(argv[i]) == "-d")
      type = type_double;
    else if (std::string(argv[i]) == "-f")
      type = type_float;
#if WITH_HALF
    else if (std::string(argv[i]) == "-h")
      type = type_half;
#endif
    else if (std::string(argv[i]) == "-i")
      iterator = true;
#ifdef _OPENMP
    else if (std::string(argv[i]) == "-j")
      parallel = true;
#endif
    else if (std::string(argv[i]) == "-n") {
      if (++i == argc || (std::istringstream(argv[i]) >> nx).fail() ||
          ++i == argc || (std::istringstream(argv[i]) >> ny).fail())
        return usage();
    }
    else if (std::string(argv[i]) == "-p") {
      uint precision;
      if (++i == argc || sscanf(argv[i], "%u", &precision) != 1)
        return usage();
      config = zfp_config_precision(precision);
    }
    else if (std::string(argv[i]) == "-r") {
      double rate;
      if (++i == argc || sscanf(argv[i], "%lf", &rate) != 1)
        return usage();
      config = zfp_config_rate(rate, false);
    }
    else if (std::string(argv[i]) == "-R")
      config = zfp_config_reversible();
    else if (std::string(argv[i]) == "-t") {
      if (++i == argc || (std::istringstream(argv[i]) >> nt).fail())
        return usage();
    }
    else
      return usage();

  bool compression = (config.mode != zfp_mode_null);

  // sanity check command-line arguments
  if (parallel && !compression) {
    fprintf(stderr, "multithreading requires compressed arrays\n");
    return EXIT_FAILURE;
  }
  if (parallel && !writable) {
    fprintf(stderr, "multithreading requires read-write arrays\n");
    return EXIT_FAILURE;
  }
  if (parallel && iterator) {
    fprintf(stderr, "multithreading does not support iterators\n");
    return EXIT_FAILURE;
  }
  if (compression && writable && config.mode != zfp_mode_fixed_rate) {
    fprintf(stderr, "compression mode requires read-only arrays (-c)\n");
    return EXIT_FAILURE;
  }
  if (!writable && !compression) {
    fprintf(stderr, "read-only arrays require compression parameters\n");
    return EXIT_FAILURE;
  }
  if (compression && type != type_none) {
    fprintf(stderr, "tiled arrays do not support compression parameters\n");
    return EXIT_FAILURE;
  }

  // if unspecified, set cache size to two layers of blocks
  if (!cache_size)
    cache_size = 2 * 4 * nx * sizeof(double);

  // solve problem
  if (compression) {
    // use compressed arrays
    if (writable) {
      // use read-write fixed-rate arrays
      zfp::array2d u(nx, ny, config.arg.rate, 0, cache_size);
      zfp::array2d v(nx, ny, config.arg.rate, 0, cache_size);
      execute(u, v, nt, iterator, parallel);
    }
    else {
      // use read-only variable-rate arrays
      zfp::const_array2d u(nx, ny, config, 0, cache_size);
      raw::array2d v(nx, ny);
      execute(u, v, nt, iterator, parallel);
    }
  }
  else {
    // use uncompressed arrays
    switch (type) {
#if WITH_HALF
      case type_half: {
          // use zfp generic codec with tiled half-precision storage
          tiled::array2h u(nx, ny, sizeof(__fp16) * CHAR_BIT, 0, cache_size);
          tiled::array2h v(nx, ny, sizeof(__fp16) * CHAR_BIT, 0, cache_size);
          execute(u, v, nt, iterator, parallel);
        }
        break;
#endif
      case type_float: {
          // use zfp generic codec with tiled single-precision storage
          tiled::array2f u(nx, ny, sizeof(float) * CHAR_BIT, 0, cache_size);
          tiled::array2f v(nx, ny, sizeof(float) * CHAR_BIT, 0, cache_size);
          execute(u, v, nt, iterator, parallel);
        }
        break;
      case type_double: {
          // use zfp generic codec with tiled double-precision storage
          tiled::array2d u(nx, ny, sizeof(double) * CHAR_BIT, 0, cache_size);
          tiled::array2d v(nx, ny, sizeof(double) * CHAR_BIT, 0, cache_size);
          execute(u, v, nt, iterator, parallel);
        }
        break;
      default: {
          // use uncompressed array with row-major double-precision storage
          raw::array2d u(nx, ny, sizeof(double) * CHAR_BIT);
          raw::array2d v(nx, ny, sizeof(double) * CHAR_BIT);
          execute(u, v, nt, iterator, parallel);
        }
        break;
    }
  }

  return 0;
}