File: testreduce.c

package info (click to toggle)
gasnet 2025.8.0-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 17,424 kB
  • sloc: ansic: 114,758; cpp: 5,158; sh: 4,847; makefile: 2,715; perl: 1,774
file content (493 lines) | stat: -rw-r--r-- 19,653 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
/*   $Source: bitbucket.org:berkeleylab/gasnet.git/tests/testreduce.c $
 * Description: test of user-defined reductions
 * Copyright 2018, The Regents of the University of California
 * Terms of use are as specified in license.txt
 */

#define SCRATCH_SIZE (2*1024*1024)

#ifndef TEST_SEGSZ
#define TEST_SEGSZ (PAGESZ + SCRATCH_SIZE) // even team's scratch + pad
#endif

#include <gasnetex.h>
#include <gasnet_coll.h>
#include <test.h>

#include <string.h>
#include <stdint.h>
#include <float.h>
#include <math.h>

// Size of vector
static int Nelem;
#define NELEM_DEFAULT 64

// Polymorphic operator for a commutative operation: addition
void op_ADD(const void * arg1,
            void *       arg2_and_out,
            size_t       count,
            const void * cdata)
{
  gex_DT_t dt = (gex_DT_t)(uintptr_t)cdata;
  switch (dt) {
    case GEX_DT_I32:
    case GEX_DT_U32: {
      const uint32_t * restrict x = arg1;
            uint32_t * restrict y = arg2_and_out;
      for (size_t i = 0; i < count; ++i) y[i] += x[i];
      break;
    }
    case GEX_DT_I64:
    case GEX_DT_U64: {
      const uint64_t * restrict x = arg1;
            uint64_t * restrict y = arg2_and_out;
      for (size_t i = 0; i < count; ++i) y[i] += x[i];
      break;
    }
    case GEX_DT_FLT: {
      const float * restrict x = arg1;
            float * restrict y = arg2_and_out;
      for (size_t i = 0; i < count; ++i) y[i] += x[i];
      break;
    }
    case GEX_DT_DBL: {
      const double * restrict x = arg1;
            double * restrict y = arg2_and_out;
      for (size_t i = 0; i < count; ++i) y[i] += x[i];
      break;
    }
  }
}

// Type and operator for a non-commutative operation: strcat
// "Post-concatenates" two null-terminated strings (e.g. "arg2 .= arg1" in perl)
static size_t myDT_sz;
void op_concat(const void * arg1,
               void *       arg2_and_out,
               size_t       count,
               const void * cdata)
{
  const size_t dt_sz = (size_t)(uintptr_t)cdata;
  const char * restrict x = arg1;
        char * restrict y = arg2_and_out;
  for (size_t i = 0; i < count; ++i, x+=dt_sz, y+=dt_sz) {
    strcat(y,x);
  }
}

// Macros to ease other macros
#define I32_TYPE  int32_t
#define U32_TYPE uint32_t
#define I64_TYPE  int64_t
#define U64_TYPE uint64_t
#define FLT_TYPE    float
#define DBL_TYPE   double

// Convenience to avoid calling TEST_RAND with lo==hi
#define RAND_ROOT(size) ((size == 1) ? 0 : TEST_RAND(0, size-1))

// Test ADD (both built-in and user-defined for a built-in DT
#define TEST_ADD(DT) do {                                                     \
  typedef DT##_TYPE TYPE;                                                     \
  size_t nbytes = Nelem * sizeof(TYPE);                                       \
  TYPE *answers = test_malloc(iters * nbytes);                                \
  TYPE *src = test_malloc(iters * nbytes);                                    \
  TYPE *dst = answers;                                                        \
  int failures = 0;                                                           \
  int local_count = 0;                                                        \
  for (int i = 0; i < Nelem; ++i) src[i] = (rank + i);                        \
  for (int i = 0; i < iters; ++i) {                                           \
    if (TEST_RAND_ONEIN(5)) {                                                 \
      memset(dst, 0xaa, nbytes);                                              \
      ev[i] = gex_Coll_ReduceToAllNB(tm, dst, src,                            \
                                     GEX_DT_##DT, sizeof(TYPE), Nelem,        \
                                     GEX_OP_ADD, NULL, NULL, 0);              \
      local_count += 1;                                                       \
      dst += Nelem;                                                           \
    } else {                                                                  \
      gex_Rank_t root = RAND_ROOT(size);                                      \
      if (root == rank) memset(dst, 0x55, nbytes);                            \
      ev[i] = gex_Coll_ReduceToOneNB(tm, root, dst, src,                      \
                                     GEX_DT_##DT, sizeof(TYPE), Nelem,        \
                                     GEX_OP_ADD, NULL, NULL, 0);              \
      if (root == rank) {                                                     \
        local_count += 1;                                                     \
        dst += Nelem;                                                         \
      }                                                                       \
    }                                                                         \
  }                                                                           \
  gex_Event_WaitAll(ev, iters, 0);                                            \
  dst = answers;                                                              \
  for (int i = 0; i < local_count; ++i) {                                     \
    for (int j = 0; j < Nelem; ++j) {                                         \
      uint64_t got = *dst++;                                                  \
      if (got != correct[j]) {                                                \
        MSG("Mismatch GEX_OP_ADD(GEX_DT_" #DT "), iter=%i, elem=%i"           \
            " (got=%" PRIu64 ", want=%" PRIu64 ")", i, j, got, correct[j]);   \
        ++failures;                                                           \
      }                                                                       \
    }                                                                         \
  }                                                                           \
  local_count = 0;                                                            \
  dst = answers;                                                              \
  for (int i = 0; i < iters; ++i) {                                           \
    if (TEST_RAND_ONEIN(5)) {                                                 \
      memset(dst, 0xee, nbytes);                                              \
      ev[i] = gex_Coll_ReduceToAllNB(tm, dst, src,                            \
                                     GEX_DT_##DT, sizeof(TYPE), Nelem,        \
                                     GEX_OP_USER, &op_ADD,                    \
                                     (void*)(uintptr_t)GEX_DT_##DT, 0);       \
      local_count += 1;                                                       \
      dst += Nelem;                                                           \
    } else {                                                                  \
      gex_Rank_t root = RAND_ROOT(size);                                      \
      if (root == rank) memset(dst, 0x33, nbytes);                            \
      ev[i] = gex_Coll_ReduceToOneNB(tm, root, dst, src,                      \
                                     GEX_DT_##DT, sizeof(TYPE), Nelem,        \
                                     GEX_OP_USER, &op_ADD,                    \
                                     (void*)(uintptr_t)GEX_DT_##DT, 0);       \
      if (root == rank) {                                                     \
        local_count += 1;                                                     \
        dst += Nelem;                                                         \
      }                                                                       \
    }                                                                         \
  }                                                                           \
  gex_Event_WaitAll(ev, iters, 0);                                            \
  dst = answers;                                                              \
  for (int i = 0; i < local_count; ++i) {                                     \
    for (int j = 0; j < Nelem; ++j) {                                         \
      uint64_t got = *dst++;                                                  \
      if (got != correct[j]) {                                                \
        MSG("Mismatch GEX_OP_USER(GEX_DT_" #DT "), iter=%i, elem=%i"          \
            " (got=%" PRIu64 ", want=%" PRIu64 ")", i, j, got, correct[j]);   \
        ++failures;                                                           \
      }                                                                       \
    }                                                                         \
  }                                                                           \
  test_free(src);                                                             \
  test_free(answers);                                                         \
  if (failures) ERR("GEX_DT_" #DT ": %d failures", failures);                 \
  gex_Event_Wait(gex_Coll_BarrierNB(tm,0));                                   \
} while (0)

static int iters = 0;

//
// GEX_OP ADD
// Test for every type which can exactly represent Sum(ranks).
// This tests both built-in and user-defines ADD operations
// TODO: split to make a team small enough to test instead of skipping?
// TODO: args to limit which tests?
//
void do_test_add(gex_TM_t tm)
{
    gex_Rank_t rank = gex_TM_QueryRank(tm);
    gex_Rank_t size = gex_TM_QuerySize(tm);

    uint64_t *correct = test_malloc(iters * Nelem * sizeof(uint64_t));
    gex_Event_t *ev = test_malloc(iters * sizeof(gex_Event_t));

    assert_always(size <= INT32_MAX - Nelem);
    // Note use of a wide i to prevent overflow in intermediate value(s)
    for (uint64_t i = 0; i < Nelem; ++i) {
      correct[i] = ((size + i) * (size + i - 1) - i * (i - 1)) / 2;
    }

    uint64_t biggest = correct[Nelem-1];
    if (biggest <= (uint64_t) INT32_MAX) {
      MSG0("Running ADD:GEX_DT_I32 test...");
      TEST_ADD(I32);
    } else {
      MSG0("WARNING: skipping ADD:GEX_DT_I32 test (would overflow)");
    }
    if (biggest <= (uint64_t)UINT32_MAX) {
      MSG0("Running ADD:GEX_DT_U32 test...");
      TEST_ADD(U32);
    } else {
      MSG0("WARNING: skipping ADD:GEX_DT_U32 test (would overflow)");
    }
    MSG0("Running ADD:GEX_DT_I64 test...");
    TEST_ADD(I64);
    MSG0("Running ADD:GEX_DT_U64 test...");
    TEST_ADD(U64);
    if (biggest < (uint64_t)pow(FLT_RADIX, FLT_MANT_DIG)) {
      MSG0("Running ADD:GEX_DT_FLT test...");
      TEST_ADD(FLT);
    } else {
      MSG0("WARNING: skipping ADD:GEX_DT_FLT test (exceeds mantissa bits)");
    }
    if (biggest < (uint64_t)pow(FLT_RADIX, DBL_MANT_DIG)) {
      MSG0("Running ADD:GEX_DT_DBL test...");
      TEST_ADD(DBL);
    } else {
      MSG0("WARNING: skipping ADD:GEX_DT_DBL test (exceeds mantissa bits)");
    }

    test_free(ev);
    test_free(correct);
}

//
// Test "concat" operator on user-defined data type (a fixed-len char[])
//
void do_test_user(gex_TM_t tm)
{
    MSG0("Running GEX_DT_USER tests...");

    gex_Rank_t rank = gex_TM_QueryRank(tm);
    gex_Rank_t size = gex_TM_QuerySize(tm);

    // Use just 'A' - 'Z' for ease of debugging by humans
    #define BASE_CHAR 'A'
    #define NUM_CHAR  26
    size_t dt_sz = size + 1; // One for '\0' terminator
    char *InStrings  = test_calloc(1, Nelem * dt_sz * sizeof(char));
    char *OutStrings = test_malloc(Nelem * dt_sz * sizeof(char));
    for (size_t i = 0; i < Nelem; ++i) {
      char *p = InStrings + i * dt_sz;
      p[0] = BASE_CHAR + ((rank + i) % NUM_CHAR);
      p[1] = '\0';
    }

    // GEX_OP_USER(GEX_DT_USER): OP(x,y) := strcat(y,x)
    // Due to permitted assumption of commutativity, order is unspecified.
    // However, the reduction must preserve number of instances of each char.
    int failures = 0;
    for (int i = 0; i < iters; ++i) {
      gex_Rank_t root = RAND_ROOT(size);
      if (TEST_RAND_ONEIN(5)) {
        gex_Event_Wait(
          gex_Coll_ReduceToAllNB(tm, OutStrings, InStrings,
                                 GEX_DT_USER, dt_sz, Nelem,
                                 GEX_OP_USER, &op_concat,
                                 (void*)(uintptr_t)dt_sz, 0));

      } else {
        gex_Event_Wait(
          gex_Coll_ReduceToOneNB(tm, root, OutStrings, InStrings,
                                 GEX_DT_USER, dt_sz, Nelem,
                                 GEX_OP_USER, &op_concat,
                                 (void*)(uintptr_t)dt_sz, 0));
      }
      if (rank==root) { // Even in ToAll case, one (random) proc verifies
        for (size_t j = 0; j < Nelem; ++j) {
          const char *str = OutStrings + j * dt_sz;
          // Count the number of occurances of each char
          gex_Rank_t tally[NUM_CHAR];
          memset(tally, 0, sizeof(tally));
          for (size_t k = 0; k < (dt_sz-1); ++k) {
            uint8_t idx = str[k] - BASE_CHAR;
            if (idx > 25) {
              ++failures;
            } else {
              ++tally[idx];
            }
          }
          // Check counts against the expected ones
          gex_Rank_t numer = size / NUM_CHAR;
          gex_Rank_t denom = size % NUM_CHAR;
          for (int k = 0; k < NUM_CHAR; ++k) {
            size_t expect = (numer + (k < denom));
            failures += (tally[(j + k) % NUM_CHAR] != expect);
          }
        }
      }
    }
    if (failures) ERR("GEX_OP_USER(GEX_DT_USER): %d failures", failures);
    gex_Event_Wait(gex_Coll_BarrierNB(tm,0));

    // GEX_OP_USER_NC(GEX_DT_USER): OP(x,y) := strcat(y,x)
    // Due to non-commutativity, order must be preserved.
  #if 0 // TODO: enabled once GEX_OP_USER_NC is implemented
    failures = 0;
    for (int i = 0; i < iters; ++i) {
      gex_Rank_t root = RAND_ROOT(size);
      if (TEST_RAND_ONEIN(5)) {
        gex_Event_Wait(
          gex_Coll_ReduceToAllNB(tm, OutStrings, InStrings,
                                 GEX_DT_USER, dt_sz, Nelem,
                                 GEX_OP_USER_NC, &op_concat,
                                 (void*)(uintptr_t)dt_sz, 0));
      } else {
        gex_Event_Wait(
          gex_Coll_ReduceToOneNB(tm, root, OutStrings, InStrings,
                                 GEX_DT_USER, dt_sz, Nelem,
                                 GEX_OP_USER_NC, &op_concat,
                                 (void*)(uintptr_t)dt_sz, 0));
      }
      if (rank==root) { // Even in ToAll case, one (random) proc verifies
        for (size_t j = 0; j < Nelem; ++j) {
          // validate result against properly ordered (lexically reversed) result
          const char *str = OutStrings + j * dt_sz;
          int idx = (j + (size - 1)) % NUM_CHAR;
          for (size_t k = 0; k < (dt_sz-1); ++k) {
            failures += (str[k] != BASE_CHAR + idx);
             // not using % since (-1 % x) is -1, not (x-1)
            idx = idx ? (idx - 1) : (NUM_CHAR - 1);
          }
        }
      }
    }
    if (failures) ERR("GEX_OP_USER_NC(GEX_DT_USER): %d failures", failures);
    gex_Event_Wait(gex_Coll_BarrierNB(tm,0));
  #endif

  test_free(OutStrings);
  test_free(InStrings);
}

void do_tests(gex_TM_t tm) {
  do_test_add(tm);    // GEX_OP_ADD
  do_test_user(tm);   // GEX_DT_USER

  //
  // TODO: test the remaining built-in operators.
  //
  // Validation:
  // Option 1: Assuming the user-defined ADD passed above, use a user-defined
  //           reduction as the reference for validation of each built-in
  //           operator.
  // Option 2: Follow the pattern of the ADD test above, and chose inputs that
  //           provide a locally computable reference for validation.
  //
  // GEX_OP_MULT:
  // There is concern over FP Mult and exact reproducibility.
  // If (and only if) we assume a "high-quality" implementation with full
  // reproducability of the order of evaliuation it *might* be reasonable to
  // exect bit-wise idenitcal results from the built-in and user-defined
  // versions of the same operator (use Option 1, above).  However, that sounds
  // risky.
  // An alternative to use Option 2, picking the input with care such that the
  // product will never (even at an intermediate value in the worst-case
  // application of commutativity) require more than the available mantissa
  // digits (24 for IEEE float).
  // Another alternative is to test for equality within "epsilon".  However,
  // without help from a skilled numerical analyst (or a text by one), it is
  // not obvious how that would need to scale with the number of ranks.
  //
  // GEX_OP_{MIN,MAX}:
  // No issues are anticipated, as long as we do not include Nan, Inf or
  // negative-0 among the FP inputs.
  //
  // GEX_OP_{AND,OR}:
  // Care is needed to avoid inputs which "saturate" the output.  Otherwise the
  // values 0 and ~0 could be arrived at "accidentally" and still pass
  // validation.
  //
  // GEX_OP_XOR:
  // No issues are anticipated.
  //
}

static gex_TM_t myteam;
static gex_TM_t subtm = GEX_TM_INVALID;
static int done = 0;

void *thread_main(void *arg) {
  int tid = (int)(uintptr_t)arg;

#if GASNET_PAR
  if (tid) {
    GASNET_BLOCKUNTIL(done);
    return NULL;
  }
#else
  assert_always(!tid);
#endif

  MSG0("Testing over all ranks:");
  do_tests(myteam);

  MSG0("Testing over odd/even subteams:");
  do_tests(subtm);

  done = 1;

  return NULL;
}

int main(int argc, char **argv)
{
  gex_Client_t      myclient;
  gex_EP_t          myep;
  gex_Segment_t     mysegment;
  int               pollers = 0;

  gex_Client_Init(&myclient, &myep, &myteam, "testreduce", &argc, &argv, 0);

  int arg = 1;
  int help = 0;
  while (argc > arg) {
    if (!strcmp(argv[arg], "-p")) {
#if GASNET_PAR
      ++arg;
      if (argc > arg) { pollers = atoi(argv[arg]); arg++; }
      else help = 1;
#else
      if (0 == gex_TM_QueryRank(myteam)) {
        fprintf(stderr, "testcoll %s\n", GASNET_CONFIG_STRING);
        fprintf(stderr, "ERROR: The -p option is only available in the PAR configuration.\n");
        fflush(NULL);
      }
      sleep(1);
      gasnet_exit(1);
#endif
    } else if (argv[arg][0] == '-') {
      help = 1;
      ++arg;
    } else break;
  }

  if (argc > arg) { iters = atoi(argv[arg]); ++arg; }
  if (!iters) iters = 1000;

  if (argc > arg) { Nelem = atoi(argv[arg]); ++arg; }
  if (!Nelem) Nelem = NELEM_DEFAULT;

  unsigned int seed = 0;
  if (argc > arg) { seed = atoi(argv[arg]); ++arg; }

#if GASNET_PAR
  #define USAGE "[options] (iters) (nelem) (seed)\n" \
                "  The -p option gives the number of polling threads, specified as\n" \
                "    a non-negative integer argument (default is no polling threads)."
#else
  #define USAGE "(iters) (nelem) (seed)"
#endif
  test_init("testreduce",0,USAGE);

  TEST_SET_WAITMODE(1 + pollers);
  if (argc > arg || help) test_usage();

  GASNET_Safe(gex_Segment_Attach(&mysegment, myteam, TEST_SEGSZ_REQUEST));
  BARRIER();

  if (seed == 0) {
    seed = (((unsigned int)TIME()) & 0xFFFF);
    TEST_BCAST(&seed, 0, &seed, sizeof(seed));
  }
  TEST_SRAND(seed); // SAME seed

  MSG0("Running %i iterations of %i-element reduction tests (seed = %u).", iters, Nelem, seed);

  int color = gex_TM_QueryRank(myteam) & 1; // odds & evens
  size_t scratch_sz = gex_TM_Split(&subtm, myteam, color, 0, 0, 0, GEX_FLAG_TM_SCRATCH_SIZE_RECOMMENDED);
  gex_TM_Split(&subtm, myteam, color, 0, (void*)TEST_MYSEG(), scratch_sz, 0);

#if GASNET_PAR
  MSG("Forking %d gasnet threads (1 active, %d polling)", pollers+1, pollers);
  test_createandjoin_pthreads(pollers+1, &thread_main, NULL, 0);
#else
  thread_main(NULL);
#endif

  GASNET_Safe(gex_TM_Destroy(subtm, NULL, 0));

  BARRIER();
  MSG0("done.");
  gasnet_exit(0);

  /* Not reached in most implementations */
  return 0;
}