File: pio_write_setup_grid.c

package info (click to toggle)
cdo 2.5.4-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 49,964 kB
  • sloc: cpp: 180,221; ansic: 95,352; sh: 7,292; f90: 6,089; makefile: 1,975; ruby: 1,078; csh: 1,020; python: 995; fortran: 319; pascal: 219; perl: 9
file content (186 lines) | stat: -rw-r--r-- 6,430 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
#if defined(HAVE_CONFIG_H)
#include "config.h"
#endif

#include <stdio.h>
#include <stdlib.h>

#ifdef USE_MPI
#include <mpi.h>
#include <yaxt.h>
#include "error.h"
#include "pio_util.h"
#endif
#include "dmemory.h"

#if defined USE_MPI && defined HAVE_PPM_CORE
#include <ppm/ppm_uniform_partition.h>
#include <core/ppm_combinatorics.h>
#endif

#include "cdi.h"
#ifdef USE_MPI
#include "cdipio.h"
#endif
#include "pio_write_setup_grid.h"
#include "cdi_uuid.h"
#include "simple_model_helper.h"

#if USE_MPI
void
findPartition2D(int npart[2], int num_parts)
{
  const uint64_t rscale = 256;
  uint32_t *factors = NULL;
  xassert(num_parts > 0);
  int numFactors = PPM_prime_factorization_32((uint32_t) num_parts, &factors);
  /* try to distribute prime factors on dimensions to get
   * approx. 2 times as many parts in x dim than y dim */
  const uint64_t optimumRatio = rscale * 2;
  npart[0] = num_parts, npart[1] = 1;
  uint_fast32_t npart_attempt[2];
  uint64_t bestRatio = (uint64_t) num_parts * rscale, bestDiff = (uint64_t) llabs((long long) (bestRatio - optimumRatio));
  /* test all assignments of factors to dimensions, starting with
   * only one assigned to x dim (omitting 0 because that would
   * always give npart[1] > npart[0] */
  for (int assign2X = 1; assign2X <= numFactors; ++assign2X)
  {
    uint_fast32_t pattern = (UINT32_C(1) << assign2X) - 1, lastPattern = pattern << (numFactors - assign2X);
    do {
      npart_attempt[0] = 1;
      npart_attempt[1] = 1;
      /* loop over all factors */
      for (uint_fast32_t i = 0; i < (uint_fast32_t) numFactors; ++i)
      {
        uint_fast32_t dim_idx = (pattern >> i) & 1;
        npart_attempt[dim_idx] *= factors[i];
      }
      uint64_t ratio = ((uint64_t) npart_attempt[0] * rscale) / (uint64_t) npart_attempt[1];
      uint64_t diff = (uint64_t) llabs((long long) (ratio - optimumRatio));
      if (diff < bestDiff)
      {
        npart[0] = (int) npart_attempt[0];
        npart[1] = (int) npart_attempt[1];
        bestDiff = diff;
        bestRatio = ratio;
      }
      {
        uint_fast32_t t;
#if HAVE_DECL___BUILTIN_CTZ
        t = pattern | (pattern - 1);
        pattern = (t + 1) | (((~t & -~t) - 1) >> (__builtin_ctz((unsigned) pattern) + 1));
#else
        t = (pattern | (pattern - 1)) + 1;
        pattern = t | ((((t & -t) / (pattern & -pattern)) >> 1) - 1);
#endif
      }
    } while (pattern <= lastPattern);
  }
  Free(factors);
}
#endif

int
setupGrid(const struct model_config *setup, MPI_Comm comm)
{
  int nlon = setup->nlon, nlat = setup->nlat;
  int gridID;
  int rank = 0;
#if USE_MPI
  xmpi(MPI_Comm_rank(comm, &rank));
#else
  (void) comm;
#endif
  if (setup->flags & PIO_WRITE_CONFIG_USE_DIST_GRID_FLAG)
  {
#if USE_MPI
    if (setup->flags & PIO_WRITE_CONFIG_CREATE_CURVILINEAR_GRID_FLAG)
    {
      size_t gridsize = (size_t) nlon * (size_t) nlat;
      Xt_int grid_global_shape[2] = { (Xt_int) nlat, (Xt_int) nlon }, grid_local_chunk_start[2];
      int nprocs;
      xmpi(MPI_Comm_size(comm, &nprocs));
      int nprocs2D[2], grid_local_chunk_shape[2];
      findPartition2D(nprocs2D, nprocs);
      {
        struct PPM_extent set_interval_y = { 0, nlat },
                          part_interval_y = PPM_uniform_partition(set_interval_y, nprocs2D[1], rank / nprocs2D[0]);
        grid_local_chunk_shape[0] = part_interval_y.size;
        grid_local_chunk_start[0] = part_interval_y.first;
      }
      {
        struct PPM_extent set_interval_x = { 0, nlon },
                          part_interval_x = PPM_uniform_partition(set_interval_x, nprocs2D[0], rank % nprocs2D[0]);
        grid_local_chunk_shape[1] = part_interval_x.size;
        grid_local_chunk_start[1] = part_interval_x.first;
      }
      {
        Xt_idxlist partDesc2D = xt_idxsection_new((Xt_int) 0, 2, grid_global_shape, grid_local_chunk_shape, grid_local_chunk_start);
        const int gridDecoChunks[2][2] = { [0] = {
                [0] = (int)grid_local_chunk_start[0],
                [1] = (int)grid_local_chunk_shape[0],
              }, [1] = {
                [0] = (int)grid_local_chunk_start[1],
                [1] = (int)grid_local_chunk_shape[1],
              } };
        gridID = cdiPioDistGridCreate(GRID_CURVILINEAR, (int) gridsize, nlon, nlat, 0, gridDecoChunks, partDesc2D, NULL, NULL);
        xt_idxlist_delete(partDesc2D);
      }
      size_t grid_chunk_size = (size_t) grid_local_chunk_shape[0] * (size_t) grid_local_chunk_shape[1];
      {
        double *grid_chunk = (double *) Malloc(2 * grid_chunk_size * sizeof(*grid_chunk));
        /* anti-clockwise coordinates around Amazonia */
        static const struct cart_coord region[4] = { { .lon = DEG2RAD(-85.0), .lat = DEG2RAD(-25.0) },
                                                     { .lon = DEG2RAD(-44.0), .lat = DEG2RAD(-18.0) },
                                                     { .lon = DEG2RAD(-50.0), .lat = DEG2RAD(7.0) },
                                                     { .lon = DEG2RAD(-80.0), .lat = DEG2RAD(10.0) } };
        const size_t xyRange[2][2] = { { (size_t) grid_local_chunk_start[1], (size_t) grid_local_chunk_start[0] },
                                       { (size_t) grid_local_chunk_shape[1], (size_t) grid_local_chunk_shape[0] } };
        computeCurvilinearChunk(grid_chunk, region, (size_t) nlon, (size_t) nlat, xyRange);
        gridDefXvals(gridID, grid_chunk + grid_chunk_size);
        gridDefYvals(gridID, grid_chunk);
        Free(grid_chunk);
      }
    }
    else
    {
      fputs("Error: distributed lat-lon grids are unsupported"
            " at this moment.\n",
            stderr);
      abort();
    }
#else
    fputs("Error: distributed grids are unsupported for"
          " non-MPI configurations.\n",
          stderr);
    abort();
#endif
  }
  else
  {
    if (setup->flags & PIO_WRITE_CONFIG_CREATE_CURVILINEAR_GRID_FLAG)
      gridID = createLocalCurvilinearGrid(nlon, nlat);
    else
      gridID = createGlobalLatLonGrid(nlon, nlat);
  }
  if (setup->flags & PIO_WRITE_CONFIG_CREATE_UUID_FLAG)
  {
    unsigned char uuid[CDI_UUID_SIZE];
    if (rank == 0) cdiCreateUUID(uuid);
#if USE_MPI
    MPI_Bcast(uuid, CDI_UUID_SIZE, MPI_UNSIGNED_CHAR, 0, comm);
#endif
    gridDefUUID(gridID, uuid);
  }
  return gridID;
}

/*
 * Local Variables:
 * c-file-style: "Java"
 * c-basic-offset: 2
 * indent-tabs-mode: nil
 * show-trailing-whitespace: t
 * require-trailing-newline: t
 * End:
 */