File: process_grid.cc (deal.II 9.0.1)

// ---------------------------------------------------------------------
//
// Copyright (C) 2017 - 2018 by the deal.II authors
//
// This file is part of the deal.II library.
//
// The deal.II library is free software; you can use it, redistribute
// it, and/or modify it under the terms of the GNU Lesser General
// Public License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// The full text of the license can be found in the file LICENSE at
// the top level of the deal.II distribution.
//
// ---------------------------------------------------------------------

#include <deal.II/base/process_grid.h>

#ifdef DEAL_II_WITH_SCALAPACK

#include <deal.II/lac/scalapack.templates.h>

DEAL_II_NAMESPACE_OPEN

namespace
{
  /**
   * Internal function to determine the dimensions of the process grid based on
   * the total number of cores, the matrix dimensions and the matrix block sizes.
   *
   * Amesos heuristics:
   * https://github.com/trilinos/Trilinos/blob/master/packages/amesos/src/Amesos_Scalapack.cpp#L142-L166
   *
   * Elemental default grid: El::Grid::Grid(mpi_comm,...)
   * https://github.com/elemental/Elemental/blob/master/src/core/Grid.cpp#L67-L91
   */
  inline
  std::pair<int,int> compute_processor_grid_sizes(MPI_Comm mpi_comm,
                                                  const unsigned int m, const unsigned int n,
                                                  const unsigned int block_size_m, const unsigned int block_size_n)
  {
    // A few notes from the ScaLAPACK user guide:
    // It is possible to predict the best grid shape given the number of processes available:
    // Pr x Pc <= P
    // This, however, depends on the task to be done.
    // LU, QR and QL factorizations perform better on “flat” process grids (Pr < Pc).
    // For large N, Pc = 2*Pr is a good choice, whereas for small N one should choose a small Pr.
    // Square or nearly square grids are better suited for Cholesky factorization.
    // LQ and RQ factorizations take advantage of “tall” grids (Pr > Pc).

    // Below we always try to create 2D processor grids:

    int n_processes;
    MPI_Comm_size(mpi_comm, &n_processes);

    // Get the total number of cores we can occupy in a rectangular dense matrix
    // with rectangular blocks when every core owns only a single block:
    const int n_processes_heuristic = int(std::ceil((1.*m)/block_size_m))*
                                      int(std::ceil((1.*n)/block_size_n));
    const int Np = std::min(n_processes_heuristic, n_processes);

    // Now we need to split Np into Pr x Pc. Assume we know the shape/ratio
    // Pc =: ratio * Pr
    // and therefore
    // Np = Pc * Pc / ratio
    // For square matrices the ratio equals 1.
    const double ratio = double(n)/m;
    int Pc = std::floor(std::sqrt(ratio * Np));

    // One could round Pc up to the nearest integer that divides Np without remainder:
    // while ( Np % Pc != 0 )
    //  ++Pc;
    // but this can change the grid shape dramatically, i.e. for 10 cores a 3x3 grid becomes 2x5.

    // limit our estimate to be in [2, Np]
    int n_process_columns = std::min (Np, std::max(2, Pc));
    // finally, get the rows:
    int n_process_rows = Np / n_process_columns;

    Assert (n_process_columns >=1 && n_process_rows >=1 && n_processes >= n_process_rows*n_process_columns,
            ExcMessage("error in process grid: "+
                       std::to_string(n_process_rows)+"x"+
                       std::to_string(n_process_columns)+
                       "="+
                       std::to_string(n_process_rows*n_process_columns)+
                       " out of " +
                       std::to_string(n_processes)));

    return std::make_pair(n_process_rows,n_process_columns);

    // For example,
    // 320x320 with 32x32 blocks and 16 cores:
    // Pc = 1.0 * Pr  => 4x4 grid
    // Pc = 0.5 * Pr  => 8x2 grid
    // Pc = 2.0 * Pr  => 3x5 grid
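    //
    // To make the heuristic concrete, here is the arithmetic for the first
    // (Pc = 1.0 * Pr) case, following the code in this function step by step:
    //   n_processes_heuristic = ceil(320/32) * ceil(320/32) = 10 * 10 = 100
    //   Np    = min(100, 16)        = 16
    //   ratio = 320/320             = 1
    //   Pc    = floor(sqrt(1 * 16)) = 4   (already within [2, Np])
    //   Pr    = Np / Pc = 16 / 4    = 4   =>  4x4 grid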
  }
}

namespace Utilities
{
  namespace MPI
  {

    ProcessGrid::ProcessGrid(MPI_Comm mpi_comm,
                             const std::pair<unsigned int,unsigned int> &grid_dimensions)
      :
      mpi_communicator(mpi_comm),
      this_mpi_process(Utilities::MPI::this_mpi_process(mpi_communicator)),
      n_mpi_processes(Utilities::MPI::n_mpi_processes(mpi_communicator)),
      n_process_rows(grid_dimensions.first),
      n_process_columns(grid_dimensions.second)
    {
      Assert (grid_dimensions.first > 0,
              ExcMessage("Number of process grid rows has to be positive."));
      Assert (grid_dimensions.second > 0,
              ExcMessage("Number of process grid columns has to be positive."));

      Assert (grid_dimensions.first*grid_dimensions.second <= n_mpi_processes,
              ExcMessage("Size of process grid is larger than number of available MPI processes."));

      // Processor grid order: use row-major ordering.
      const bool column_major = false;

      // Initialize the BLACS context from the provided communicator
      blacs_context = Csys2blacs_handle(mpi_communicator);
      const char *order = ( column_major ? "Col" : "Row" );
      // Note that blacs_context can be modified below. Thus Cblacs2sys_handle
      // may not return the same MPI communicator.
      Cblacs_gridinit(&blacs_context, order, n_process_rows, n_process_columns);

      // BLACS may modify the grid size on processes which are not used
      // in the grid, so work with copies below:
      int procrows_ = n_process_rows;
      int proccols_ = n_process_columns;
      Cblacs_gridinfo( blacs_context, &procrows_, &proccols_, &this_process_row, &this_process_column );
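
      // As an illustration (hypothetical numbers): with a 2x3 grid in
      // row-major order, the first six MPI ranks are laid out as
      //   ranks  0 1 2    (grid row 0)
      //          3 4 5    (grid row 1)
      // so rank 4 would see this_process_row == 1 and this_process_column == 1.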

      // If this MPI process is not on the grid, flag it as inactive and
      // skip all jobs.
      // Note that a different condition is used in the FORTRAN code discussed here:
      // https://stackoverflow.com/questions/18516915/calling-blacs-with-more-processes-than-used
      if (this_process_row < 0 || this_process_column < 0)
        mpi_process_is_active = false;
      else
        mpi_process_is_active = true;

      // Create an auxiliary communicator which contains the root and all inactive cores.
      // Assume that the inactive cores have ranks starting at n_process_rows*n_process_columns.
      const unsigned int n_active_mpi_processes = n_process_rows*n_process_columns;
      Assert (mpi_process_is_active ||
              this_mpi_process >= n_active_mpi_processes,
              ExcInternalError());

      std::vector<int> inactive_with_root_ranks;
      inactive_with_root_ranks.push_back(0);
      for (unsigned int i = n_active_mpi_processes; i < n_mpi_processes; ++i)
        inactive_with_root_ranks.push_back(i);
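
      // As an illustration of the assumed layout (numbers are hypothetical):
      // with 11 MPI processes and a 3x3 grid, processes 0..8 are active on the
      // BLACS grid, processes 9 and 10 are inactive, and
      // inactive_with_root_ranks ends up as {0, 9, 10}.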

      // Get the group of processes in mpi_communicator
      int ierr = 0;
      MPI_Group all_group;
      ierr = MPI_Comm_group(mpi_communicator, &all_group);
      AssertThrowMPI(ierr);

      // Construct the group containing all ranks we need:
      MPI_Group inactive_with_root_group;
      const int n = inactive_with_root_ranks.size();
      ierr = MPI_Group_incl(all_group,
                            n, inactive_with_root_ranks.data(),
                            &inactive_with_root_group);
      AssertThrowMPI(ierr);

      // Create the communicator based on the group
      // Note that on most cores the communicator will be MPI_COMM_NULL.
      // FIXME: switch to MPI_Comm_create_group for MPI-3 so that only processes within the to-be subgroup call this
      ierr = MPI_Comm_create(mpi_communicator, inactive_with_root_group,
                             &mpi_communicator_inactive_with_root);
      AssertThrowMPI(ierr);

      ierr = MPI_Group_free(&all_group);
      AssertThrowMPI(ierr);
      ierr = MPI_Group_free(&inactive_with_root_group);
      AssertThrowMPI(ierr);

      // Double-check that the process with rank 0 in the subgroup is active:
#ifdef DEBUG
      if (mpi_communicator_inactive_with_root != MPI_COMM_NULL &&
          Utilities::MPI::this_mpi_process(mpi_communicator_inactive_with_root) == 0)
        Assert (mpi_process_is_active, ExcInternalError());
#endif
    }



    ProcessGrid::ProcessGrid(MPI_Comm mpi_comm,
                             const unsigned int n_rows_matrix,
                             const unsigned int n_columns_matrix,
                             const unsigned int row_block_size,
                             const unsigned int column_block_size)
      :
      ProcessGrid(mpi_comm,
                  compute_processor_grid_sizes(mpi_comm, n_rows_matrix, n_columns_matrix,
                                               row_block_size, column_block_size) )
    {}
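
    // A usage sketch for this constructor (illustrative only; the concrete
    // numbers are hypothetical): on 16 MPI processes, a 320x320 matrix
    // distributed in 32x32 blocks yields a 4x4 process grid, as worked out in
    // compute_processor_grid_sizes() above.
    //
    //   Utilities::MPI::ProcessGrid grid(MPI_COMM_WORLD, 320, 320, 32, 32);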



    ProcessGrid::ProcessGrid(MPI_Comm mpi_comm,
                             const unsigned int n_rows,
                             const unsigned int n_columns)
      :
      ProcessGrid(mpi_comm,
                  std::make_pair(n_rows,n_columns))
    {}




    ProcessGrid::~ProcessGrid()
    {
      if (mpi_process_is_active)
        Cblacs_gridexit(blacs_context);

      if (mpi_communicator_inactive_with_root != MPI_COMM_NULL)
        MPI_Comm_free(&mpi_communicator_inactive_with_root);
    }



    template <typename NumberType>
    void ProcessGrid::send_to_inactive(NumberType *value, const int count) const
    {
      Assert (count>0, ExcInternalError());
      if (mpi_communicator_inactive_with_root != MPI_COMM_NULL)
        {
          const int ierr =
            MPI_Bcast(value,count,
                      Utilities::MPI::internal::mpi_type_id (value),
                      0/*from root*/,
                      mpi_communicator_inactive_with_root);
          AssertThrowMPI(ierr);
        }
    }
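
    // A conceptual call pattern (sketch only; `grid` and `result` are
    // hypothetical names, and this member is presumably invoked from the
    // ScaLAPACK wrapper classes rather than by user code):
    //
    //   double result = 0.;
    //   if (/* this process is part of the BLACS grid */)
    //     result = ...;                    // computed on the grid, in
    //                                      // particular on the (active) root
    //   grid.send_to_inactive(&result, 1); // no-op on active non-root
    //                                      // processes; broadcasts the root's
    //                                      // value to the inactive processes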

  }
}

// instantiations

template void Utilities::MPI::ProcessGrid::send_to_inactive<double>(double *, const int) const;
template void Utilities::MPI::ProcessGrid::send_to_inactive<float>(float *, const int) const;
template void Utilities::MPI::ProcessGrid::send_to_inactive<int>(int *, const int) const;

DEAL_II_NAMESPACE_CLOSE

#endif // DEAL_II_WITH_SCALAPACK