File: init_finalize.cc

package info (click to toggle)
deal.ii 9.7.1-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 326,024 kB
  • sloc: cpp: 440,899; ansic: 77,337; python: 3,307; perl: 1,041; sh: 1,022; xml: 252; makefile: 97; javascript: 14
file content (449 lines) | stat: -rw-r--r-- 15,370 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
// ---------------------------------------------------------------------
//
// Copyright (C) 2023 - 2025 by the deal.II authors
//
// This file is part of the deal.II library.
//
// The deal.II library is free software; you can use it, redistribute
// it, and/or modify it under the terms of the GNU Lesser General
// Public License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// The full text of the license can be found in the file LICENSE.md at
// the top level directory of deal.II.
//
// ---------------------------------------------------------------------

#include <deal.II/base/init_finalize.h>
#include <deal.II/base/mpi.h>
#include <deal.II/base/multithread_info.h>

#include <deal.II/lac/la_parallel_block_vector.h>
#include <deal.II/lac/la_parallel_vector.h>
#include <deal.II/lac/vector_memory.h>

#include <Kokkos_Core.hpp>

#ifdef DEAL_II_WITH_TRILINOS
#  ifdef DEAL_II_WITH_MPI
#    include <deal.II/lac/trilinos_parallel_block_vector.h>
#    include <deal.II/lac/trilinos_vector.h>

DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
#    include <Epetra_MpiComm.h>
DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
#  endif
#endif

#ifdef DEAL_II_WITH_PETSC
#  include <deal.II/lac/petsc_block_vector.h>
#  include <deal.II/lac/petsc_vector.h>

#  include <petscsys.h>
#endif

#ifdef DEAL_II_WITH_SLEPC
#  include <deal.II/lac/slepc_solver.h>

#  include <slepcsys.h>
#endif

#ifdef DEAL_II_WITH_P4EST
#  include <p4est_bits.h>
#endif

#ifdef DEAL_II_TRILINOS_WITH_ZOLTAN
#  include <zoltan_cpp.h>
#endif

#include <set>
#include <string>


DEAL_II_NAMESPACE_OPEN


/*
 * Definition of the static `signals` member. The explicit initializer
 * forces initialization of the static struct. Within this file, the
 * constructor of InitFinalize invokes signals.at_mpi_init() as its last
 * step, and finalize() invokes signals.at_mpi_finalize() as its first step.
 */
InitFinalize::Signals InitFinalize::signals = InitFinalize::Signals();


// Constructor. Initializes, in this order, the external libraries selected
// in @p libraries (and compiled into deal.II): MPI, SLEPc/PETSc, Zoltan,
// p4est/libsc. It then sets the thread limit via MultithreadInfo,
// initializes Kokkos (if selected), and finally triggers the at_mpi_init()
// signal.
//
// @param argc Reference to the argument count of the program. It is handed
//   to MPI_Init_thread(), SlepcInitialize()/PetscInitialize(), and
//   Zoltan_Initialize(), and determines how many entries of @p argv are
//   forwarded to Kokkos.
// @param argv Reference to the argument vector of the program; used
//   alongside @p argc.
// @param libraries Flags selecting which libraries to initialize. They are
//   stored in the member of the same name so that finalize() can consult
//   them.
// @param max_num_threads Thread limit passed to
//   MultithreadInfo::set_thread_limit(). If it equals
//   numbers::invalid_unsigned_int, the limit is instead computed from the
//   number of cores and (with MPI) the number of processes on this host.
//
// In debug mode, an assertion is triggered if an object of this class has
// been constructed before. AssertThrow-style checks below throw if one of
// the library calls reports an error, or if MPI was already initialized.
InitFinalize::InitFinalize([[maybe_unused]] int    &argc,
                           [[maybe_unused]] char **&argv,
                           const InitializeLibrary &libraries,
                           const unsigned int       max_num_threads)
  : libraries(libraries)
{
  // Function-local static flag that remembers across calls whether a
  // constructor already ran to the point where the flag is set below.
  [[maybe_unused]] static bool constructor_has_already_run = false;
  Assert(constructor_has_already_run == false,
         ExcMessage("You can only create a single object of this class "
                    "in a program since it initializes the MPI system."));


  // Error code shared by the MPI calls in the first MPI section below.
  [[maybe_unused]] int ierr = 0;
#ifdef DEAL_II_WITH_MPI
  if (static_cast<bool>(libraries & InitializeLibrary::MPI))
    {
      // We initialize MPI ourselves here (PETSc/SLEPc are initialized only
      // afterwards, see below). First make sure nobody has started MPI
      // already.
      int MPI_has_been_started = 0;
      ierr                     = MPI_Initialized(&MPI_has_been_started);
      AssertThrowMPI(ierr);
      AssertThrow(MPI_has_been_started == 0,
                  ExcMessage("MPI error. You can only start MPI once!"));

      int provided;
      // this works like ierr = MPI_Init (&argc, &argv); but tells MPI that
      // we might use several threads but never call two MPI functions at the
      // same time. For an explanation of why we do this, see
      // http://www.open-mpi.org/community/lists/users/2010/03/12244.php
      int wanted = MPI_THREAD_SERIALIZED;
      ierr       = MPI_Init_thread(&argc, &argv, wanted, &provided);
      AssertThrowMPI(ierr);

      // The thread level reported in 'provided' is currently not checked:
      // the check is disabled for now because at least some implementations
      // always return MPI_THREAD_SINGLE.
      // Assert(max_num_threads==1 || provided != MPI_THREAD_SINGLE,
      //    ExcMessage("MPI reports that we are not allowed to use multiple
      //    threads."));
    }
#endif

    // we are allowed to call MPI_Init ourselves and PETScInitialize will
    // detect this. This allows us to use MPI_Init_thread instead.
    //
    // NOTE(review): in a build with SLEPc, only the SLEPc flag triggers an
    // initialization below. A request for InitializeLibrary::PETSc without
    // InitializeLibrary::SLEPc then still reaches PetscPopSignalHandler()
    // without this function having called PetscInitialize() -- confirm that
    // this combination is intended/unsupported.
#ifdef DEAL_II_WITH_PETSC
  PetscErrorCode pierr;
#  ifdef DEAL_II_WITH_SLEPC
  // Initialize SLEPc (with PETSc). Remember in 'finalize_petscslepc' whether
  // SLEPc had not been initialized before, i.e., whether finalize() is the
  // one that has to shut it down again:
  if (static_cast<bool>(libraries & InitializeLibrary::SLEPc))
    {
      finalize_petscslepc = SlepcInitializeCalled ? false : true;
      pierr               = SlepcInitialize(&argc, &argv, nullptr, nullptr);
      AssertThrow(pierr == 0, SLEPcWrappers::SolverBase::ExcSLEPcError(pierr));
    }
#  else
  // or just initialize PETSc alone, again recording whether PETSc had not
  // been initialized before:
  if (static_cast<bool>(libraries & InitializeLibrary::PETSc))
    {
      finalize_petscslepc = PetscInitializeCalled ? false : true;
      pierr               = PetscInitialize(&argc, &argv, nullptr, nullptr);
      AssertThrow(pierr == 0, ExcPETScError(pierr));
    }
#  endif

  // Disable PETSc exception handling. This just prints a large wall
  // of text that is not particularly helpful for what we do:
  if (static_cast<bool>(libraries & InitializeLibrary::SLEPc) ||
      static_cast<bool>(libraries & InitializeLibrary::PETSc))
    {
      pierr = PetscPopSignalHandler();
      AssertThrow(pierr == 0, ExcPETScError(pierr));
    }
#endif

    // Initialize zoltan. The version number written by Zoltan_Initialize()
    // and its return value are not used.
#ifdef DEAL_II_TRILINOS_WITH_ZOLTAN
  if (static_cast<bool>(libraries & InitializeLibrary::Zoltan))
    {
      float version;
      Zoltan_Initialize(argc, argv, &version);
    }
#endif

    // Initialize p4est and libsc components
#ifdef DEAL_II_WITH_P4EST
  if (static_cast<bool>(libraries & InitializeLibrary::P4EST))
    {
#  if DEAL_II_P4EST_VERSION_GTE(2, 5, 0, 0)
      // This feature is broken in version 2.0.0 for calls to
      // MPI_Comm_create_group (see cburstedde/p4est#30).
      // Disabling it leads to more verbose p4est error messages
      // which should be fine.
      //
      // NOTE(review): the "feature" presumably refers to what the two zero
      // arguments of sc_init() switch off -- confirm against the libsc
      // documentation.
      sc_init(MPI_COMM_WORLD, 0, 0, nullptr, SC_LP_SILENT);
#  endif
      p4est_init(nullptr, SC_LP_SILENT);
    }
#endif

  // From here on, constructing another object triggers the assertion at the
  // top of this function (in debug mode).
  constructor_has_already_run = true;


  // Now also see how many threads we'd like to run
  if (max_num_threads != numbers::invalid_unsigned_int)
    {
      // set maximum number of threads (also respecting the environment
      // variable that the called function evaluates) based on what the
      // user asked
      MultithreadInfo::set_thread_limit(max_num_threads);
    }
  else
    // user wants automatic choice
    {
      // Without MPI, every core of the machine is used by this process.
      unsigned int n_threads = MultithreadInfo::n_cores();
#ifdef DEAL_II_WITH_MPI
      if (static_cast<bool>(libraries & InitializeLibrary::MPI))
        {
          // Note: this 'ierr' is a new variable that shadows the one
          // declared further up in this function.
          int MPI_has_been_started = 0;
          int ierr                 = MPI_Initialized(&MPI_has_been_started);
          AssertThrowMPI(ierr);

          // we need to figure out how many MPI processes there are on the
          // current node, as well as how many CPU cores we have. for the
          // first task, check what get_hostname() returns and then do an
          // allgather so each processor gets the answer
          //
          // in calculating the length of the string, don't forget the
          // terminating \0 on C-style strings
          const std::string hostname = Utilities::System::get_hostname();

          // Host names can have different lengths on different processes,
          // so first agree on the maximal length and use it as the fixed
          // size of each process's slot in the gathered array.
          int my_hostname_size  = hostname.size() + 1;
          int max_hostname_size = -1;
          ierr                  = MPI_Allreduce(&my_hostname_size,
                               &max_hostname_size,
                               1,
                               MPI_INT,
                               MPI_MAX,
                               MPI_COMM_WORLD);
          AssertThrowMPI(ierr);
          // Copy our own name (including the terminating \0) into a buffer
          // of the common size; the remaining entries stay zero.
          std::vector<char> hostname_array(max_hostname_size);
          std::copy(hostname.c_str(),
                    hostname.c_str() + hostname.size() + 1,
                    hostname_array.begin());

          int n_mpi_processes = 1;
          if (MPI_has_been_started)
            {
              ierr = MPI_Comm_size(MPI_COMM_WORLD, &n_mpi_processes);
              AssertThrowMPI(ierr);
            }
          // Slot i of this array receives the host name of process i.
          std::vector<char> all_hostnames(max_hostname_size * n_mpi_processes);
          ierr = MPI_Allgather(hostname_array.data(),
                               max_hostname_size,
                               MPI_CHAR,
                               all_hostnames.data(),
                               max_hostname_size,
                               MPI_CHAR,
                               MPI_COMM_WORLD);
          AssertThrowMPI(ierr);

          // count how often our own hostname appears (n_local_processes)
          // and which of these occurrences the current process is, counting
          // from one in rank order (nth_process_on_host)
          unsigned int n_local_processes   = 0;
          unsigned int nth_process_on_host = 0;
          int          rank                = 0;
          if (MPI_has_been_started)
            {
              ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank);
              AssertThrowMPI(ierr);
            }
          for (int i = 0; i < n_mpi_processes; ++i)
            if (std::string(all_hostnames.data() + i * max_hostname_size) ==
                hostname)
              {
                ++n_local_processes;
                if (i <= rank)
                  ++nth_process_on_host;
              }
          // Our own entry always matches, so the counter is at least one.
          Assert(nth_process_on_host > 0, ExcInternalError());


          // compute how many cores each process gets. if the number does not
          // divide evenly, then we get one more core if we are among the
          // first few processes
          //
          // if the number would be zero, round up to one since every process
          // needs to have at least one thread
          n_threads =
            std::max(MultithreadInfo::n_cores() / n_local_processes +
                       (nth_process_on_host <=
                            MultithreadInfo::n_cores() % n_local_processes ?
                          1 :
                          0),
                     1U);
        }
#endif

      // finally set this number of threads
      MultithreadInfo::set_thread_limit(n_threads);
    }

  // Initialize Kokkos
  if (static_cast<bool>(libraries & InitializeLibrary::Kokkos))
    {
      // argv has argc+1 elements and the last one is a nullptr. For appending
      // one element we thus create a new argv by copying the first argc
      // elements, append the new option, and then a nullptr.
      //
      // We do get in trouble, though, if a user program is called with
      // '--help' as a command line argument. This '--help' gets passed on to
      // Kokkos, which promptly responds with a lengthy message that the user
      // likely did not intend. As a consequence, filter out this specific
      // flag.
      std::vector<char *> argv_new;
      for (auto *const arg : make_array_view(&argv[0], &argv[0] + argc))
        if (std::strcmp(arg, "--help") != 0)
          argv_new.push_back(arg);

      // Tell Kokkos to use the number of threads determined above. The name
      // of the command line flag depends on the Kokkos version.
      std::stringstream threads_flag;
#if DEAL_II_KOKKOS_VERSION_GTE(3, 7, 0)
      threads_flag << "--kokkos-num-threads=" << MultithreadInfo::n_threads();
#else
      threads_flag << "--kokkos-threads=" << MultithreadInfo::n_threads();
#endif
      // The string object has to stay alive while Kokkos::initialize() runs
      // because argv_new only stores a pointer into it.
      const std::string threads_flag_string = threads_flag.str();
      argv_new.push_back(const_cast<char *>(threads_flag_string.c_str()));
      argv_new.push_back(nullptr);

      // The first argument in Kokkos::initialize is of type int&. Hence, we
      // need to define a new variable to pass to it (instead of using argc+1
      // inline). The trailing nullptr is not counted.
      int argc_new = argv_new.size() - 1;
      Kokkos::initialize(argc_new, argv_new.data());
    }

  // As a final step call the at_mpi_init() signal handler.
  signals.at_mpi_init();
}



// Record the address of @p request in the static set of outstanding
// requests. finalize() waits on every address stored there (in builds with
// MPI), so the referenced object has to remain valid until it is removed
// again via unregister_request() or finalize() has run.
void
InitFinalize::register_request(MPI_Request &request)
{
  MPI_Request *const request_address = &request;

  // A std::set stores each address at most once; registering the same
  // request a second time therefore changes nothing.
  requests.insert(request_address);
}



// Remove the address of @p request from the static set of outstanding
// requests. In debug mode, an assertion is triggered if the address is not
// currently part of the set.
void
InitFinalize::unregister_request(MPI_Request &request)
{
  MPI_Request *const request_address = &request;

  Assert(requests.count(request_address) != 0,
         ExcMessage(
           "You tried to call unregister_request() with an invalid request."));

  requests.erase(request_address);
}



// Definition of the static set holding the addresses of all MPI requests
// that were passed to register_request() and have not been removed again by
// unregister_request(). In builds with MPI, finalize() calls MPI_Wait() on
// every entry.
std::set<MPI_Request *> InitFinalize::requests;



// Shut down what the constructor set up. The body only runs while the
// member 'is_finalized' is false and sets it to true at the very end, so a
// call after a completed one does nothing.
//
// The steps are, in this order: trigger the at_mpi_finalize() signal, wait
// for registered MPI requests, release pooled vectors, finalize SLEPc or
// PETSc (only if the constructor recorded that this object initialized
// them), libsc, Kokkos, and finally MPI. Which steps are executed depends on
// the 'libraries' flags and on the packages deal.II was configured with.
void
InitFinalize::finalize()
{
  // Nothing to do if an earlier call already ran to completion.
  if (is_finalized)
    return;

  // The at_mpi_finalize() signal handler comes first, before anything is
  // torn down.
  signals.at_mpi_finalize();

  // Below, the memory pools are asked to release all PETSc/Trilinos/MPI-based
  // vectors that are no longer in use. This matters because the destructors
  // of static objects would otherwise destroy these vectors only at the end
  // of the program, i.e., after MPI_Finalize has been called, which leads to
  // errors.

#ifdef DEAL_II_WITH_MPI
  // Nonblocking communication has to complete before we exit:
  for (auto *pending_request : requests)
    {
      const int wait_error = MPI_Wait(pending_request, MPI_STATUS_IGNORE);
      AssertThrowMPI(wait_error);
    }

  // The pools of deal.II's own MPI vectors are emptied first:
  GrowingVectorMemory<
    LinearAlgebra::distributed::Vector<double>>::release_unused_memory();
  GrowingVectorMemory<
    LinearAlgebra::distributed::BlockVector<double>>::release_unused_memory();
  GrowingVectorMemory<
    LinearAlgebra::distributed::Vector<float>>::release_unused_memory();
  GrowingVectorMemory<
    LinearAlgebra::distributed::BlockVector<float>>::release_unused_memory();

  // Then those of the Trilinos vectors:
#  ifdef DEAL_II_WITH_TRILINOS
  GrowingVectorMemory<TrilinosWrappers::MPI::Vector>::release_unused_memory();
  GrowingVectorMemory<
    TrilinosWrappers::MPI::BlockVector>::release_unused_memory();
#  endif
#endif


  // PETSc is handled independently of whether MPI is available. The vectors
  // may only be released as long as PETSc has not been finalized yet;
  // doing so afterwards leads to errors.
#ifdef DEAL_II_WITH_PETSC
  if (!PetscFinalizeCalled)
    {
      GrowingVectorMemory<PETScWrappers::MPI::Vector>::release_unused_memory();
      GrowingVectorMemory<
        PETScWrappers::MPI::BlockVector>::release_unused_memory();
    }
#  ifdef DEAL_II_WITH_SLEPC
  // Shut down SLEPc (together with PETSc), but only if this object was the
  // one that initialized it:
  if (static_cast<bool>(libraries & InitializeLibrary::SLEPc) &&
      finalize_petscslepc)
    {
      const PetscErrorCode slepc_error = SlepcFinalize();
      AssertThrow(slepc_error == 0,
                  SLEPcWrappers::SolverBase::ExcSLEPcError(slepc_error));
    }
#  else
  // Without SLEPc, shut down PETSc alone, again only if this object was the
  // one that initialized it:
  if (static_cast<bool>(libraries & InitializeLibrary::PETSc) &&
      finalize_petscslepc)
    {
      const PetscErrorCode petsc_error = PetscFinalize();
      AssertThrow(petsc_error == 0, ExcPETScError(petsc_error));
    }
#  endif
#endif

#ifdef DEAL_II_WITH_P4EST
  // Shut down libsc. p4est itself has no finalize function.
  if (static_cast<bool>(libraries & InitializeLibrary::P4EST))
    sc_finalize();
#endif


  // Shut down Kokkos
  if (static_cast<bool>(libraries & InitializeLibrary::Kokkos))
    Kokkos::finalize();

    // MPI_Finalize is only called if we are running with MPI. This is also
    // necessary when PETSc is in use, because the constructor initializes
    // MPI itself before calling PetscInitialize.
#ifdef DEAL_II_WITH_MPI
  int       mpi_is_initialized = 0;
  const int query_error        = MPI_Initialized(&mpi_is_initialized);
  AssertThrowMPI(query_error);

  const bool mpi_was_requested =
    static_cast<bool>(libraries & InitializeLibrary::MPI);
  if (mpi_was_requested && (mpi_is_initialized != 0))
    {
      // If an exception is currently in flight, do not try to call
      // MPI_Finalize, in order to avoid a deadlock.
      if (std::uncaught_exceptions() == 0)
        {
          const int finalize_error = MPI_Finalize();
          AssertNothrow(finalize_error == MPI_SUCCESS,
                        dealii::ExcMPI(finalize_error));
        }
    }
#endif

  is_finalized = true;
}



// Destructor. All shutdown work is delegated to finalize(), which does
// nothing if it has already run to completion before.
InitFinalize::~InitFinalize()
{
  this->finalize();
}


DEAL_II_NAMESPACE_CLOSE