File: nullspace_mpi.cpp

package info (click to toggle)
amgcl 1.4.4-1
  • links: PTS, VCS
  • area: contrib
  • in suites: sid
  • size: 5,676 kB
  • sloc: cpp: 34,043; python: 747; pascal: 258; f90: 196; makefile: 20
file content (163 lines) | stat: -rw-r--r-- 5,770 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#include <vector>
#include <iostream>

#include <amgcl/backend/builtin.hpp>
#include <amgcl/adapter/crs_tuple.hpp>
#include <amgcl/coarsening/rigid_body_modes.hpp>

#include <amgcl/mpi/distributed_matrix.hpp>
#include <amgcl/mpi/make_solver.hpp>
#include <amgcl/mpi/amg.hpp>
#include <amgcl/mpi/coarsening/smoothed_aggregation.hpp>
#include <amgcl/mpi/relaxation/spai0.hpp>
#include <amgcl/mpi/solver/cg.hpp>

#include <amgcl/io/binary.hpp>
#include <amgcl/profiler.hpp>

#if defined(AMGCL_HAVE_PARMETIS)
#  include <amgcl/mpi/partition/parmetis.hpp>
#elif defined(AMGCL_HAVE_SCOTCH)
#  include <amgcl/mpi/partition/ptscotch.hpp>
#endif

int main(int argc, char *argv[]) {
    // The command line should contain the matrix, the RHS, and the coordinate files:
    if (argc < 4) {
        std::cerr << "Usage: " << argv[0] << " <A.bin> <b.bin> <coo.bin>" << std::endl;
        return 1;
    }

    amgcl::mpi::init mpi(&argc, &argv);
    amgcl::mpi::communicator world(MPI_COMM_WORLD);

    // The profiler:
    amgcl::profiler<> prof("Nullspace");

    // Read the system matrix, the RHS, and the coordinates:
    prof.tic("read");
    // Get the global size of the matrix:
    ptrdiff_t rows = amgcl::io::crs_size<ptrdiff_t>(argv[1]);

    // Split the matrix into approximately equal chunks of rows, and
    // make sure each chunk size is divisible by 3.
    ptrdiff_t chunk = (rows + world.size - 1) / world.size;
    if (chunk % 3) chunk += 3 - chunk % 3;

    ptrdiff_t row_beg = std::min(rows, chunk * world.rank);
    ptrdiff_t row_end = std::min(rows, row_beg + chunk);
    chunk = row_end - row_beg;

    // Read our part of the system matrix, the RHS and the coordinates.
    std::vector<ptrdiff_t> ptr, col;
    std::vector<double> val, rhs, coo;
    amgcl::io::read_crs(argv[1], rows, ptr, col, val, row_beg, row_end);

    ptrdiff_t n, m;
    amgcl::io::read_dense(argv[2], n, m, rhs, row_beg, row_end);
    amgcl::precondition(n == rows && m == 1, "The RHS file has wrong dimensions");

    amgcl::io::read_dense(argv[3], n, m, coo, row_beg / 3, row_end / 3);
    amgcl::precondition(n * 3 == rows && m == 3, "The coordinate file has wrong dimensions");
    prof.toc("read");

    if (world.rank == 0) {
        std::cout
            << "Matrix " << argv[1] << ": " << rows << "x" << rows << std::endl
            << "RHS "    << argv[2] << ": " << rows << "x1" << std::endl
            << "Coords " << argv[3] << ": " << rows / 3 << "x3" << std::endl;
    }

    // Declare the backends and the solver type
    typedef amgcl::backend::builtin<double> SBackend; // the solver backend
    typedef amgcl::backend::builtin<float>  PBackend; // the preconditioner backend

    typedef amgcl::mpi::make_solver<
        amgcl::mpi::amg<
            PBackend,
            amgcl::mpi::coarsening::smoothed_aggregation<PBackend>,
            amgcl::mpi::relaxation::spai0<PBackend>
            >,
        amgcl::mpi::solver::cg<PBackend>
        > Solver;

    // The distributed matrix
    auto A = std::make_shared<amgcl::mpi::distributed_matrix<SBackend>>(
            world, std::tie(chunk, ptr, col, val));

    // Partition the matrix, the RHS vector, and the coordinates.
    // If neither ParMETIS not PT-SCOTCH are not available,
    // just keep the current naive partitioning.
#if defined(AMGCL_HAVE_PARMETIS) || defined(AMGCL_HAVE_SCOTCH)
#  if defined(AMGCL_HAVE_PARMETIS)
    typedef amgcl::mpi::partition::parmetis<SBackend> Partition;
#  elif defined(AMGCL_HAVE_SCOTCH)
    typedef amgcl::mpi::partition::ptscotch<SBackend> Partition;
#  endif

    if (world.size > 1) {
        auto t = prof.scoped_tic("partition");
        Partition part;

        // part(A) returns the distributed permutation matrix.
        // Keep the DOFs belonging to the same grid nodes together
        // (use block-wise partitioning with block size 3).
        auto P = part(*A, 3);
        auto R = transpose(*P);

        // Reorder the matrix:
        A = product(*R, *product(*A, *P));

        // Reorder the RHS vector and the coordinates:
        R->move_to_backend();
        std::vector<double> new_rhs(R->loc_rows());
        std::vector<double> new_coo(R->loc_rows());
        amgcl::backend::spmv(1, *R, rhs, 0, new_rhs);
        amgcl::backend::spmv(1, *R, coo, 0, new_coo);
        rhs.swap(new_rhs);
        coo.swap(new_coo);

        // Update the number of the local rows
        // (it may have changed as a result of permutation).
        chunk = A->loc_rows();
    }
#endif

    // Solver parameters:
    Solver::params prm;
    prm.solver.maxiter = 500;
    prm.precond.coarsening.aggr.eps_strong = 0;

    // Convert the coordinates to the rigid body modes.
    // The function returns the number of near null-space vectors
    // (3 in 2D case, 6 in 3D case) and writes the vectors to the
    // std::vector<double> specified as the last argument:
    prm.precond.coarsening.aggr.nullspace.cols = amgcl::coarsening::rigid_body_modes(
            3, coo, prm.precond.coarsening.aggr.nullspace.B);

    // Initialize the solver with the system matrix.
    prof.tic("setup");
    Solver solve(world, A, prm);
    prof.toc("setup");

    // Show the mini-report on the constructed solver:
    if (world.rank == 0) std::cout << solve << std::endl;

    // Solve the system with the zero initial approximation:
    int iters;
    double error;
    std::vector<double> x(chunk, 0.0);

    prof.tic("solve");
    std::tie(iters, error) = solve(*A, rhs, x);
    prof.toc("solve");

    // Output the number of iterations, the relative error,
    // and the profiling data:
    if (world.rank == 0) {
        std::cout
            << "Iters: " << iters << std::endl
            << "Error: " << error << std::endl
            << prof << std::endl;
    }
}