File: matrixDistributed.h

package info (click to toggle)
groops 0%2Bgit20250907%2Bds-1
  • links: PTS, VCS
  • area: non-free
  • in suites: forky, sid
  • size: 11,140 kB
  • sloc: cpp: 135,607; fortran: 1,603; makefile: 20
file content (204 lines) | stat: -rw-r--r-- 12,326 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
/***********************************************/
/**
* @file matrixDistributed.h
*
* @brief Positve definte matrix in distributed memory.
*
* @author Torsten Mayer-Guerr
* @author Andreas Kvas
* @date 2011-01-30
*
*/
/***********************************************/

#ifndef __GROOPS_MATRIXDISTRIBUTED__
#define __GROOPS_MATRIXDISTRIBUTED__

#include "base/importStd.h"
#include "base/matrix.h"
#include "parallel/parallel.h"

/***** CLASS ***********************************/

/** @brief Representation of a positve definte matrix in distributed memory
* All algorithms operate only on the upper triangle of the matrix.
* @ingroup parallelGroup */
class MatrixDistributed
{
  Parallel::CommunicatorPtr             comm;
  std::function<UInt(UInt, UInt, UInt)> calcRank;
  std::vector<UInt>                    _blockIndex;

  std::vector<std::vector<std::pair<UInt, UInt>>> _row;      // each column, used row -> idx to _N and _rank
  std::vector<std::vector<std::pair<UInt, UInt>>> _column;   // each row, used column -> idx to _N and _rank
  std::vector<Matrix>               _N;        // unorderd list of used blocks
  std::vector<UInt>                 _rank;     // unorderd list of rank of used blocks

  Bool isMyRank(UInt idx) const {return (Parallel::myRank(comm) == _rank[idx]);}
  UInt index(UInt row, UInt col) const;        // NULLINDEX if not set
  // call: loopBlockColumn({rowStart, rowPastEnd}, col, [&](UInt row, UInt idx) {_N[idx] == N(row,col)});
  void loopBlockColumn(const std::array<UInt,2> &rows, UInt col, std::function<void(UInt, UInt)> block) const;
  // call: loopBlockRow(row, {colStart, colPastEnd}, [&](UInt col, UInt idx) {_N[idx] == N(row,col)});
  void loopBlockRow   (UInt row, const std::array<UInt,2> &cols, std::function<void(UInt, UInt)> block) const;

  // parallel
  // call: usedRanks = usedRanksInColumn({rowStart, rowPastEnd}, col);
  std::vector<Bool> usedRanksInColumn(const std::array<UInt,2> &rows, UInt col) const;
  // call: usedRanks = usedRanksInRow(row, {colStart, colPastEnd});
  std::vector<Bool> usedRanksInRow(UInt row, const std::array<UInt,2> &cols) const;
  void broadCast(Matrix &x, UInt idx, const std::vector<Bool> &usedRank);
  void reduceSum(Matrix &x, UInt idx, const std::vector<Bool> &usedRank, Bool free=TRUE);


  /* @brief Solve an triangular system of equations \f$ \mathbf{W}\mathbf{y} = \mathbf{x}\f$
  * The input must be initialized at all nodes.
  * The sum other all nodes defines the input (reduceSum is called internally)
  * Output is valid at master only. */
  void triangularSolve(std::vector<Matrix> &x) {triangularSolve(x, 0, blockCount());}
  void triangularSolve(std::vector<Matrix> &x, UInt startBlock, UInt countBlock);

  /* @brief Solve a part of a triangular system of equations \f$ \mathbf{W}^T\mathbf{y} = \mathbf{x}\f$
  * This corresponds to the partly @a cholesky function.
  * The input must be initialized at all nodes.
  * The sum other all nodes defines the input (reduceSum is called internally)
  * Output is valid at master only. */
  void triangularTransSolve(std::vector<Matrix> &x) {triangularTransSolve(x, 0, blockCount(), TRUE);}
  void triangularTransSolve(std::vector<Matrix> &x, UInt startBlock, UInt countBlock, Bool collect);

public:
  /// Default constructor.
  MatrixDistributed();

  /** @brief Initialize matrix structure (number of blocks, block sizes, communicator, and rank calculation).
  * The @a blockIndex is a vector of ascending indices starting with zero, where each adjacent pair of elements define the position and shape of the subblocks.
  * Hence, all elements \f$ n_{ij} \f$  which satisfy \f$ blockIndex[k] \leq  i < blockIndex[k+1] \land blockIndex[l] \leq j < blockIndex[l+1] \f$ are found in block (@a k, @a l).
  * @param blockIndex: boundary indices of the sub-blocks.
  * @param comm: Parallel communicator of the matrix (default: MPI_COMM_WORLD).
  * @param calcRank: function handler to determine the process rank of block(i,k) (default: block cyclic distribution). */
  explicit MatrixDistributed(const std::vector<UInt> &blockIndex, Parallel::CommunicatorPtr comm, const std::function<UInt(UInt, UInt, UInt)> &calcRank=nullptr);

  /** @copydoc MatrixDistributed(const std::vector<UInt> &, Parallel::CommunicatorPtr, const std::function<UInt(UInt, UInt, UInt)> &)
  * This method allocates the upper block triangle of a symmetric matrix with zero matrices. */
  void init(const std::vector<UInt> &blockIndex, Parallel::CommunicatorPtr comm, const std::function<UInt(UInt, UInt, UInt)> &calcRank=nullptr);

  /** @copydoc MatrixDistributed(const std::vector<UInt> &, Parallel::CommunicatorPtr, const std::function<UInt(UInt, UInt, UInt)> &)
  * No memory is allocated. To assign blocks to processes and allocate memory, @a setBlock(UInt i, UInt k, UInt rank) has to be called. */
  void initEmpty(const std::vector<UInt> &blockIndex, Parallel::CommunicatorPtr comm, const std::function<UInt(UInt, UInt, UInt)> &calcRank=nullptr);

  // =========================================

  /** @brief Set a new handler to calculate the rank of new blocks.
  * If @p calcRank is nullptr a default block cyclic distribution is assumed.
  * This function must be called by all processes within the communcator in the matrix! */
  void setCalculateRank(const std::function<UInt(UInt, UInt, UInt)> &calcRank);

  /** @brief returns the handler to calculate the rank of new blocks. */
  const std::function<UInt(UInt, UInt, UInt)> &getCalculateRank() const {return calcRank;}

  /** @brief The default calculateRank handler.
  * Block cyclic distribution.
  * This function must be called by all processes within the communcator in the matrix! */
  static UInt calculateRankBlockCyclic(UInt i, UInt k, UInt commSize);

  /** @brief Assigns block (@a i, @a k) to process @a rank and allocates memory (only at the parent process).
  * If no rank is specified, the calculateRank handler is called for new blocks.
  * This function must be called by all processes within the communcator in the matrix! */
  UInt setBlock(UInt i, UInt k, UInt rank=NULLINDEX);

  /** @brief Removes the block rows/columns.
  * This function erases the block rows/columns containing the diagonal blocks @a startIndex to @a startIndex+count-1
  * effectively reducing the size by blockSize[startIndex] + ... + blockSize[startIndex+count-1] elements.
  * This function must be called by all processes within the communcator in the matrix!
  * @param startIndex index of block row/column to remove
  * @param count number of rows/columns to remove  */
  void eraseBlocks(UInt startIndex, UInt count=1);

  // =========================================

  Parallel::CommunicatorPtr communicator() const {return comm;}
  std::vector<UInt>         blockIndex()   const {return _blockIndex;}

  UInt parameterCount()            const {return _blockIndex.at(blockCount());}          //!< Number of rows/columns (dimension) of distributed matrix
  UInt dimension()                 const {return parameterCount();}                      //!< Number of rows/columns (dimension) of distributed matrix
  UInt blockIndex(UInt i)          const {return _blockIndex.at(i);}                     //!< Start index of block @a i
  UInt blockSize(UInt i)           const {return _blockIndex.at(i+1)-_blockIndex.at(i);} //!< Size of block @a i
  UInt blockCount()                const {return _blockIndex.size()-1;}                  //!< Number of block rows/columns
  UInt rank(UInt i, UInt k)        const;                                                //!< Rank of process holding block (@a i, @a k)
  Bool isMyRank(UInt i, UInt k)    const {return (Parallel::myRank(comm) == rank(i,k));} //!< Returns TRUE if the calling process holds block (@a i, @a k) within the underlying communicator of the matrix
  Bool isBlockUsed(UInt i, UInt k) const {return (rank(i,k) != NULLINDEX);}              //!< Returns TRUE if block (@a i, @a k) is assigned to a process
  UInt index2block(UInt i)         const;                                                //!< Returns the index of the block which holds element @a i

  Matrix       &N(UInt i, UInt k);       //!< Returns a writable reference to block (@a i, @a k). Throws an exception if the block is not assigned to a process.
  const Matrix &N(UInt i, UInt k) const; //!< Returns a read only reference to block (@a i, @a k). Throws an exception if the block is not assigned to a process.

  /// Fill all matrix blocks with zero.
  void setNull();

  /// Reduce block (@a i, @a k) on its parent process. After the operation, the memory on all other processes is freed.
  void reduceSum(UInt i, UInt k);

  /// Reduce all assigned blocks of the matrix on their parent processes. After the operation, the memory on all other processes is freed.
  void reduceSum(Bool timing=TRUE);

  // =========================================

  /** @brief (In-place) Cholesky decomposition of the distributed matrix. */
  void cholesky(Bool timing=TRUE) {cholesky(timing, 0, blockCount(), TRUE);}

  /** @brief Performs a part of the Cholesky decomposition.
  * The Cholesky decomposition must be already performed for the blocks before @p startBlock.
  * The blocks after @p startBlock + @p countBlock contain at ouput the normal matrix where all parameters before
  * are eliminated. If not @p collect, @a reduceSum must be called afterwards for these blocks. */
  void cholesky(Bool timing, UInt startBlock, UInt countBlock, Bool collect);

  /** @brief Solve the system of equations \f$ \mathbf{N}\mathbf{x} = \mathbf{n}\f$
  * Performs @a cholesky, @a triangularTransSolve, and @a triangularSolve.
  * The input must be valid at master only. Output is valid at master only. */
  Matrix solve(const_MatrixSliceRef n, Bool timing=TRUE);

  /** @brief Solve a triangular system of equations \f$ \mathbf{W}\mathbf{y} = \mathbf{x}\f$
  * \f$ \mathbf{W} \f$ is assumed to be an upper triangular matrix.
  * The input must be valid at master only. Output is valid at master only. */
  void triangularSolve(MatrixSliceRef x);

  /** @brief Solve a triangular system of equations \f$ \mathbf{W}^T\mathbf{y} = \mathbf{x}\f$
  * \f$ \mathbf{W} \f$ is assumed to be an upper triangular matrix.
  * The input must be valid at master only. Output is valid at master only. */
  void triangularTransSolve(MatrixSliceRef x) {triangularTransSolve(x, 0, blockCount());}
  void triangularTransSolve(MatrixSliceRef x, UInt startBlock, UInt countBlock);

  /** @brief Compute the inverse of the (upper triangular) Cholesky  factor \f$ \mathbf{W}\f$ */
  void choleskyInverse(Bool timing=TRUE) {choleskyInverse(timing, 0, blockCount());}
  void choleskyInverse(Bool timing, UInt startBlock, UInt countBlock);

  /** @brief Compute product \f$ \mathbf{U}\mathbf{U}^T \f$, where \f$ \mathbf{U} \f$ is either a Cholesky factor or its inverse. */
  void choleskyProduct(Bool timing=TRUE);

  /** @brief Compute the sparse inverse of the matrix from the Cholesky  factor \f$ \mathbf{W}\f$.
  * Computes the upper triangle of the inverse of the symmetric matrix. The structure of the Cholesky factor is preserved.  */
  void cholesky2SparseInverse(Bool timing=TRUE);

  // =========================================

  /** @brief Reorder parameters in a symmetric matrix.
  * The parameter @a index contains the indices of the elements in the reordered matrix.
  * Indices with NULLINDEX are inserted as zero elements into the new matrix.
  * @param index: indices of the original matrix elements in the reordered matrix
  * @param blockIndex: boundary indices of the sub-blocks.
  * @param calcRank: function handler to determine the process rank of block(i,k) (default: block cyclic distribution). */
  void reorder(const std::vector<UInt> &index, const std::vector<UInt> &blockIndex, const std::function<UInt(UInt, UInt, UInt)> &calcRank=nullptr);

  // =========================================

  /** @brief Compute boundary indices for distributed blocks from parameter count and block size.
  * If blockSize is zero, the matrix consists of a single block. */
  static std::vector<UInt> computeBlockIndex(UInt parameterCount, UInt blockSize=2048);

  friend class GnssProcessingStep;
  friend class GnssParametrizationAmbiguities;
  friend class SlrProcessingStep;
};

//***********************************************/

#endif