File: Ifpack2_DistributedRelaxationTest.cpp

package info (click to toggle)
trilinos 12.18.1-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 825,604 kB
  • sloc: cpp: 3,352,065; ansic: 432,926; fortran: 169,495; xml: 61,903; python: 40,836; sh: 32,697; makefile: 26,612; javascript: 8,535; perl: 7,136; f90: 6,372; csh: 4,183; objc: 2,620; lex: 1,469; lisp: 810; yacc: 497; awk: 364; ml: 281; php: 145
file content (219 lines) | stat: -rw-r--r-- 6,738 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219

//#include <Teuchos_ConfigDefs.hpp>
//#include <Ifpack2_ConfigDefs.hpp>

//#include <Ifpack2_Version.hpp>
#include <iostream>
#include <fstream>

#include <Ifpack2_Relaxation.hpp>
#include <Teuchos_GlobalMPISession.hpp>
#include <Tpetra_CrsMatrix.hpp>
#include <Tpetra_MultiVector.hpp>
#include <Tpetra_Map.hpp>
#include <Teuchos_RCP.hpp>
#include <Teuchos_Array.hpp>
#include <Teuchos_ParameterList.hpp>
#include <Teuchos_DefaultComm.hpp>
#include <vector>

using Teuchos::RCP;
using Teuchos::rcp;
using namespace Teuchos;

template <typename stype>
void md_malloc(stype **arr, size_t n, std::string alloc_str = ""){
  *arr = new stype[n];
  if (*arr == NULL){
    std::cerr << "Memory Allocation Problem " << alloc_str << std::endl;
    exit(1);
  }
}

/**
 * Reads binary graph, the graphs are generated from mtx file using TpetraKernels_conMTX2BIN.exe.
 * See the convert TpetraKernels_conMTX2BIN.exe executable in KokkosKernels package.
 *  /perf_test/graph/TpetraKernels_conMTX2BIN.exe
 *
 */
template <typename idx, typename wt>
void read_graph_bin(idx *nv, idx *ne,idx **xadj, idx **adj, wt **ew, char *filename){


  std::ifstream myFile (filename, std::ios::in | std::ios::binary);

  myFile.read((char *) nv, sizeof(idx));
  myFile.read((char *) ne, sizeof(idx));

  md_malloc<idx>(xadj, *nv+1);
  md_malloc<idx>(adj, *ne);
  md_malloc<wt> (ew, *ne);
  myFile.read((char *) *xadj, sizeof(idx) * (*nv + 1));
  myFile.read((char *) *adj, sizeof(idx) * (*ne));
  myFile.read((char *) *ew, sizeof(wt) * (*ne));
  myFile.close();
}


typedef int zlno_t;
typedef int zgno_t;
typedef double zscalar_t;

int main( int argc, char* argv[] )
{
  if (argc < 2){
    std::cerr << "Usage:" << argv[0] << " input_bin_file [max-num-iter=500]" << std::endl;
    exit(1);
  }
  Teuchos::GlobalMPISession mpiSession(&argc, &argv);

  RCP<const Teuchos::Comm<int> > tcomm = Teuchos::DefaultComm<int>::getComm();
  int rank = tcomm->getRank();
  int max_num_iter = 500;
  if (argc >= 3){
    max_num_iter = atoi(argv[2]);
  }
  double min_norm = 0.0000001;
  if (argc == 4){
    min_norm = atof(argv[3]);
  }

  std::cout << "max_num_iter:" << max_num_iter << " min_norm:" << min_norm << std::endl;

  int nr = 0, ne = 0;
  int *xadj = NULL, *adj= NULL;
  double *vals= NULL;

  if (rank == 0){
    read_graph_bin<int, double> (
        &nr, &ne, &xadj, &adj, &vals, argv[1]);
  }

  
  tcomm->broadcast (0, sizeof(int), (char *)&nr);
  tcomm->broadcast (0, sizeof(int), (char *)&ne);
  std::cout << "nr:" << nr << " ne:" << ne <<std::endl;
  if (rank != 0){
    xadj = new int [nr+1];
    adj = new int [ne];
    vals = new double [ne];
  }
  tcomm->broadcast (0, sizeof(int) * (nr + 1), (char *)xadj);
  tcomm->broadcast (0, sizeof(int) * (ne), (char *)adj);
  tcomm->broadcast (0, sizeof(double) * (ne), (char *)vals);


  int max_num_elements = 0;
  for (zlno_t lclRow = 0; lclRow < nr; ++lclRow) {
    int begin = xadj[lclRow];
    int end = xadj[lclRow + 1];
    if (end - begin > max_num_elements) max_num_elements = end - begin;
  }

  typedef Tpetra::Map<>::node_type znode_t;
  typedef Tpetra::Map<zlno_t, zgno_t, znode_t> map_t;
  size_t numGlobalCoords = nr;
  RCP<const map_t> map = rcp (new map_t (numGlobalCoords, 0, tcomm));
  RCP<const map_t> domainMap = map;
  RCP<const map_t> rangeMap = map;

  typedef Tpetra::CrsMatrix<zscalar_t, zlno_t, zgno_t, znode_t> tcrsMatrix_t;
  typedef Tpetra::RowMatrix<zscalar_t, zlno_t, zgno_t, znode_t> row_matrix_type;
  RCP<tcrsMatrix_t> TpetraCrsMatrix(new tcrsMatrix_t (map, 0));
  const zlno_t numMyElements = map->getNodeNumElements ();
  const zgno_t myBegin = map->getGlobalElement (0);

  //std::vector<zgno_t> tmp_indices(max_num_elements);
  //std::vector<zscalar_t> tmp_vals(max_num_elements);
  zgno_t  *tmp_indices = new zgno_t[max_num_elements];
  zscalar_t *tmp_vals = new zscalar_t [max_num_elements];
  for (zlno_t lclRow = 0; lclRow < numMyElements; ++lclRow) {
    const zgno_t gblRow = map->getGlobalElement (lclRow);
    zgno_t begin = xadj[gblRow];
    zgno_t end = xadj[gblRow + 1];


    for (int i = begin; i < end; ++i) {
      tmp_indices[i - begin] = adj[i];
      tmp_vals[i - begin] = vals[i];

    }
    const Teuchos::ArrayView< const zgno_t > indices(tmp_indices, end-begin);
    const Teuchos::ArrayView< const zscalar_t > values(tmp_vals, end-begin);
    TpetraCrsMatrix->insertGlobalValues (gblRow, indices, values);
  }
  TpetraCrsMatrix->fillComplete ();


  
  delete []xadj; delete [] adj; delete []vals;
  delete []tmp_indices; delete [] tmp_vals;

  typedef Tpetra::Vector<zscalar_t, zlno_t, zgno_t, znode_t> vec_type;

  vec_type X_wanted (domainMap);
  vec_type Y_result (rangeMap);
  X_wanted.randomize ();
  TpetraCrsMatrix->apply(X_wanted, Y_result);
  vec_type X_seek (domainMap);
  X_seek.putScalar(0);

  ParameterList params, params_mt;
  params.set ("relaxation: type", "Symmetric Gauss-Seidel");
  params.set ("relaxation: sweeps", 1);
  params.set ("relaxation: zero starting solution", false);

  params_mt.set ("relaxation: type", "MT Symmetric Gauss-Seidel");
  params_mt.set ("relaxation: sweeps", 1);
  params_mt.set ("relaxation: zero starting solution", false);


  Ifpack2::Relaxation<row_matrix_type> prec_mt (TpetraCrsMatrix);
  Ifpack2::Relaxation<row_matrix_type> prec (TpetraCrsMatrix);


  prec.setParameters (params);

  prec.initialize () ;
  prec.compute () ;

  prec_mt.setParameters (params_mt);

  prec_mt.initialize () ;

  prec_mt.compute () ;

  X_seek.putScalar(0);
  for (int i = 0; i < max_num_iter; ++i){
    prec_mt.apply (Y_result, X_seek);
    vec_type X_diff(X_seek, Teuchos::Copy);
    X_diff.update (1, X_wanted, -1);
    double normInf = X_diff.normInf ();
    if (i % 10 == 0)
    std::cout << "i:" << i << " norm:" << normInf << std::endl;
    if (normInf < min_norm) break;
  }
  std::cout << "MT Flops:" << prec_mt.getApplyFlops ()
                    << " App Time:" <<  prec_mt.getApplyTime ()
                    << " Comp Time:" <<  prec_mt.getComputeTime ()
                    << " Init Time:" <<  prec_mt.getInitializeTime ()
                      << std::endl;


  X_seek.putScalar(0);
  for (int i = 0; i < max_num_iter; ++i){
    prec.apply (Y_result, X_seek);
    vec_type X_diff(X_seek, Teuchos::Copy);
    X_diff.update (1, X_wanted, -1);
    double normInf = X_diff.normInf ();
    if (i % 10 == 0)
    std::cout << "i:" << i << " norm:" << normInf << std::endl;
    if (normInf < min_norm) break;
  }
  std::cout << "Sequential Flops:" << prec.getApplyFlops ()
                << " App Time:" <<  prec.getApplyTime ()
                << " Comp Time:" <<  prec.getComputeTime ()
                << " Init Time:" <<  prec.getInitializeTime ()
                  << std::endl;
  return 0;
}