File: BcastTest.cpp (package: combblas 2.0.0-7)

#include <mpi.h>
#include <sys/time.h>
#include <iostream>
#include <functional>
#include <algorithm>
#include <vector>
#include <sstream>
#include "CombBLAS/CombBLAS.h"
#include "CombBLAS/CommGrid3D.h"
#include "CombBLAS/SpParMat3D.h"
#include "CombBLAS/ParFriends.h"

using namespace std;
using namespace combblas;

#define EPS 0.0001

#ifdef TIMING
double cblas_alltoalltime;
double cblas_allgathertime;
#endif

#ifdef _OPENMP
#include <omp.h>
int cblas_splits = omp_get_max_threads();
#else
int cblas_splits = 1;
#endif


// Simple helper class for declarations: Just the numerical type is templated
// The index type and the sequential matrix type stay the same for the whole code
// In this case, they are "int64_t" and "SpDCCols"
template <class NT>
class PSpMat
{
public:
    typedef SpDCCols < int64_t, NT > DCCols;
    typedef SpParMat < int64_t, NT, DCCols > MPI_DCCols;
};
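
// Example launch for this benchmark (process count and matrix path are illustrative;
// CombBLAS' default square CommGrid expects a perfect-square number of MPI processes):
//   mpirun -np 16 ./BcastTest input.mtx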

int main(int argc, char* argv[])
{
    int nprocs, myrank;
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD,&myrank);

    if(argc < 2){
        if(myrank == 0)
        {
            cout << "Usage: ./<Binary> <MatrixA> " << endl;
        }
        MPI_Finalize();
        return -1;
    }
    else {
        string Aname(argv[1]);

        shared_ptr<CommGrid> fullWorld;
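        // Passing (0, 0) lets CommGrid infer a square sqrt(p) x sqrt(p) process grid from the communicator size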
        fullWorld.reset( new CommGrid(MPI_COMM_WORLD, 0, 0) );
        
        SpParMat<int64_t, double, SpDCCols < int64_t, double >> M(fullWorld);
        typedef PlusTimesSRing<double, double> PTFF;

        M.ParallelReadMM(Aname, true, maximum<double>());
        FullyDistVec<int64_t, int64_t> p( M.getcommgrid() );
        p.iota(M.getnrow(), 0);
        p.RandPerm();
        (M)(p,p,true); // in-place permute to save memory

        //M.ReadGeneralizedTuples(Aname, maximum<double>());
        
        SpParMat<int64_t, double, SpDCCols < int64_t, double >> A(M);
        //SpParMat3D<int64_t, double, SpDCCols < int64_t, double >> A3D(A, 4, true, false);
        SpParMat<int64_t, double, SpDCCols < int64_t, double >> B(M);
        //SpParMat3D<int64_t, double, SpDCCols < int64_t, double >> B3D(B, 4, false, false);

        typedef int64_t IT;
        typedef double NT;
        typedef SpDCCols < int64_t, double > DER;

        double Abcasttime = 0;
        double Bbcasttime = 0;
        double t0, t1;

        int dummy, stages;
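        // ProductGrid aligns A's and B's process grids for the SUMMA-style multiply and
        // reports the number of broadcast stages (the inner grid dimension) through 'stages'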
        std::shared_ptr<CommGrid> GridC = ProductGrid((A.getcommgrid()).get(), (B.getcommgrid()).get(), stages, dummy, dummy);
        
        //int buffsize = 1024 * 1024 * (512 / sizeof(IT));
        //if(myrank == 0) fprintf(stderr, "Memory to be allocated %d\n", buffsize);
        //IT * sendbuf = new IT[buffsize];
        //if(myrank == 0) fprintf(stderr, "Memory allocated\n");

        for(int phases = 1; phases <= 256; phases = phases * 2){
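            // Each outer iteration doubles the number of column-split phases of B (1, 2, 4, ..., 256)
            // and re-times the A/B broadcasts three times for that setting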
            if(myrank == 0) fprintf(stderr, "Running with phase: %d\n", phases);
            for(int it = 0; it < 3; it++){
                Abcasttime = 0;
                Bbcasttime = 0;
                
                std::vector< DER > PiecesOfB;
                DER CopyB = *(B.seqptr()); // we allow alias matrices as input because of this local copy
                
                CopyB.ColSplit(phases, PiecesOfB); // CopyB's memory is destroyed at this point
                MPI_Barrier(GridC->GetWorld());
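                // ARecvSizes/BRecvSizes hold, per stage, the DER::esscount "essential" sizes
                // (nonzero and dimension counts) a receiver needs to allocate an incoming piece before the broadcast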
                
                IT ** ARecvSizes = SpHelper::allocate2D<IT>(DER::esscount, stages);
                IT ** BRecvSizes = SpHelper::allocate2D<IT>(DER::esscount, stages);
                
                SpParHelper::GetSetSizes( *(A.seqptr()), ARecvSizes, (A.getcommgrid())->GetRowWorld());
                
                // Remotely fetched matrices are stored as pointers
                DER * ARecv;
                DER * BRecv;
                
                int Aself = (A.getcommgrid())->GetRankInProcRow();
                int Bself = (B.getcommgrid())->GetRankInProcCol();

                //int chunksize = buffsize / phases;
                //if(myrank == 0) fprintf(stderr, "chunksize: %d\n", chunksize);

                for(int p = 0; p < phases; ++p)
                {
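                    // For each column piece of B: exchange its essentials across the process column,
                    // then broadcast the piece stage by stage (one root per stage)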
                    SpParHelper::GetSetSizes( PiecesOfB[p], BRecvSizes, (B.getcommgrid())->GetColWorld());
                    for(int i = 0; i < stages; ++i)
                    {
                        //t0 = MPI_Wtime();
                        //MPI_Bcast(sendbuf+(chunksize*p), chunksize, MPIType<IT>(), i, GridC->GetColWorld());
                        //t1 = MPI_Wtime();
                        //Bbcasttime += (t1-t0);
                        
                        std::vector<IT> ess;

                        if(i == Aself)  ARecv = A.seqptr();	// shallow-copy
                        else {
                            ess.resize(DER::esscount);
                            for(int j=0; j< DER::esscount; ++j)
                                ess[j] = ARecvSizes[j][i];		// essentials of the ith matrix in this row
                            ARecv = new DER();				// first, create the object
                        }
                        MPI_Barrier(A.getcommgrid()->GetWorld());                       
                        t0=MPI_Wtime();
                        //SpParHelper::BCastMatrix(GridC->GetRowWorld(), *ARecv, ess, i);	// then, receive its elements
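                        // NOTE: the A-side broadcast above is left commented out, so Abcasttime
                        // only captures the cost of the surrounding barriers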
                        MPI_Barrier(A.getcommgrid()->GetWorld());                       
                        t1=MPI_Wtime();
                        Abcasttime += (t1-t0);

                        ess.clear();

                        if(i == Bself)  BRecv = &(PiecesOfB[p]);	// shallow-copy
                        else {
                            ess.resize(DER::esscount);
                            for(int j=0; j< DER::esscount; ++j)
                                ess[j] = BRecvSizes[j][i];
                            BRecv = new DER();
                        }
                        MPI_Barrier(A.getcommgrid()->GetWorld());                       
                        t0=MPI_Wtime();
                        SpParHelper::BCastMatrix(GridC->GetColWorld(), *BRecv, ess, i);	// then, receive its elements
                        MPI_Barrier(A.getcommgrid()->GetWorld());                       
                        t1=MPI_Wtime();
                        Bbcasttime += (t1-t0);

                        if(i != Aself) {
                            if(ARecv != NULL) delete ARecv;     // ARecv is allocated above even though its broadcast is disabled, so free it to avoid a leak
                        }
                        if(i != Bself) {
                            if(BRecv != NULL) delete BRecv;
                        }
                        
                    }   // all stages executed
                    
                }
                
                
                SpHelper::deallocate2D(ARecvSizes, DER::esscount);
                SpHelper::deallocate2D(BRecvSizes, DER::esscount);
                if(myrank == 0){
                    fprintf(stderr, "Iteration : %d - Abcasttime: %lf\n", it, Abcasttime);
                    fprintf(stderr, "Iteration : %d - Bbcasttime: %lf\n", it, Bbcasttime);
                }
            }
            if(myrank == 0) fprintf(stderr, "\n\n++++++++++++++++++++++++++++++++++++++++++++\n\n\n\n");
        }
    }
    MPI_Finalize();
    return 0;
}