1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185
|
/*------------------------------------------------------------------------------------------------*/
/* Copyright (C) by the DBCSR developers group - All rights reserved */
/* This file is part of the DBCSR library. */
/* */
/* For information on the license, see the LICENSE file. */
/* For further information please visit https://dbcsr.cp2k.org */
/* SPDX-License-Identifier: GPL-2.0+ */
/*------------------------------------------------------------------------------------------------*/
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <random>
#include <vector>
#include <mpi.h>
#include <dbcsr.h>
// Random distribution by using round-robin assignment
// of blocks to processors
std::vector<int> random_dist(int dist_size, int nbins) {
std::vector<int> dist(dist_size);
for (int i = 0; i < dist_size; i++) dist[i] = i % nbins;
return dist;
}
// DBCSR example 3
// This example shows how to multiply two DBCSR matrices
int main(int argc, char* argv[]) {
// initialize MPI
MPI_Init(&argc, &argv);
// setup the mpi environment
int mpi_size, mpi_rank;
MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
// make 2D grid
int dims[2] = {0};
MPI_Dims_create(mpi_size, 2, dims);
int periods[2] = {1};
int reorder = 0;
MPI_Comm group;
MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, reorder, &group);
int coord[2];
MPI_Cart_coords(group, mpi_rank, 2, coord);
for (int i = 0; i != mpi_size; ++i) {
if (mpi_rank == i) {
std::cout << "I'm processor " << mpi_rank << " over " << mpi_size << " proc"
<< ", (" << coord[0] << ", " << coord[1] << ") in the 2D grid" << std::endl;
}
MPI_Barrier(MPI_COMM_WORLD);
}
// initialize the DBCSR library
c_dbcsr_init_lib(MPI_COMM_WORLD, nullptr);
// Total number of blocks
int nrows_1 = 4;
int ncols_1 = 5;
int nrows_2 = 5;
int ncols_2 = 4;
// Block sizes
std::vector<int> row_blk_sizes_1 = {2, 3, 5, 2};
std::vector<int> col_blk_sizes_1 = {3, 3, 4, 6, 2};
std::vector<int> row_blk_sizes_2 = col_blk_sizes_1;
std::vector<int> col_blk_sizes_2 = {5, 2, 5, 3};
auto row_dist_1 = random_dist(nrows_1, dims[0]);
auto col_dist_1 = random_dist(ncols_1, dims[1]);
auto row_dist_2 = random_dist(nrows_2, dims[0]);
auto col_dist_2 = random_dist(ncols_2, dims[1]);
dbcsr_distribution dist1 = nullptr;
dbcsr_distribution dist2 = nullptr;
dbcsr_distribution dist3 = nullptr;
//create distributions
c_dbcsr_distribution_new(&dist1, group, row_dist_1.data(), row_dist_1.size(), col_dist_1.data(), col_dist_1.size());
c_dbcsr_distribution_new(&dist2, group, row_dist_2.data(), row_dist_2.size(), col_dist_2.data(), col_dist_2.size());
c_dbcsr_distribution_new(&dist3, group, row_dist_1.data(), row_dist_1.size(), col_dist_2.data(), col_dist_2.size());
// Fill all blocks, i.e. dense matrices
auto fill_matrix = [&](void* matrix, std::vector<int>& irblks, std::vector<int>& icblks) {
std::vector<double> block;
std::vector<int> loc_irblks, loc_icblks;
for (int i = 0; i != (int)irblks.size(); ++i) {
int blk_proc = -1;
int ix = irblks[i];
int jx = icblks[i];
c_dbcsr_get_stored_coordinates(matrix, ix, jx, &blk_proc);
if (mpi_rank == blk_proc) {
loc_irblks.push_back(ix);
loc_icblks.push_back(jx);
}
}
c_dbcsr_reserve_blocks(matrix, loc_irblks.data(), loc_icblks.data(), loc_irblks.size());
void* iter = nullptr;
c_dbcsr_iterator_start(&iter, matrix, nullptr, nullptr, nullptr, nullptr, nullptr);
while (c_dbcsr_iterator_blocks_left(iter)) {
int i = -1;
int j = -1;
int nblk = -1;
int rsize = -1;
int csize = -1;
bool tr = false;
double* blk = nullptr;
c_dbcsr_iterator_next_2d_block_d(iter, &i, &j, &blk, &tr, &nblk, &rsize, &csize, nullptr, nullptr);
std::generate(blk, blk + rsize * csize, [&]() { return static_cast<double>(std::rand()) / RAND_MAX; });
}
c_dbcsr_iterator_stop(&iter);
};
dbcsr_matrix matrix_a = nullptr;
dbcsr_matrix matrix_b = nullptr;
dbcsr_matrix matrix_c = nullptr;
c_dbcsr_create_new(&matrix_a, "matrix a", dist1, dbcsr_type_no_symmetry, row_blk_sizes_1.data(), row_blk_sizes_1.size(),
col_blk_sizes_1.data(), col_blk_sizes_1.size(), nullptr, nullptr, nullptr, nullptr, nullptr, nullptr);
c_dbcsr_create_new(&matrix_b, "matrix b", dist2, dbcsr_type_no_symmetry, row_blk_sizes_2.data(), row_blk_sizes_2.size(),
col_blk_sizes_2.data(), col_blk_sizes_2.size(), nullptr, nullptr, nullptr, nullptr, nullptr, nullptr);
c_dbcsr_create_new(&matrix_c, "matrix c", dist3, dbcsr_type_no_symmetry, row_blk_sizes_1.data(), row_blk_sizes_1.size(),
col_blk_sizes_2.data(), col_blk_sizes_2.size(), nullptr, nullptr, nullptr, nullptr, nullptr, nullptr);
// indices of non-zero blocks
std::vector<int> irblks_1 = {0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3};
std::vector<int> icblks_1 = {0, 1, 2, 4, 0, 2, 3, 1, 3, 4, 0, 1, 2};
std::vector<int> irblks_2 = {0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4};
std::vector<int> icblks_2 = {0, 2, 3, 0, 1, 2, 3, 0, 2, 3, 1, 2, 3, 0, 1, 2, 3};
std::vector<int> irblks_3 = {0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3};
std::vector<int> icblks_3 = {0, 1, 2, 3, 0, 2, 3, 1, 2, 3, 0, 1, 2, 3};
fill_matrix(matrix_a, irblks_1, icblks_1);
c_dbcsr_finalize(matrix_a);
fill_matrix(matrix_b, irblks_2, icblks_2);
c_dbcsr_finalize(matrix_b);
fill_matrix(matrix_c, irblks_3, icblks_3);
c_dbcsr_finalize(matrix_c);
// Compute C = 3.0 * A * B + 2.0 * C
c_dbcsr_multiply_d('N', 'N', 3.0, matrix_a, matrix_b, 2.0, matrix_c, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr);
c_dbcsr_print(matrix_c);
// release the matrices
c_dbcsr_release(&matrix_a);
c_dbcsr_release(&matrix_b);
c_dbcsr_release(&matrix_c);
c_dbcsr_distribution_release(&dist1);
c_dbcsr_distribution_release(&dist2);
c_dbcsr_distribution_release(&dist3);
MPI_Comm_free(&group);
// finalize the DBCSR library
c_dbcsr_finalize_lib();
// finalize MPI
MPI_Finalize();
return 0;
}
|