File: gemm_mpi.cpp (simgrid 4.1-1)

/* Copyright (c) 2019-2025. The SimGrid Team. All rights reserved.          */

/* This program is free software; you can redistribute it and/or modify it
 * under the terms of the license (GNU LGPL) which comes with this package. */

#include <mpi.h>
#include <stdio.h>

const int size = 3000;
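// The three size x size float matrices below are kept in static storage:
// about 34 MiB each with size = 3000, roughly 103 MiB per rank in total.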

float a[size][size];
float b[size][size];
float c[size][size];

void multiply(int istart, int iend)
{
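    // Accumulate C += A * B over the row block [istart, iend] (inclusive),
    // using the naive i/j/k triple loop.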
    for (int i = istart; i <= iend; ++i) {
        for (int j = 0; j < size; ++j) {
            for (int k = 0; k < size; ++k) {
                c[i][j] += a[i][k] * b[k][j];
            }
        }
    }
}

int main(int argc, char* argv[])
{
    int rank, nproc;
    int istart, iend;
    // double start, end;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    // MPI_Barrier(MPI_COMM_WORLD);
    // start = MPI_Wtime();

    if (rank == 0) {
        // Initialize buffers.
        for (int i = 0; i < size; ++i) {
            for (int j = 0; j < size; ++j) {
                a[i][j] = (float)i + j;
                b[i][j] = (float)i - j;
                c[i][j] = 0.0f;
            }
        }
    }
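    // Non-root ranks do not initialize anything here; they get their copies
    // of the matrices through the broadcasts below.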

    // Broadcast matrices to all workers.
    MPI_Bcast(a, size*size, MPI_FLOAT, 0, MPI_COMM_WORLD);
    MPI_Bcast(b, size*size, MPI_FLOAT, 0, MPI_COMM_WORLD);
    MPI_Bcast(c, size*size, MPI_FLOAT, 0, MPI_COMM_WORLD);
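    // After these broadcasts every rank holds a full copy of A, B and the
    // zero-initialized C (each call moves size*size floats).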

    // Partition work by i-for-loop.
    istart = (size / nproc) * rank;
    iend = (size / nproc) * (rank + 1) - 1;
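    // Integer division: each rank handles size/nproc consecutive rows.
    // When size % nproc != 0 the trailing rows are left out here; rank 0
    // computes them separately after the gather below.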

    // Compute matrix multiplication in [istart,iend]
    // of i-for-loop.
    // C <- C + A x B
    multiply(istart, iend);

    // Gather computed results.
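    // c decays to a pointer to rows, so c + (size/nproc*rank) is this rank's
    // block of size/nproc rows (size*size/nproc floats). On the root the
    // offset is 0, so the gathered blocks land back in c in row order.
    // (Strictly, MPI expects MPI_IN_PLACE for the root's send buffer when it
    // overlaps the receive buffer, as it does here.)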
    MPI_Gather(c + (size/nproc*rank),
               size*size/nproc,
               MPI_FLOAT,
               c + (size/nproc*rank),
               size*size/nproc,
               MPI_FLOAT,
               0,
               MPI_COMM_WORLD);

    if (rank == 0) {
        // Compute remaining multiplications
        // when size % nproc > 0.
        if (size % nproc > 0) {
            multiply((size/nproc)*nproc, size-1);
        }
    }
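    // Only rank 0 now holds the fully assembled result matrix C.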

    // MPI_Barrier(MPI_COMM_WORLD);
    // end = MPI_Wtime();

    MPI_Finalize();

    // if (rank == 0) { /* use time on master node */
    //     // Compute and print the performance (MPI_Wtime() returns seconds).
    //     double sec_per_matrix_mul = end - start;
    //     double flops_per_matrix_mul = 2.0 * (double)size * (double)size * (double)size;
    //     double giga_flops = (flops_per_matrix_mul * 1.0e-9) / sec_per_matrix_mul;
    //     printf(
    //         "Performance= %.2f GFlop/s, Time= %.3f s, Size= %.0f Ops\n",
    //         giga_flops,
    //         sec_per_matrix_mul,
    //         flops_per_matrix_mul);
    // }

    return 0;
}