File: connection_mgpu.cpp

package info (click to toggle)
magma 2.5.4%2Bds-3
  • links: PTS, VCS
  • area: contrib
  • in suites: bullseye
  • size: 55,132 kB
  • sloc: cpp: 403,043; fortran: 121,916; ansic: 29,190; python: 25,167; f90: 13,666; makefile: 776; csh: 232; xml: 182; sh: 178; perl: 88
file content (108 lines) | stat: -rw-r--r-- 4,250 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
/*
    -- MAGMA (version 2.5.4) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       @date October 2020

       @author Azzam Haidar
*/
#include <cuda_runtime.h>

#include "magma_internal.h"

/**
    Makes device `dev` current and verifies that it reports unified addressing.

    @param[in]
    dev     CUDA device index to test.

    @return 0 if the device reports unified addressing;
            -1 if it does not, or if its properties could not be queried.
            A message is printed in both failure cases.
            The current device is left set to `dev`.
*/
static magma_int_t magma_buildconnection_check_device( magma_int_t dev )
{
    cudaDeviceProp prop;

    magma_setdevice( dev );
    cudaError_t err = cudaGetDeviceProperties( &prop, int(dev) );
    if ( err != cudaSuccess ) {
        // prop is not valid here, so do not look at prop.unifiedAddressing.
        printf( "device %lld cudaGetDeviceProperties error %lld\n",
                (long long) dev, (long long) err );
        return -1;
    }
    if ( ! prop.unifiedAddressing ) {
        printf( "device %lld doesn't support unified addressing\n", (long long) dev );
        return -1;
    }
    return 0;
}

/**
    Groups devices 0..ngpu-1 into "complexes" of devices that can access each
    other's memory, and enables peer access (in both directions) between every
    pair (d, d2), d < d2, for which cudaDeviceCanAccessPeer( d, d2 ) reports 1.

    A device that is not yet part of a complex when it is visited starts a new
    one. A device d2 found to be a peer of d joins the complex that d belongs to.

    @param[out]
    gnode   For each complex c in 0..ncmplx-1:
            gnode[c][MagmaMaxGPUs]  is the number of devices in complex c, and
            gnode[c][0..count-1]    are the device indices in that complex.
            Other entries are not written.

    @param[out]
    ncmplx  ncmplx[0] is set to the number of complexes found; it is 0 whenever
            the function returns an error.

    @param[in]
    ngpu    Number of devices to examine; must satisfy 0 <= ngpu <= MagmaMaxGPUs.

    @return the number of complexes (>= 0) on success;
            -1 if a device does not support unified addressing or its
               properties cannot be queried;
            -2 if querying or enabling peer access fails;
            -3 if ngpu is out of range;
            -4 if the host work array cannot be allocated.

    On success with ngpu > 0 the current device is left set to ngpu-1;
    on failure it is whichever device was being examined.
*/
extern "C"
magma_int_t magma_buildconnection_mgpu(
    magma_int_t gnode[MagmaMaxGPUs+2][MagmaMaxGPUs+2],
    magma_int_t *ncmplx, magma_int_t ngpu)
{
    ncmplx[0] = 0;

    // Columns 0..MagmaMaxGPUs-1 of a gnode row hold device ids and column
    // MagmaMaxGPUs holds the count, so more than MagmaMaxGPUs devices would
    // overwrite the count (and a negative ngpu would overrun the work array).
    if ( ngpu < 0 || ngpu > MagmaMaxGPUs ) {
        printf( "invalid number of GPUs %lld (must be between 0 and %lld)\n",
                (long long) ngpu, (long long) MagmaMaxGPUs );
        return -3;
    }
    if ( ngpu == 0 ) {
        return 0;
    }

    // deviceid[d] == 0 means device d is not in any complex yet;
    // otherwise deviceid[d] is (index of d's complex) + 1.
    magma_int_t *deviceid = NULL;
    magma_imalloc_cpu( &deviceid, ngpu );
    if ( deviceid == NULL ) {
        printf( "host allocation of %lld integers failed\n", (long long) ngpu );
        return -4;
    }
    memset( deviceid, 0, ngpu*sizeof(magma_int_t) );

    cudaError_t err;
    magma_int_t cmplxnb = 0;   // number of complexes created so far

    for( magma_int_t d = 0; d < ngpu; ++d ) {
        // check for unified memory on d.
        if ( magma_buildconnection_check_device( d ) != 0 ) {
            magma_free_cpu( deviceid );
            return -1;
        }

        // Find the complex of d: either the one it was added to earlier as
        // somebody's peer, or a new one with d as its first member.
        magma_int_t cmplxid;
        if ( deviceid[d] == 0 ) {
            cmplxid = cmplxnb;
            cmplxnb = cmplxnb + 1;
            gnode[cmplxid][MagmaMaxGPUs] = 1;
            gnode[cmplxid][0]            = d;
            deviceid[d] = cmplxid + 1;
        }
        else {
            cmplxid = deviceid[d] - 1;
        }

        for( magma_int_t d2 = d+1; d2 < ngpu; ++d2 ) {
            // check for unified memory on d2.
            if ( magma_buildconnection_check_device( d2 ) != 0 ) {
                magma_free_cpu( deviceid );
                return -1;
            }

            // Reset for every pair so a failed query can never reuse the
            // answer obtained for a previous pair.
            int samecomplex = 0;
            err = cudaDeviceCanAccessPeer( &samecomplex, int(d), int(d2) );
            if ( err != cudaSuccess ) {
                printf( "device %lld cudaDeviceCanAccessPeer error %lld\n",
                        (long long) d2, (long long) err );
                magma_free_cpu( deviceid );
                return -2;
            }
            if ( samecomplex != 1 ) {
                continue;
            }

            // d and d2 are on the same complex. d is already recorded, so
            // enable peer access d ==> d2, then d2 ==> d, then record d2.

            // FOR d:
            magma_setdevice( d );
            err = cudaDeviceEnablePeerAccess( int(d2), 0 );
            if ( err == cudaErrorPeerAccessAlreadyEnabled ) {
                // Harmless here; clear it so that a later cudaGetLastError()
                // elsewhere does not report it.
                cudaGetLastError();
            }
            else if ( err != cudaSuccess ) {
                printf( "device %lld cudaDeviceEnablePeerAccess error %lld\n",
                        (long long) d2, (long long) err );
                magma_free_cpu( deviceid );
                return -2;
            }

            // FOR d2:
            magma_setdevice( d2 );
            err = cudaDeviceEnablePeerAccess( int(d), 0 );
            if ( err == cudaErrorPeerAccessAlreadyEnabled ) {
                cudaGetLastError();
            }
            else if ( err != cudaSuccess ) {
                printf( "device %lld cudaDeviceEnablePeerAccess error %lld\n",
                        (long long) d, (long long) err );
                magma_free_cpu( deviceid );
                return -2;
            }

            // Append d2 to d's complex unless it already belongs to one.
            if ( deviceid[d2] == 0 ) {
                magma_int_t lcgpunb = gnode[cmplxid][MagmaMaxGPUs];
                gnode[cmplxid][lcgpunb]      = d2;
                gnode[cmplxid][MagmaMaxGPUs] = lcgpunb + 1;
                deviceid[d2] = cmplxid + 1;
            }
        }
    }

    ncmplx[0] = cmplxnb;
    magma_free_cpu( deviceid );
    return cmplxnb;
}