File: connection_mgpu.cpp

package info (click to toggle)
magma 2.9.0%2Bds-2
  • links: PTS, VCS
  • area: contrib
  • in suites: forky, sid, trixie
  • size: 83,212 kB
  • sloc: cpp: 709,115; fortran: 121,916; ansic: 32,343; python: 25,603; f90: 15,208; makefile: 942; xml: 253; csh: 232; sh: 203; perl: 104
file content (131 lines) | stat: -rw-r--r-- 4,763 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
/*
    -- MAGMA (version 2.9.0) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       @date January 2025

       @author Azzam Haidar
*/
#include <cuda_runtime.h>

#include "magma_internal.h"

extern "C" {

#if defined(MAGMA_HAVE_CUDA) || defined(MAGMA_HAVE_HIP)
magma_int_t magma_buildconnection_mgpu(
    magma_int_t gnode[MagmaMaxGPUs+2][MagmaMaxGPUs+2],
    magma_int_t *ncmplx, magma_int_t ngpu)
{
    magma_int_t *deviceid = NULL;
    magma_imalloc_cpu( &deviceid, ngpu );
    memset( deviceid, 0, ngpu*sizeof(magma_int_t) );

    ncmplx[0] = 0;

    int samecomplex = -1;
    cudaError_t err;
    cudaDeviceProp prop;

    magma_int_t cmplxnb = 0;
    magma_int_t cmplxid = 0;
    magma_int_t lcgpunb = 0;
    for( magma_int_t d = 0; d < ngpu; ++d ) {
        // check for unified memory & enable peer memory access between all GPUs.
        magma_setdevice( d );
        cudaGetDeviceProperties( &prop, int(d) );

        #ifdef MAGMA_HAVE_CUDA
        if ( ! prop.unifiedAddressing ) {
        #elif defined(MAGMA_HAVE_HIP)
        // assume it does, HIP does not have support for checking this
        if ( ! true ) {
        #endif
            printf( "device %lld doesn't support unified addressing\n", (long long) d );
            magma_free_cpu( deviceid );
            return -1;
        }

        // add this device to the list if not added yet.
        // not added yet meaning belong to a new complex
        if (deviceid[d] == 0) {
            cmplxnb = cmplxnb + 1;
            cmplxid = cmplxnb - 1;
            gnode[cmplxid][MagmaMaxGPUs] = 1;
            lcgpunb = gnode[cmplxid][MagmaMaxGPUs]-1;
            gnode[cmplxid][lcgpunb] = d;
            deviceid[d] = -1;
        }
        //printf("device %lld:\n", (long long) d );

        for( magma_int_t d2 = d+1; d2 < ngpu; ++d2 ) {
            // check for unified memory & enable peer memory access between all GPUs.
            magma_setdevice( d2 );
            cudaGetDeviceProperties( &prop, int(d2) );
            #ifdef MAGMA_HAVE_CUDA
            if ( ! prop.unifiedAddressing ) {
            #elif defined(MAGMA_HAVE_HIP)
            // assume it does, HIP does not have support for checking this
            if ( ! true ) {
            #endif
                printf( "device %lld doesn't support unified addressing\n", (long long) d2 );
                magma_free_cpu( deviceid );
                return -1;
            }

            /* TODO err = */ cudaDeviceCanAccessPeer( &samecomplex, int(d), int(d2) );

            //printf(" device %lld and device %lld have samecomplex = %lld\n",
            //       (long long) d, (long long) d2, (long long) samecomplex );
            if (samecomplex == 1) {
                // d and d2 are on the same complex so add them, note that d is already added
                // so just enable the peer Access for d and enable+add d2.
                // FOR d:
                magma_setdevice( d );
                err = cudaDeviceEnablePeerAccess( int(d2), 0 );
                //printf("enabling devide %lld ==> %lld  error %lld\n",
                //       (long long) d, (long long) d2, (long long) err );
                if ( err != cudaSuccess && err != cudaErrorPeerAccessAlreadyEnabled ) {
                    printf( "device %lld cudaDeviceEnablePeerAccess error %lld\n",
                            (long long) d2, (long long) err );
                    magma_free_cpu( deviceid );
                    return -2;
                }

                // FOR d2:
                magma_setdevice( d2 );
                err = cudaDeviceEnablePeerAccess( int(d), 0 );
                //printf("enabling devide %lld ==> %lld  error %lld\n",
                //       (long long) d2, (long long) d, (long long) err );
                if ((err == cudaSuccess) || (err == cudaErrorPeerAccessAlreadyEnabled)) {
                    if (deviceid[d2] == 0) {
                        //printf("adding device %lld\n", (long long) d2 );
                        gnode[cmplxid][MagmaMaxGPUs] = gnode[cmplxid][MagmaMaxGPUs]+1;
                        lcgpunb                      = gnode[cmplxid][MagmaMaxGPUs]-1;
                        gnode[cmplxid][lcgpunb] = d2;
                        deviceid[d2] = -1;
                    }
                } else {
                    printf( "device %lld cudaDeviceEnablePeerAccess error %lld\n",
                            (long long) d, (long long) err );
                    magma_free_cpu( deviceid );
                    return -2;
                }
            }
        }
    }

    ncmplx[0] = cmplxnb;
    magma_free_cpu( deviceid );
    return cmplxnb;
#else 
    // Err: CUDA only
    return -1;
#endif

}

} /* extern "C" */