/*
-- MAGMA (version 2.9.0) --
Univ. of Tennessee, Knoxville
Univ. of California, Berkeley
Univ. of Colorado, Denver
@date January 2025
@author Azzam Haidar
*/
#include <cuda_runtime.h>
#include "magma_internal.h"
extern "C" {
/***************************************************************************//**
    Partitions the ngpu visible GPUs into "complexes": groups of devices that
    can access each other's memory directly (peer-to-peer).  Peer access is
    enabled in both directions for every pair of devices inside a complex.

    @param[out]
    gnode   Table describing the complexes.  Row c lists the device IDs of
            complex c in gnode[c][0 .. k-1], and gnode[c][MagmaMaxGPUs]
            holds k, the number of devices in that complex.

    @param[out]
    ncmplx  On exit, ncmplx[0] is the number of complexes found.

    @param[in]
    ngpu    Number of GPUs to examine (devices 0 .. ngpu-1).

    @return The number of complexes (>= 1) on success;
            -1 if some device lacks unified addressing, or if MAGMA was built
               with neither CUDA nor HIP support;
            -2 if cudaDeviceEnablePeerAccess fails.
*******************************************************************************/
magma_int_t magma_buildconnection_mgpu(
    magma_int_t gnode[MagmaMaxGPUs+2][MagmaMaxGPUs+2],
    magma_int_t *ncmplx, magma_int_t ngpu)
{
    // NOTE(review): previously the CUDA/HIP guard opened *before* the function
    // signature while its #else/#endif sat inside the body, so a build with
    // neither backend preprocessed to `extern "C" { return -1; } }` — a syntax
    // error.  The guard now lives entirely inside the function body, so the
    // symbol exists (as a stub) in every configuration.
#if defined(MAGMA_HAVE_CUDA) || defined(MAGMA_HAVE_HIP)
    // deviceid[d] == 0  <=>  device d has not yet been assigned to a complex.
    magma_int_t *deviceid = NULL;
    magma_imalloc_cpu( &deviceid, ngpu );
    memset( deviceid, 0, ngpu*sizeof(magma_int_t) );

    ncmplx[0] = 0;

    int samecomplex = -1;
    cudaError_t err;
    cudaDeviceProp prop;

    magma_int_t cmplxnb = 0;  // number of complexes discovered so far
    magma_int_t cmplxid = 0;  // index of the complex currently being filled
    magma_int_t lcgpunb = 0;  // local slot of a GPU inside its complex

    for( magma_int_t d = 0; d < ngpu; ++d ) {
        // check for unified memory & enable peer memory access between all GPUs.
        magma_setdevice( d );
        cudaGetDeviceProperties( &prop, int(d) );
        #ifdef MAGMA_HAVE_CUDA
        if ( ! prop.unifiedAddressing ) {
        #elif defined(MAGMA_HAVE_HIP)
        // assume it does, HIP does not have support for checking this
        if ( ! true ) {
        #endif
            printf( "device %lld doesn't support unified addressing\n", (long long) d );
            magma_free_cpu( deviceid );
            return -1;
        }

        // Device d starts a new complex if no earlier device claimed it.
        if (deviceid[d] == 0) {
            cmplxnb = cmplxnb + 1;
            cmplxid = cmplxnb - 1;
            gnode[cmplxid][MagmaMaxGPUs] = 1;         // complex population = 1
            lcgpunb = gnode[cmplxid][MagmaMaxGPUs]-1;
            gnode[cmplxid][lcgpunb] = d;
            deviceid[d] = -1;                          // mark d as assigned
        }

        for( magma_int_t d2 = d+1; d2 < ngpu; ++d2 ) {
            // check for unified memory & enable peer memory access between all GPUs.
            magma_setdevice( d2 );
            cudaGetDeviceProperties( &prop, int(d2) );
            #ifdef MAGMA_HAVE_CUDA
            if ( ! prop.unifiedAddressing ) {
            #elif defined(MAGMA_HAVE_HIP)
            // assume it does, HIP does not have support for checking this
            if ( ! true ) {
            #endif
                printf( "device %lld doesn't support unified addressing\n", (long long) d2 );
                magma_free_cpu( deviceid );
                return -1;
            }

            /* TODO err = */ cudaDeviceCanAccessPeer( &samecomplex, int(d), int(d2) );

            if (samecomplex == 1) {
                // d and d2 are on the same complex so add them; note that d is
                // already added, so just enable peer access for d and
                // enable+add d2.
                // FOR d:
                magma_setdevice( d );
                err = cudaDeviceEnablePeerAccess( int(d2), 0 );
                // "already enabled" is not an error: an earlier iteration may
                // have enabled this pair while building a previous complex.
                if ( err != cudaSuccess && err != cudaErrorPeerAccessAlreadyEnabled ) {
                    printf( "device %lld cudaDeviceEnablePeerAccess error %lld\n",
                            (long long) d2, (long long) err );
                    magma_free_cpu( deviceid );
                    return -2;
                }

                // FOR d2:
                magma_setdevice( d2 );
                err = cudaDeviceEnablePeerAccess( int(d), 0 );
                if ((err == cudaSuccess) || (err == cudaErrorPeerAccessAlreadyEnabled)) {
                    if (deviceid[d2] == 0) {
                        // First time we see d2: append it to the current complex.
                        gnode[cmplxid][MagmaMaxGPUs] = gnode[cmplxid][MagmaMaxGPUs]+1;
                        lcgpunb = gnode[cmplxid][MagmaMaxGPUs]-1;
                        gnode[cmplxid][lcgpunb] = d2;
                        deviceid[d2] = -1;
                    }
                } else {
                    printf( "device %lld cudaDeviceEnablePeerAccess error %lld\n",
                            (long long) d, (long long) err );
                    magma_free_cpu( deviceid );
                    return -2;
                }
            }
        }
    }

    ncmplx[0] = cmplxnb;
    magma_free_cpu( deviceid );
    return cmplxnb;
#else
    // Err: CUDA only
    (void) gnode;   // silence unused-parameter warnings in the stub build
    (void) ncmplx;
    (void) ngpu;
    return -1;
#endif
}
} /* extern "C" */