1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
|
/*
-- MAGMA (version 2.5.4) --
Univ. of Tennessee, Knoxville
Univ. of California, Berkeley
Univ. of Colorado, Denver
@date October 2020
@author Azzam Haidar
*/
#include <cuda_runtime.h>
#include "magma_internal.h"
extern "C"
magma_int_t magma_buildconnection_mgpu(
magma_int_t gnode[MagmaMaxGPUs+2][MagmaMaxGPUs+2],
magma_int_t *ncmplx, magma_int_t ngpu)
{
magma_int_t *deviceid = NULL;
magma_imalloc_cpu( &deviceid, ngpu );
memset( deviceid, 0, ngpu*sizeof(magma_int_t) );
ncmplx[0] = 0;
int samecomplex = -1;
cudaError_t err;
cudaDeviceProp prop;
magma_int_t cmplxnb = 0;
magma_int_t cmplxid = 0;
magma_int_t lcgpunb = 0;
for( magma_int_t d = 0; d < ngpu; ++d ) {
// check for unified memory & enable peer memory access between all GPUs.
magma_setdevice( d );
cudaGetDeviceProperties( &prop, int(d) );
if ( ! prop.unifiedAddressing ) {
printf( "device %lld doesn't support unified addressing\n", (long long) d );
magma_free_cpu( deviceid );
return -1;
}
// add this device to the list if not added yet.
// not added yet meaning belong to a new complex
if (deviceid[d] == 0) {
cmplxnb = cmplxnb + 1;
cmplxid = cmplxnb - 1;
gnode[cmplxid][MagmaMaxGPUs] = 1;
lcgpunb = gnode[cmplxid][MagmaMaxGPUs]-1;
gnode[cmplxid][lcgpunb] = d;
deviceid[d] = -1;
}
//printf("device %lld:\n", (long long) d );
for( magma_int_t d2 = d+1; d2 < ngpu; ++d2 ) {
// check for unified memory & enable peer memory access between all GPUs.
magma_setdevice( d2 );
cudaGetDeviceProperties( &prop, int(d2) );
if ( ! prop.unifiedAddressing ) {
printf( "device %lld doesn't support unified addressing\n", (long long) d2 );
magma_free_cpu( deviceid );
return -1;
}
/* TODO err = */ cudaDeviceCanAccessPeer( &samecomplex, int(d), int(d2) );
//printf(" device %lld and device %lld have samecomplex = %lld\n",
// (long long) d, (long long) d2, (long long) samecomplex );
if (samecomplex == 1) {
// d and d2 are on the same complex so add them, note that d is already added
// so just enable the peer Access for d and enable+add d2.
// FOR d:
magma_setdevice( d );
err = cudaDeviceEnablePeerAccess( int(d2), 0 );
//printf("enabling devide %lld ==> %lld error %lld\n",
// (long long) d, (long long) d2, (long long) err );
if ( err != cudaSuccess && err != cudaErrorPeerAccessAlreadyEnabled ) {
printf( "device %lld cudaDeviceEnablePeerAccess error %lld\n",
(long long) d2, (long long) err );
magma_free_cpu( deviceid );
return -2;
}
// FOR d2:
magma_setdevice( d2 );
err = cudaDeviceEnablePeerAccess( int(d), 0 );
//printf("enabling devide %lld ==> %lld error %lld\n",
// (long long) d2, (long long) d, (long long) err );
if ((err == cudaSuccess) || (err == cudaErrorPeerAccessAlreadyEnabled)) {
if (deviceid[d2] == 0) {
//printf("adding device %lld\n", (long long) d2 );
gnode[cmplxid][MagmaMaxGPUs] = gnode[cmplxid][MagmaMaxGPUs]+1;
lcgpunb = gnode[cmplxid][MagmaMaxGPUs]-1;
gnode[cmplxid][lcgpunb] = d2;
deviceid[d2] = -1;
}
} else {
printf( "device %lld cudaDeviceEnablePeerAccess error %lld\n",
(long long) d, (long long) err );
magma_free_cpu( deviceid );
return -2;
}
}
}
}
ncmplx[0] = cmplxnb;
magma_free_cpu( deviceid );
return cmplxnb;
}
|