1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
|
//------------------------------------------------------------------------------
// GB_memcpy: parallel memcpy
//------------------------------------------------------------------------------
// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//------------------------------------------------------------------------------
// Note that this function uses its own hard-coded chunk size.
#include "GB.h"
// GB_MEM_CHUNK: chunk size in bytes (1024*1024 = 1 MiB).  GB_memcpy uses it in
// two ways: a copy of at most this many bytes is done with a single memcpy,
// and a larger copy is split into pieces of at most this many bytes each.
#define GB_MEM_CHUNK (1024*1024)
void GB_memcpy // parallel memcpy
(
void *dest, // destination
const void *src, // source
size_t n, // # of bytes to copy
int nthreads // max # of threads to use
)
{
if (nthreads <= 1 || n <= GB_MEM_CHUNK)
{
//----------------------------------------------------------------------
// memcpy using a single thread
//----------------------------------------------------------------------
memcpy (dest, src, n) ;
}
else
{
//----------------------------------------------------------------------
// memcpy using multiple threads
//----------------------------------------------------------------------
size_t nchunks = 1 + (n / GB_MEM_CHUNK) ;
if (((size_t) nthreads) > nchunks)
{
nthreads = (int) nchunks ;
}
GB_void *pdest = (GB_void *) dest ;
const GB_void *psrc = (GB_void *) src ;
int64_t k ;
#pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
for (k = 0 ; k < nchunks ; k++)
{
size_t start = k * GB_MEM_CHUNK ;
if (start < n)
{
size_t chunk = GB_IMIN (n - start, GB_MEM_CHUNK) ;
memcpy (pdest + start, psrc + start, chunk) ;
}
}
}
}
|