1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
|
/*
//
// Copyright 2010 SRI International
//
// This file is part of the Computational Morphometry Toolkit.
//
// http://www.nitrc.org/projects/cmtk/
//
// The Computational Morphometry Toolkit is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// The Computational Morphometry Toolkit is distributed in the hope that it
// will be useful, but WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with the Computational Morphometry Toolkit. If not, see
// <http://www.gnu.org/licenses/>.
//
// $Revision: 2124 $
//
// $LastChangedDate: 2010-07-30 15:04:33 -0700 (Fri, 30 Jul 2010) $
//
// $LastChangedBy: torstenrohlfing $
//
*/
#include "cmtkSumReduction_kernel.h"
#include "GPU/cmtkCUDA.h"
#include <cuda_runtime_api.h>
/** In-place sum reduction of an array using the threads of a single block.
 *
 * On return, data[0] holds the sum of data[0..n-1]. The array is used as
 * scratch space: elements data[0..min(blockDim.x,n)-1] are overwritten with
 * partial sums, so the input values are destroyed.
 *
 * Launch layout: the kernel indexes by threadIdx.x only, so it must be
 * launched with exactly ONE block (1D). With more than one block, every block
 * would accumulate into the same elements concurrently.
 *
 * \param data Array of n elements, dereferenced on the device.
 * \param n Number of valid elements in data. For n <= 0 nothing is read or
 *  written.
 */
template<class T>
__global__
void cmtkSumReductionKernel( T* data, const int n )
{
  const int tx = threadIdx.x;
  const int nThreads = blockDim.x;

  // Only the first min(nThreads, n) elements exist AND carry a partial sum.
  // Without this clamp the final pass would read past the end of the array
  // whenever n is smaller than the block size.
  const int nPartials = ( n < nThreads ) ? n : nThreads;

  // Phase 1: thread tx folds elements tx+nThreads, tx+2*nThreads, ... into
  // data[tx]. Reads come from indices >= nThreads and writes go to indices
  // < nThreads, so threads never touch each other's elements here.
  for ( int i = tx + nThreads; i < n; i += nThreads )
    {
    data[tx] += data[i];
    }

  // All partial sums must be written before thread 0 collects them.
  __syncthreads();

  // Phase 2: thread 0 serially adds the remaining partial sums into data[0].
  if ( tx == 0 )
    {
    for ( int i = 1; i < nPartials; ++i )
      {
      data[0] += data[i];
      }
    }
}
/** Compute the sum of the first n elements of a device array.
 *
 * Launches cmtkSumReductionKernel with one block of 512 threads and copies
 * the first array element (where the kernel leaves the total) back to the
 * host with a blocking cudaMemcpy.
 *
 * \warning The reduction is performed in place: the contents of data are
 *  overwritten with partial sums.
 *
 * \param data Pointer to device memory holding at least n elements.
 * \param n Number of elements to sum.
 * \return Sum of data[0..n-1]; zero if n <= 0.
 */
template<class T>
T
cmtk::SumReduction( T* data, const int n )
{
  // An empty range has no element to read back from the device; its sum is
  // zero by definition, so skip the launch and the copy entirely.
  if ( n <= 0 )
    {
    return T( 0 );
    }

  cmtkSumReductionKernel<T><<<1,512>>>( data, n );
  // A kernel launch returns no status itself; launch failures (e.g., an
  // unsupported block size) are only reported through cudaGetLastError().
  cmtkCheckCallCUDA( cudaGetLastError() );

  T result;
  cmtkCheckCallCUDA( cudaMemcpy( &result, data, sizeof( T ), cudaMemcpyDeviceToHost ) );
  return result;
}
// Explicit instantiations of cmtk::SumReduction for the element types int and
// float, so that these two specializations are generated from the template
// definition in this file.
template int cmtk::SumReduction<int>( int* data, const int n );
template float cmtk::SumReduction<float>( float* data, const int n );
|