File: cmtkSumReduction_kernel.cu

package info (click to toggle)

cmtk 3.3.1p2%2Bdfsg-4

links: PTS, VCS
area: main
in suites: forky, sid
size: 10,524 kB
sloc: cpp: 87,098; ansic: 23,347; sh: 3,896; xml: 1,551; perl: 707; makefile: 334

file content (69 lines) | stat: -rw-r--r-- 1,836 bytes

parent folder | download | duplicates (8)

/*
//
//  Copyright 2010 SRI International
//
//  This file is part of the Computational Morphometry Toolkit.
//
//  http://www.nitrc.org/projects/cmtk/
//
//  The Computational Morphometry Toolkit is free software: you can
//  redistribute it and/or modify it under the terms of the GNU General Public
//  License as published by the Free Software Foundation, either version 3 of
//  the License, or (at your option) any later version.
//
//  The Computational Morphometry Toolkit is distributed in the hope that it
//  will be useful, but WITHOUT ANY WARRANTY; without even the implied
//  warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//  You should have received a copy of the GNU General Public License along
//  with the Computational Morphometry Toolkit.  If not, see
//  <http://www.gnu.org/licenses/>.
//
//  $Revision: 2124 $
//
//  $LastChangedDate: 2010-07-30 15:04:33 -0700 (Fri, 30 Jul 2010) $
//
//  $LastChangedBy: torstenrohlfing $
//
*/

#include "cmtkSumReduction_kernel.h"

#include "GPU/cmtkCUDA.h"

#include <cuda_runtime_api.h>

/** In-place sum of the first n elements of an array, computed by one thread block.
 *
 * On return, data[0] holds the sum of data[0] .. data[n-1]. The elements
 * data[1] .. data[min(n,blockDim.x)-1] are overwritten with per-thread partial
 * sums; elements at index >= n are neither read nor written.
 *
 * Launch requirements: the kernel indexes with threadIdx.x only, so it must be
 * launched with exactly one one-dimensional block. Several blocks would all
 * update the same elements concurrently.
 *
 *\param data Array of at least n elements that the kernel reads and writes.
 *\param n Number of elements to sum. For n <= 0 the array is left untouched.
 */
template<class T>
__global__
void cmtkSumReductionKernel( T* data, const int n )
{
  const int tx = threadIdx.x;
  const int nThreads = blockDim.x;

  // Phase 1: thread tx folds data[tx], data[tx+nThreads], data[tx+2*nThreads], ...
  // into data[tx]. Threads with tx >= n own no valid element and do nothing.
  if ( tx < n )
    {
      T partial = data[tx];
      for ( int i = tx + nThreads; i < n; i += nThreads )
        {
          partial += data[i];
        }
      data[tx] = partial;
    }

  // Kept outside the conditional so that every thread of the block reaches the
  // barrier; it makes all partial sums visible to thread 0.
  __syncthreads();

  // Phase 2: thread 0 adds up the partial sums serially. Only the first
  // min(n,nThreads) slots contain valid partial sums; slots beyond n were never
  // part of the input and must not be read.
  if ( tx == 0 )
    {
      const int nPartials = ( n < nThreads ) ? n : nThreads;
      for ( int i = 1; i < nPartials; ++i )
        data[0] += data[i];
    }
}

/** Sum n array elements on the GPU and return the result to the host.
 *
 * Launches cmtkSumReductionKernel with a single block of 512 threads and then
 * copies element 0 of the array back with a blocking device-to-host cudaMemcpy.
 *
 *\param data Pointer handed to the kernel and used as the source of the
 *  device-to-host copy. The kernel accumulates in place, so the array contents
 *  are modified by this call.
 *\param n Number of elements to sum.
 *\return The value found in data[0] after the kernel has run, or T(0) when
 *  n <= 0.
 */
template<class T>
T
cmtk::SumReduction( T* data, const int n )
{
  // Nothing to sum: skip the launch and, more importantly, the read of
  // data[0], which need not exist for an empty array.
  if ( n <= 0 )
    return T( 0 );

  cmtkSumReductionKernel<T><<<1,512>>>( data, n );
  // A kernel launch yields no status of its own; an invalid launch
  // configuration is only reported through cudaGetLastError().
  cmtkCheckCallCUDA( cudaGetLastError() );

  // The blocking copy on the same (default) stream runs after the kernel has
  // finished, so no explicit synchronization is needed before reading data[0].
  T result;
  cmtkCheckCallCUDA( cudaMemcpy( &result, data, sizeof( T ), cudaMemcpyDeviceToHost ) );
  return result;
}

// Explicit instantiations of cmtk::SumReduction for element types int and float.
// NOTE(review): the template is defined in this translation unit, so presumably
// these are the only element types callers in other files can link against --
// confirm against cmtkSumReduction_kernel.h before relying on another type.
template int cmtk::SumReduction<int>( int* data, const int n );
template float cmtk::SumReduction<float>( float* data, const int n );