File: cmtkSimpleLevelsetDevice_kernels.cu

package info (click to toggle)
cmtk 3.3.1p2%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 10,524 kB
  • sloc: cpp: 87,098; ansic: 23,347; sh: 3,896; xml: 1,551; perl: 707; makefile: 334
file content (108 lines) | stat: -rw-r--r-- 3,876 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
/*
//
//  Copyright 2010 SRI International
//
//  This file is part of the Computational Morphometry Toolkit.
//
//  http://www.nitrc.org/projects/cmtk/
//
//  The Computational Morphometry Toolkit is free software: you can
//  redistribute it and/or modify it under the terms of the GNU General Public
//  License as published by the Free Software Foundation, either version 3 of
//  the License, or (at your option) any later version.
//
//  The Computational Morphometry Toolkit is distributed in the hope that it
//  will be useful, but WITHOUT ANY WARRANTY; without even the implied
//  warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//  You should have received a copy of the GNU General Public License along
//  with the Computational Morphometry Toolkit.  If not, see
//  <http://www.gnu.org/licenses/>.
//
//  $Revision: 3168 $
//
//  $LastChangedDate: 2011-04-22 12:51:51 -0700 (Fri, 22 Apr 2011) $
//
//  $LastChangedBy: torstenrohlfing $
//
*/

#ifdef _WIN32
// This fixes a strange compile error using VisualStudio 2010 Express.
// See http://forums.nvidia.com/index.php?showtopic=67822
#define WIN32_LEAN_AND_MEAN
#endif

#include "cmtkSimpleLevelsetDevice_kernels.h"

#include "GPU/cmtkCUDA.h"
#include "GPU/cmtkDeviceMemory.h"
#include "GPU/cmtkSumReduction_kernel.h"

#include <cuda_runtime_api.h>

__global__
void
cmtkSimpleLevelsetDeviceUpdateInsideOutsideKernel( float* partialInsideSum, float* partialOutsideSum, int* partialInside, float* levelset, float* data, const int nPixels )
{
  int nInside = 0;
  float insideSum = 0;
  float outsideSum = 0;

  for ( int idx = threadIdx.x; idx < nPixels; idx += blockDim.x )
    {
      const float l = levelset[idx];
      const float d = data[idx];
      const int flag = (l>0) ? 1 : 0;

      nInside += flag;
      insideSum += flag*d;
      outsideSum += (1-flag)*d;
    }
  
  partialInside[threadIdx.x] = nInside;
  partialInsideSum[threadIdx.x] = insideSum;
  partialOutsideSum[threadIdx.x] = outsideSum;
}

void
cmtk::SimpleLevelsetDeviceUpdateInsideOutside( float* levelset, float* data, const int nPixels, float* insideSum, float* outsideSum, int* nInside )
{
  const int nThreads = 512;

  DeviceMemory<int> partialInside( nThreads );
  DeviceMemory<float> partialInsideSum( nThreads );
  DeviceMemory<float> partialOutsideSum( nThreads );
  
  cmtkSimpleLevelsetDeviceUpdateInsideOutsideKernel<<<1,nThreads>>>( partialInsideSum.Ptr(), partialOutsideSum.Ptr(), partialInside.Ptr(), levelset, data, nPixels );
  cmtkCheckLastErrorCUDA;
  
  *nInside = SumReduction( partialInside.Ptr(), nThreads );
  *insideSum = SumReduction( partialInsideSum.Ptr(), nThreads );
  *outsideSum = SumReduction( partialOutsideSum.Ptr(), nThreads );
}

__global__
void
cmtkSimpleLevelsetDeviceUpdateInsideOutsideKernel( float* levelset, float* data, const int nPixels, const float mInside, const float mOutside, const float ratioInOut, const float timeDelta, const float levelsetThreshold )
{
  for ( size_t n = threadIdx.x; n < nPixels; n += blockDim.x )
    {
      const float d = data[n];
      const float l = levelset[n];
      
      const float zInside = fabsf( mInside - d );
      const float zOutside = fabs( mOutside - d );
      
      const float delta = ( zInside>zOutside ) ? -timeDelta * ratioInOut : timeDelta / ratioInOut;
      levelset[n] = fminf( levelsetThreshold, fmaxf( -levelsetThreshold, l+delta ) );
    }
}

void
cmtk::SimpleLevelsetDeviceUpdateLevelset( float* levelset, float* data, const int nPixels, const float mInside, const float mOutside, const float ratioInOut, const float timeDelta, const float levelsetThreshold )
{
  cmtkSimpleLevelsetDeviceUpdateInsideOutsideKernel<<<1,512>>>( levelset, data, nPixels, mInside, mOutside, ratioInOut, timeDelta, levelsetThreshold );
  cmtkCheckLastErrorCUDA;
}