File: itkGPUReduction.h

package info (click to toggle)
insighttoolkit4 4.13.3withdata-dfsg2-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 491,256 kB
  • sloc: cpp: 557,600; ansic: 180,546; fortran: 34,788; python: 16,572; sh: 2,187; lisp: 2,070; tcl: 993; java: 362; perl: 200; makefile: 133; csh: 81; pascal: 69; xml: 19; ruby: 10
file content (119 lines) | stat: -rw-r--r-- 3,704 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
/*=========================================================================
 *
 *  Copyright Insight Software Consortium
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0.txt
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 *=========================================================================*/
#ifndef itkGPUReduction_h
#define itkGPUReduction_h

#include "itkObject.h"
#include "itkGPUDataManager.h"
#include "itkGPUKernelManager.h"
#include "itkOpenCLUtil.h"

namespace itk
{
/** Create a helper GPU Kernel class for GPUReduction.
 *  The GPUReductionKernel name introduced here is the one handed to
 *  itkGetOpenCLSourceFromKernelMacro inside the GPUReduction class below.
 *  NOTE(review): the macro expansion is defined outside this header;
 *  confirm what members the generated class exposes. */
  itkGPUKernelClassMacro(GPUReductionKernel);

/**
 * \class GPUReduction
 *
 * This class encapsulates the parallel reduction algorithm. An example
 * of this algorithm is to compute the sum of a long array in parallel.
 *
 * Only declarations live in this header; the member definitions are pulled
 * in from itkGPUReduction.hxx unless ITK_MANUAL_INSTANTIATION is defined.
 *
 * \tparam TElement Type of the reduced elements. It is also the type of the
 *                  stored GPU and CPU results (m_GPUResult, m_CPUResult).
 *
 * \ingroup ITKGPUCommon
 */
template< typename TElement >
class ITK_TEMPLATE_EXPORT GPUReduction :
  public Object
{
public:
  /** Standard class typedefs. */
  typedef GPUReduction               Self;
  typedef Object                     Superclass;
  typedef SmartPointer< Self >       Pointer;
  typedef SmartPointer< const Self > ConstPointer;

  /** Method for creation through the object factory. */
  itkNewMacro(Self);

  /** Run-time type information (and related methods). */
  itkTypeMacro(GPUReduction,
               Object);

  /** Smart pointer to the GPU data manager type used for device buffers. */
  typedef GPUDataManager::Pointer GPUDataPointer;

  /** Read accessors generated by itkGetMacro. By ITK convention these
   *  return the m_GPUDataManager, m_GPUResult and m_CPUResult members
   *  declared in the protected section of this class. */
  itkGetMacro(GPUDataManager, GPUDataPointer);
  itkGetMacro(GPUResult, TElement);
  itkGetMacro(CPUResult, TElement);

  /** Get OpenCL Kernel source as a string, creates a GetOpenCLSource method */
  itkGetOpenCLSourceFromKernelMacro(GPUReductionKernel);

  /** Launch-configuration helpers.
   *  NOTE(review): bodies are in itkGPUReduction.hxx. Judging by the names,
   *  NextPow2 rounds x up to a power of two and isPow2 tests whether x is
   *  one -- confirm the exact behavior (e.g. for x == 0) in the .hxx.
   *  GetNumBlocksAndThreads takes blocks and threads by non-const
   *  reference, so they are out-parameters. GetReductionKernel returns an
   *  unsigned int; presumably a kernel identifier -- TODO confirm. */
  unsigned int NextPow2( unsigned int x );
  bool isPow2(unsigned int x);
  void GetNumBlocksAndThreads(int whichKernel, int n, int maxBlocks, int maxThreads, int &blocks, int &threads);
  unsigned int GetReductionKernel(int whichKernel, int blockSize, int isPowOf2);

  /** Input buffer management and kernel setup.
   *  h_idata defaults to a null pointer, so AllocateGPUInputBuffer may be
   *  called without a host array.
   *  NOTE(review): presumably h_idata is host data of m_Size elements and
   *  InitializeKernel(size) records size in m_Size -- confirm in the .hxx. */
  void AllocateGPUInputBuffer(TElement *h_idata = ITK_NULLPTR);
  void ReleaseGPUInputBuffer();
  void InitializeKernel(unsigned int size);

  /** Test/data-generation entry points, each returning a TElement.
   *  NOTE(review): presumably these fill m_GPUResult / m_CPUResult and
   *  CPUGenerateData reduces the size elements pointed to by data on the
   *  host -- confirm against the implementation. */
  TElement RandomTest();
  TElement GPUGenerateData();
  TElement CPUGenerateData(TElement *data, int size);

  /** Run the reduction and return the reduced value as a TElement.
   *  idata and odata are GPU data manager pointers passed by value;
   *  dTotalTime is a pointer to double.
   *  NOTE(review): presumably n is the element count, idata/odata are the
   *  input and scratch/output device buffers, dTotalTime receives timing,
   *  and cpuFinalReduction/cpuFinalThreshold control finishing the
   *  reduction on the host -- confirm all of these in the .hxx. */
  TElement GPUReduce(  cl_int  n,
                  int  numThreads,
                  int  numBlocks,
                  int  maxThreads,
                  int  maxBlocks,
                  int  whichKernel,
                  bool cpuFinalReduction,
                  int  cpuFinalThreshold,
                  double* dTotalTime,
                  GPUDataPointer idata,
                  GPUDataPointer odata);

protected:
  /** Construction and destruction are protected; instances are created
   *  through the New() method provided by itkNewMacro above. */
  GPUReduction();
  ~GPUReduction() ITK_OVERRIDE;
  void PrintSelf(std::ostream & os, Indent indent) const ITK_OVERRIDE;

  /** GPU kernel manager and GPU data manager owned by this class.
   *  m_GPUDataManager is the member exposed by GetGPUDataManager(). */
  GPUKernelManager::Pointer m_GPUKernelManager;
  GPUDataPointer            m_GPUDataManager;

  /* Integer kernel handles.
   * NOTE(review): presumably handles issued by m_GPUKernelManager for the
   * reduction kernel and a test kernel -- confirm in the .hxx. */
  int           m_ReduceGPUKernelHandle;
  int           m_TestGPUKernelHandle;

  /* NOTE(review): presumably m_Size is the number of elements to reduce and
   * m_SmallBlock selects a small work-group variant -- confirm in the .hxx. */
  unsigned int  m_Size;
  bool          m_SmallBlock;

  /* Results of type TElement, exposed through GetGPUResult() and
   * GetCPUResult(). */
  TElement      m_GPUResult, m_CPUResult;

private:
  /* Copy construction and assignment are disabled for this class. */
  ITK_DISALLOW_COPY_AND_ASSIGN(GPUReduction);

};
} // end namespace itk

#ifndef ITK_MANUAL_INSTANTIATION
#include "itkGPUReduction.hxx"
#endif

#endif