File: itkGPUReduction.h

package info (click to toggle)
insighttoolkit5 5.4.3-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 704,384 kB
  • sloc: cpp: 783,592; ansic: 628,724; xml: 44,704; fortran: 34,250; python: 22,874; sh: 4,078; pascal: 2,636; lisp: 2,158; makefile: 464; yacc: 328; asm: 205; perl: 203; lex: 146; tcl: 132; javascript: 98; csh: 81
file content (127 lines) | stat: -rw-r--r-- 3,635 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
/*=========================================================================
 *
 *  Copyright NumFOCUS
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *         https://www.apache.org/licenses/LICENSE-2.0.txt
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 *=========================================================================*/
#ifndef itkGPUReduction_h
#define itkGPUReduction_h

#include "itkObject.h"
#include "itkGPUDataManager.h"
#include "itkGPUKernelManager.h"
#include "itkOpenCLUtil.h"

namespace itk
{
/** Create a helper GPU Kernel class for GPUReduction */
itkGPUKernelClassMacro(GPUReductionKernel);

/**
 * \class GPUReduction
 *
 * This class encapsulate the parallel reduction algorithm. An example
 * of this algorithm is to compute the sum of a long array in parallel.
 *
 * \ingroup ITKGPUCommon
 */
template <typename TElement>
class ITK_TEMPLATE_EXPORT GPUReduction : public Object
{
public:
  ITK_DISALLOW_COPY_AND_MOVE(GPUReduction);

  /** Standard class type aliases. */
  using Self = GPUReduction;
  using Superclass = Object;
  using Pointer = SmartPointer<Self>;
  using ConstPointer = SmartPointer<const Self>;

  /** Method for creation through the object factory. */
  itkNewMacro(Self);

  /** \see LightObject::GetNameOfClass() */
  itkOverrideGetNameOfClassMacro(GPUReduction);

  using GPUDataPointer = GPUDataManager::Pointer;

  itkGetMacro(GPUDataManager, GPUDataPointer);
  itkGetMacro(GPUResult, TElement);
  itkGetMacro(CPUResult, TElement);

  /** Get OpenCL Kernel source as a string, creates a GetOpenCLSource method */
  itkGetOpenCLSourceFromKernelMacro(GPUReductionKernel);

  unsigned int
  NextPow2(unsigned int x);
  bool
  isPow2(unsigned int x);
  void
  GetNumBlocksAndThreads(int whichKernel, int n, int maxBlocks, int maxThreads, int & blocks, int & threads);
  unsigned int
  GetReductionKernel(int whichKernel, int blockSize, int isPowOf2);

  void
  AllocateGPUInputBuffer(TElement * h_idata = nullptr);
  void
  ReleaseGPUInputBuffer();
  void
  InitializeKernel(unsigned int size);

  TElement
  RandomTest();
  TElement
  GPUGenerateData();
  TElement
  CPUGenerateData(TElement * data, int size);

  TElement
  GPUReduce(cl_int         n,
            int            numThreads,
            int            numBlocks,
            int            maxThreads,
            int            maxBlocks,
            int            whichKernel,
            bool           cpuFinalReduction,
            int            cpuFinalThreshold,
            double *       dTotalTime,
            GPUDataPointer idata,
            GPUDataPointer odata);

protected:
  GPUReduction();
  ~GPUReduction() override;
  void
  PrintSelf(std::ostream & os, Indent indent) const override;

  /** GPU kernel manager for GPUFiniteDifferenceFunction class */
  GPUKernelManager::Pointer m_GPUKernelManager{};
  GPUDataPointer            m_GPUDataManager{};

  /* GPU kernel handle for GPUComputeUpdate */
  int m_ReduceGPUKernelHandle{};
  int m_TestGPUKernelHandle{};

  unsigned int m_Size{};
  bool         m_SmallBlock{};

  TElement m_GPUResult, m_CPUResult{};
};
} // end namespace itk

#ifndef ITK_MANUAL_INSTANTIATION
#  include "itkGPUReduction.hxx"
#endif

#endif