1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
|
/*=========================================================================
*
* Copyright NumFOCUS
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*=========================================================================*/
#ifndef itkGPUReduction_h
#define itkGPUReduction_h
#include "itkObject.h"
#include "itkGPUDataManager.h"
#include "itkGPUKernelManager.h"
#include "itkOpenCLUtil.h"
namespace itk
{
/** Create a helper GPU kernel class, GPUReductionKernel, for GPUReduction.
 * GPUReduction passes this name to itkGetOpenCLSourceFromKernelMacro in order
 * to obtain the OpenCL kernel source as a string. */
itkGPUKernelClassMacro(GPUReductionKernel);
/**
* \class GPUReduction
*
* This class encapsulates the parallel reduction algorithm. An example
* of this algorithm is to compute the sum of a long array in parallel.
*
* \ingroup ITKGPUCommon
*/
template <typename TElement>
class ITK_TEMPLATE_EXPORT GPUReduction : public Object
{
public:
  ITK_DISALLOW_COPY_AND_MOVE(GPUReduction);

  /** Standard class type aliases. */
  using Self = GPUReduction;
  using Superclass = Object;
  using Pointer = SmartPointer<Self>;
  using ConstPointer = SmartPointer<const Self>;

  /** Method for creation through the object factory. */
  itkNewMacro(Self);

  /** \see LightObject::GetNameOfClass() */
  itkOverrideGetNameOfClassMacro(GPUReduction);

  /** Smart pointer to the data manager that wraps the GPU buffer. */
  using GPUDataPointer = GPUDataManager::Pointer;

  /** Get the GPU data manager held by this object. */
  itkGetMacro(GPUDataManager, GPUDataPointer);

  /** Get the stored GPU-side result. Value-initialized until it is assigned. */
  itkGetMacro(GPUResult, TElement);

  /** Get the stored CPU-side result. Value-initialized until it is assigned. */
  itkGetMacro(CPUResult, TElement);

  /** Get OpenCL Kernel source as a string, creates a GetOpenCLSource method */
  itkGetOpenCLSourceFromKernelMacro(GPUReductionKernel);

  /** Power-of-two helper, defined in itkGPUReduction.hxx.
   * NOTE(review): judging by the name, rounds \c x up to a power of two;
   * confirm the exact behavior (e.g. for 0) against the implementation. */
  unsigned int
  NextPow2(unsigned int x);

  /** Power-of-two predicate, defined in itkGPUReduction.hxx.
   * NOTE(review): presumably true when \c x is a power of two; confirm how
   * 0 is treated against the implementation. */
  bool
  isPow2(unsigned int x);

  /** Compute a launch configuration and write it to the output references
   * \c blocks and \c threads.
   * NOTE(review): the roles of \c whichKernel, \c n, \c maxBlocks and
   * \c maxThreads are inferred from their names only; see the .hxx. */
  void
  GetNumBlocksAndThreads(int whichKernel, int n, int maxBlocks, int maxThreads, int & blocks, int & threads);

  /** Obtain the reduction kernel for the given variant and block size.
   * NOTE(review): the return value is presumably a kernel handle usable with
   * m_GPUKernelManager; confirm against the implementation. */
  unsigned int
  GetReductionKernel(int whichKernel, int blockSize, int isPowOf2);

  /** Allocate the GPU input buffer.
   * \param h_idata Optional host data pointer; defaults to nullptr.
   * NOTE(review): how \c h_idata is consumed (copied vs. referenced) is not
   * visible in this header. */
  void
  AllocateGPUInputBuffer(TElement * h_idata = nullptr);

  /** Release the GPU input buffer. */
  void
  ReleaseGPUInputBuffer();

  /** Prepare the kernel for an input of \c size elements.
   * NOTE(review): presumably also records the size in m_Size; confirm. */
  void
  InitializeKernel(unsigned int size);

  /** Test entry point, defined in itkGPUReduction.hxx.
   * NOTE(review): the input data and the meaning of the returned value are
   * not visible in this header. */
  TElement
  RandomTest();

  /** Produce the GPU-side result.
   * NOTE(review): presumably runs the GPU reduction and stores the value
   * returned in m_GPUResult; confirm against the implementation. */
  TElement
  GPUGenerateData();

  /** Produce the CPU-side result for \c size elements starting at \c data.
   * NOTE(review): presumably the reference value for comparison with the GPU
   * result; confirm against the implementation. */
  TElement
  CPUGenerateData(TElement * data, int size);

  /** Run the reduction on the GPU and return the reduced value.
   * \c idata and \c odata are GPU data managers for the input and output
   * buffers.
   * NOTE(review): \c dTotalTime looks like an output for elapsed time, and
   * \c cpuFinalReduction / \c cpuFinalThreshold look like controls for
   * finishing the reduction on the host; confirm against the .hxx. */
  TElement
  GPUReduce(cl_int         n,
            int            numThreads,
            int            numBlocks,
            int            maxThreads,
            int            maxBlocks,
            int            whichKernel,
            bool           cpuFinalReduction,
            int            cpuFinalThreshold,
            double *       dTotalTime,
            GPUDataPointer idata,
            GPUDataPointer odata);

protected:
  GPUReduction();
  ~GPUReduction() override;
  void
  PrintSelf(std::ostream & os, Indent indent) const override;

  /** GPU kernel manager for the GPUReduction class */
  GPUKernelManager::Pointer m_GPUKernelManager{};

  /** GPU data manager returned by GetGPUDataManager() */
  GPUDataPointer m_GPUDataManager{};

  /** GPU kernel handles (reduction kernel and test kernel) */
  int m_ReduceGPUKernelHandle{};
  int m_TestGPUKernelHandle{};

  unsigned int m_Size{};
  bool         m_SmallBlock{};

  /** Results returned by GetGPUResult() / GetCPUResult(). They are declared
   * separately so that BOTH are value-initialized: in a combined declaration
   * `TElement a, b{};` the initializer applies to `b` only, leaving `a`
   * indeterminate for arithmetic TElement. */
  TElement m_GPUResult{};
  TElement m_CPUResult{};
};
} // end namespace itk
#ifndef ITK_MANUAL_INSTANTIATION
# include "itkGPUReduction.hxx"
#endif
#endif
|