File: cudpp_util.h

package info (click to toggle)
lammps 20220106.git7586adbb6a%2Bds1-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 348,064 kB
  • sloc: cpp: 831,421; python: 24,896; xml: 14,949; f90: 10,845; ansic: 7,967; sh: 4,226; perl: 4,064; fortran: 2,424; makefile: 1,501; objc: 238; lisp: 163; csh: 16; awk: 14; tcl: 6
file content (363 lines) | stat: -rw-r--r-- 9,215 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
// -------------------------------------------------------------
// cuDPP -- CUDA Data Parallel Primitives library
// -------------------------------------------------------------
// $Revision: 5289 $
// $Date: 2010-11-23 13:04:43 -0700 (Tue, 23 Nov 2010) $
// -------------------------------------------------------------
// This source code is distributed under the terms of license.txt in
// the root directory of this source distribution.
// -------------------------------------------------------------

/**
 * @file
 * cudpp_util.h
 *
 * @brief C++ utility functions and classes used internally to cuDPP
 */

#ifndef __CUDPP_UTIL_H__
#define __CUDPP_UTIL_H__

#ifdef WIN32
#include <windows.h>
#endif

#include <cuda.h>
#include <cudpp.h>
#include <limits.h>
#include <cfloat>

#if (CUDA_VERSION >= 3000)
#define LAUNCH_BOUNDS(x) __launch_bounds__((x))
#define LAUNCH_BOUNDS_MINBLOCKs(x, y) __launch_bounds__((x),(y))
#else
#define LAUNCH_BOUNDS(x)
#define LAUNCH_BOUNDS_MINBLOCKS(x, y)
#endif


/** @brief Determine if \a n is a power of two.
  * @param n Value to be checked to see if it is a power of two
  * @returns True if \a n is a power of two, false otherwise
  */
inline bool
isPowerOfTwo(int n)
{
    return ((n&(n-1))==0) ;
}

/** @brief Determine if an integer \a n is a multiple of an integer \a f.
  * @param n Multiple
  * @param f Factor
  * @returns True if \a n is a multiple of \a f, false otherwise
  */
inline bool
isMultiple(int n, int f)
{
    if (isPowerOfTwo(f))
        return ((n&(f-1))==0);
    else
        return (n%f==0);
}

/** @brief Compute the smallest power of two larger than \a n.
  * @param n Input value
  * @returns The smallest power f two larger than \a n
  */
inline int
ceilPow2(int n)
{
        double log2n = log2((double)n);
        if (isPowerOfTwo(n))
                return n;
        else
                return 1 << (int)ceil(log2n);
}

/** @brief Compute the largest power of two smaller than \a n.
  * @param n Input value
  * @returns The largest power of two smaller than \a n.
  */
inline int
floorPow2(int n)
{
#ifdef WIN32
    // method 2
    return 1 << (int)_logb((float)n);
#else
    // method 3
    int exp;
    frexp((float)n, &exp);
    return 1 << (exp - 1);
#endif
}

/** @brief Returns the maximum value for type \a T.
  *
  * Implemented using template specialization on \a T.
  */
template <class T>
__host__ __device__ inline T getMax() { return 0; }
/** @brief Returns the minimum value for type \a T.
*
* Implemented using template specialization on \a T.
*/
template <class T>
__host__ __device__ inline T getMin() { return 0; }
// type specializations for the above
// getMax
template <> __host__ __device__ inline int getMax() { return INT_MAX; }
template <> __host__ __device__ inline unsigned int getMax() { return INT_MAX; }
template <> __host__ __device__ inline float getMax() { return FLT_MAX; }
template <> __host__ __device__ inline char getMax() { return (char)INT_MAX; }
template <> __host__ __device__ inline unsigned char getMax() { return (unsigned char)INT_MAX; }
// getMin
template <> __host__ __device__ inline int getMin() { return INT_MIN; }
template <> __host__ __device__ inline unsigned int getMin() { return 0; }
template <> __host__ __device__ inline float getMin() { return -FLT_MAX; }
template <> __host__ __device__ inline char getMin() { return (char)INT_MIN; }
template <> __host__ __device__ inline unsigned char getMin() { return (unsigned char)0; }

/** @brief Returns the maximum of three values.
  * @param a First value.
  * @param b Second value.
  * @param c Third value.
  * @returns The maximum of \a a, \a b and \a c.
  */
template<class T>
inline int max3(T a, T b, T c)
{
    return (a > b) ? ((a > c)? a : c) : ((b > c) ? b : c);
}

/** @brief Utility template struct for generating small vector types from scalar types
  *
  * Given a base scalar type (\c int, \c float, etc.) and a vector length (1 through 4) as
  * template parameters, this struct defines a vector type (\c float3, \c int4, etc.) of the
  * specified length and base type.  For example:
  * \code
  * template <class T>
  * __device__ void myKernel(T *data)
  * {
  *     typeToVector<T,4>::Result myVec4;             // create a vec4 of type T
  *     myVec4 = (typeToVector<T,4>::Result*)data[0]; // load first element of data as a vec4
  * }
  * \endcode
  *
  * This functionality is implemented using template specialization.  Currently specializations
  * for int, float, and unsigned int vectors of lengths 2-4 are defined.  Note that this results
  * in types being generated at compile time -- there is no runtime cost.  typeToVector is used by
  * the optimized scan \c __device__ functions in scan_cta.cu.
  */
template <typename T, int N>
struct typeToVector
{
    typedef T Result;
};

template<>
struct typeToVector<int, 4>
{
    typedef int4 Result;
};
template<>
struct typeToVector<unsigned int, 4>
{
    typedef uint4 Result;
};
template<>
struct typeToVector<float, 4>
{
    typedef float4 Result;
};
template<>
struct typeToVector<int, 3>
{
    typedef int3 Result;
};
template<>
struct typeToVector<unsigned int, 3>
{
    typedef uint3 Result;
};
template<>
struct typeToVector<float, 3>
{
    typedef float3 Result;
};
template<>
struct typeToVector<int, 2>
{
    typedef int2 Result;
};
template<>
struct typeToVector<unsigned int, 2>
{
    typedef uint2 Result;
};
template<>
struct typeToVector<float, 2>
{
    typedef float2 Result;
};

/** @brief Templatized operator class used by scan and segmented scan
  *
  * This Operator class is used to allow generic support of binary
  * associative operators in scan.  It defines two member functions,
  * op() and identity(), that are used in place of + and 0 (for
  * example) in the scan and  segmented scan code. Because this is
  * template code, all decisions in the code are made at compile
  * time, resulting in optimal operator code. Currently the operators
  * CUDPP_ADD, CUDPP_MULTIPLY, CUDPP_MIN, and CUDPP_MAX are supported.
  * Operator is implemented using template specialization for the
  * types \c int, \c unsigned int, and \c float.
  */
template <typename T, CUDPPOperator oper>
class Operator
{
public:
    /** Applies the operator to operands \a a and \a b.
      * @param a First operand
      * @param b Second operand
      * @returns a OP b, where OP is defined by ::CUDPPOperator \a oper.
      */
    static __device__ T op(const T a, const T b)
    {
        switch (oper)
        {
        case CUDPP_ADD:
            return a + b;
        case CUDPP_MULTIPLY:
            return a * b;
        case CUDPP_MIN:
            return min(a, b);
        case CUDPP_MAX:
            return max(a, b);
        }
    }

    /** Returns the identity element defined for type \a T */
    static __device__ T identity() { return 0; }
};

// specializations for different types
template <CUDPPOperator oper>
class Operator <int, oper>
{
public:
    static __device__ int op(const int a, const int b)
    {
        switch (oper)
        {
        default:
        case CUDPP_ADD:
            return a + b;
        case CUDPP_MULTIPLY:
            return a * b;
        case CUDPP_MIN:
            return min(a, b);
        case CUDPP_MAX:
            return max(a, b);
        }
    }

    static __device__ int identity()
    {
        switch (oper)
        {
        default:
        case CUDPP_ADD:
            return 0;
        case CUDPP_MULTIPLY:
            return 1;
        case CUDPP_MIN:
            return INT_MAX;
        case CUDPP_MAX:
            return INT_MIN;
        }
    }
};

template <CUDPPOperator oper>
class Operator <unsigned int, oper>
{
public:
    static __device__ unsigned int op(const unsigned int a, const unsigned int b)
    {
        switch (oper)
        {
        default:
        case CUDPP_ADD:
            return a + b;
        case CUDPP_MULTIPLY:
            return a * b;
        case CUDPP_MIN:
            return min(a, b);
        case CUDPP_MAX:
            return max(a, b);
        }
    }

    static __device__ unsigned int identity()
    {
        switch (oper)
        {
        default:
        case CUDPP_ADD:
            return 0;
        case CUDPP_MULTIPLY:
            return 1;
        case CUDPP_MIN:
            return UINT_MAX;
        case CUDPP_MAX:
            return 0;
        }
    }
};


template <CUDPPOperator oper>
class Operator <float, oper>
{
public:
    static __device__ float op(const float a, const float b)
    {
        switch (oper)
        {
        default:
        case CUDPP_ADD:
            return a + b;
        case CUDPP_MULTIPLY:
            return a * b;
        case CUDPP_MIN:
            return min(a, b);
        case CUDPP_MAX:
            return max(a, b);
        }
    }

    static __device__ float identity()
    {
        switch (oper)
        {
        default:
        case CUDPP_ADD:
            return 0.0f;
        case CUDPP_MULTIPLY:
            return 1.0f;
        case CUDPP_MIN:
            return FLT_MAX;
        case CUDPP_MAX:
            return -FLT_MAX;
        }
    }
};

#endif // __CUDPP_UTIL_H__

// Leave this at the end of the file
// Local Variables:
// mode:c++
// c-file-style: "NVIDIA"
// End: