File: reduce.cuh

package info (click to toggle)
pytorch 1.7.1-7
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 80,340 kB
  • sloc: cpp: 670,830; python: 343,991; ansic: 67,845; asm: 5,503; sh: 2,924; java: 2,888; xml: 266; makefile: 244; ruby: 148; yacc: 144; objc: 51; lex: 44
file content (53 lines) | stat: -rw-r--r-- 2,723 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#ifndef CAFFE2_UTILS_MATH_REDUCE_CUH_
#define CAFFE2_UTILS_MATH_REDUCE_CUH_

#include <cub/block/block_reduce.cuh>
#include <cub/cub.cuh>

#include "caffe2/core/common_gpu.h"

namespace caffe2 {

// Shorthand for cub::BlockReduce over values of type ValueT, instantiated
// with CAFFE_CUDA_NUM_THREADS as its second (block-dimension) template
// argument. All remaining cub::BlockReduce template parameters are left at
// whatever defaults cub declares.
// NOTE(review): CAFFE_CUDA_NUM_THREADS is not defined in this header;
// presumably it is provided by caffe2/core/common_gpu.h -- confirm.
template <class ValueT>
using BlockReduce = cub::BlockReduce<ValueT, CAFFE_CUDA_NUM_THREADS>;

// Shorthand for cub::BlockReduce over values of type ValueT where the caller
// supplies both block extents as compile-time constants. kDimX and kDimY are
// forwarded as the second and fourth cub::BlockReduce template arguments, and
// the algorithm argument is fixed to cub::BLOCK_REDUCE_WARP_REDUCTIONS.
template <class ValueT, int kDimX, int kDimY>
using BlockReduce2D = cub::BlockReduce<
    ValueT,
    kDimX,
    cub::BLOCK_REDUCE_WARP_REDUCTIONS,
    kDimY>;

// Launches the kernel template `Func` with a 2D thread-block shape selected
// from `size`. Every shape contains 128 threads (x * y):
//
//   size >= 128 -> Func<T, 1, 128>  launched with dim3(1, 128)
//   size >= 64  -> Func<T, 2, 64>   launched with dim3(2, 64)
//   size >= 32  -> Func<T, 4, 32>   launched with dim3(4, 32)
//   otherwise   -> Func<T, 8, 16>   launched with dim3(8, 16)
//
// Arguments:
//   size        - value compared against the thresholds above. It is
//                 evaluated exactly once and treated as a single expression,
//                 so arguments with side effects or low-precedence operators
//                 (e.g. `a ? b : c`, `x | y`) are safe.
//                 NOTE(review): presumably the extent of the dimension being
//                 reduced -- confirm against callers.
//   Func        - kernel template, instantiated as Func<T, X, Y> where X and
//                 Y equal the x/y extents of the launched block.
//   T           - first template argument forwarded to Func.
//   grid_dim    - grid dimension passed to the launch configuration.
//   cuda_stream - stream passed to the launch configuration.
//   ...         - arguments forwarded verbatim to the kernel.
//
// The launch requests 0 bytes of dynamic shared memory. No launch-error check
// is performed here; callers that need one must query the CUDA runtime after
// invoking the macro.
#define DISPATCH_REDUCE_KERNEL_BY_2D_BLOCK_WITH_TYPE_1(                       \
    size, Func, T, grid_dim, cuda_stream, ...)                                \
  do {                                                                        \
    /* Evaluate `size` exactly once, as a whole expression. */                \
    const auto caffe2_reduce_dispatch_size_ = (size);                         \
    if (caffe2_reduce_dispatch_size_ >= 128) {                                \
      Func<T, 1, 128>                                                         \
          <<<grid_dim, dim3(1, 128), 0, cuda_stream>>>(__VA_ARGS__);          \
    } else if (caffe2_reduce_dispatch_size_ >= 64) {                          \
      Func<T, 2, 64><<<grid_dim, dim3(2, 64), 0, cuda_stream>>>(__VA_ARGS__); \
    } else if (caffe2_reduce_dispatch_size_ >= 32) {                          \
      Func<T, 4, 32><<<grid_dim, dim3(4, 32), 0, cuda_stream>>>(__VA_ARGS__); \
    } else {                                                                  \
      Func<T, 8, 16><<<grid_dim, dim3(8, 16), 0, cuda_stream>>>(__VA_ARGS__); \
    }                                                                         \
  } while (false)

// Two-type variant of the 2D-block reduce dispatcher: launches the kernel
// template `Func` with a 2D thread-block shape selected from `size`. Every
// shape contains 128 threads (x * y):
//
//   size >= 128 -> Func<T1, T2, 1, 128>  launched with dim3(1, 128)
//   size >= 64  -> Func<T1, T2, 2, 64>   launched with dim3(2, 64)
//   size >= 32  -> Func<T1, T2, 4, 32>   launched with dim3(4, 32)
//   otherwise   -> Func<T1, T2, 8, 16>   launched with dim3(8, 16)
//
// Arguments:
//   size        - value compared against the thresholds above. It is
//                 evaluated exactly once and treated as a single expression,
//                 so arguments with side effects or low-precedence operators
//                 (e.g. `a ? b : c`, `x | y`) are safe.
//                 NOTE(review): presumably the extent of the dimension being
//                 reduced -- confirm against callers.
//   Func        - kernel template, instantiated as Func<T1, T2, X, Y> where
//                 X and Y equal the x/y extents of the launched block.
//   T1, T2      - first and second template arguments forwarded to Func.
//   grid_dim    - grid dimension passed to the launch configuration.
//   cuda_stream - stream passed to the launch configuration.
//   ...         - arguments forwarded verbatim to the kernel.
//
// The launch requests 0 bytes of dynamic shared memory. No launch-error check
// is performed here; callers that need one must query the CUDA runtime after
// invoking the macro.
#define DISPATCH_REDUCE_KERNEL_BY_2D_BLOCK_WITH_TYPE_2(              \
    size, Func, T1, T2, grid_dim, cuda_stream, ...)                  \
  do {                                                               \
    /* Evaluate `size` exactly once, as a whole expression. */       \
    const auto caffe2_reduce_dispatch_size_ = (size);                \
    if (caffe2_reduce_dispatch_size_ >= 128) {                       \
      Func<T1, T2, 1, 128>                                           \
          <<<grid_dim, dim3(1, 128), 0, cuda_stream>>>(__VA_ARGS__); \
    } else if (caffe2_reduce_dispatch_size_ >= 64) {                 \
      Func<T1, T2, 2, 64>                                            \
          <<<grid_dim, dim3(2, 64), 0, cuda_stream>>>(__VA_ARGS__);  \
    } else if (caffe2_reduce_dispatch_size_ >= 32) {                 \
      Func<T1, T2, 4, 32>                                            \
          <<<grid_dim, dim3(4, 32), 0, cuda_stream>>>(__VA_ARGS__);  \
    } else {                                                         \
      Func<T1, T2, 8, 16>                                            \
          <<<grid_dim, dim3(8, 16), 0, cuda_stream>>>(__VA_ARGS__);  \
    }                                                                \
  } while (false)

} // namespace caffe2

#endif // CAFFE2_UTILS_MATH_REDUCE_CUH_