File: one_hot_ops.cu

package info (click to toggle)
pytorch 1.7.1-7
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 80,340 kB
  • sloc: cpp: 670,830; python: 343,991; ansic: 67,845; asm: 5,503; sh: 2,924; java: 2,888; xml: 266; makefile: 244; ruby: 148; yacc: 144; objc: 51; lex: 44
file content (35 lines) | stat: -rw-r--r-- 929 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#include <cub/block/block_reduce.cuh>

#include "caffe2/core/context_gpu.h"
#include "caffe2/operators/one_hot_ops.h"

namespace caffe2 {

/**
 * Marks the "hot" entry of every row of a one-hot encoded matrix.
 *
 * For each row i in [0, batch_size) the element
 * output[i * index_size + indices[i]] is set to 1. No other element is
 * written, so the caller must clear the output buffer beforehand.
 *
 * @param batch_size  number of rows, i.e. number of entries read from indices
 * @param index_size  row length of output; valid indices lie in [0, index_size)
 * @param indices     device pointer to batch_size class indices
 * @param output      device pointer to batch_size * index_size floats
 *
 * Expects a 1-D launch; CUDA_1D_KERNEL_LOOP (defined outside this file)
 * supplies the loop over i.
 */
__global__ void OneHotOpKernel(
    const int64_t batch_size,
    const int64_t index_size,
    const int64_t* indices,
    float* output) {
  CUDA_1D_KERNEL_LOOP(i, batch_size) {
    const int64_t hot = indices[i];
    // An index outside [0, index_size) would land in a neighbouring row or
    // past the end of the buffer. Skip it and leave that row untouched rather
    // than corrupt unrelated device memory.
    if (hot >= 0 && hot < index_size) {
      // Float literal: avoids a double constant being narrowed on the store.
      output[i * index_size + hot] = 1.0f;
    }
  }
}

/**
 * CUDA specialization of OneHotOp::DoOneHotOp.
 *
 * Fills the whole output buffer with zeros and then launches OneHotOpKernel
 * on context_.cuda_stream() to set one element per row to 1.
 *
 * @param batch_size  number of rows to encode
 * @param index_size  row length of the output
 * @param indices     tensor read as int64_t class indices, one per row
 * @param output      tensor written as float; assumed to already hold at least
 *                    batch_size * index_size elements (sizing is not done
 *                    here -- confirm against the caller)
 *
 * Raises through CUDA_ENFORCE if the kernel launch is rejected.
 */
template <>
void OneHotOp<CUDAContext>::DoOneHotOp(
    int64_t batch_size,
    int64_t index_size,
    const Tensor& indices,
    Tensor* output) {
  float* output_ptr = output->template mutable_data<float>();

  // The kernel only writes the ones, so every element must be cleared first.
  // NOTE(review): presumably math::Set enqueues on the same stream via
  // context_, which would order the fill before the kernel -- confirm.
  math::Set<float, CUDAContext>(output->numel(), 0.0f, output_ptr, &context_);

  OneHotOpKernel<<<
      CAFFE_GET_BLOCKS(batch_size),
      CAFFE_CUDA_NUM_THREADS,
      0,
      context_.cuda_stream()>>>(
      batch_size, index_size, indices.data<int64_t>(), output_ptr);

  // A kernel launch returns no status of its own; a bad launch configuration
  // is only visible through cudaGetLastError(). Surface it here instead of
  // silently leaving an all-zero output.
  CUDA_ENFORCE(cudaGetLastError());
}

// Registers OneHotOp<CUDAContext> as the CUDA implementation of the operator
// named "OneHot" (the CUDA DoOneHotOp specialization is defined in this file).
REGISTER_CUDA_OPERATOR(OneHot, OneHotOp<CUDAContext>);
} // namespace