File: manual_awkward_index_rpad_and_clip_axis1.cu

package info (click to toggle)
python-awkward 2.6.5-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 23,088 kB
  • sloc: python: 148,689; cpp: 33,562; sh: 432; makefile: 21; javascript: 8
file content (51 lines) | stat: -rw-r--r-- 1,209 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#define FILENAME(line) FILENAME_FOR_EXCEPTIONS_CUDA("src/cuda-kernels/manual_awkward_index_rpad_and_clip_axis1.cu", line)

#include "awkward/kernels.h"
#include "standard_parallel_algorithms.h"

// Fills two index arrays with evenly spaced offsets:
//   tostarts[i] = i * target
//   tostops[i]  = (i + 1) * target      for every i in [0, length)
//
// Parameters:
//   tostarts, tostops  output arrays; each must be writable from the device
//                      and hold at least `length` elements.
//   target             spacing between consecutive offsets.
//   length             number of elements to write in each array.
//
// Launch layout: only the x dimension of the grid and block is used, and no
// shared memory is required. Any 1-D configuration is valid: each thread
// starts at its global x index and advances by the total number of launched
// threads, so a grid smaller than `length` still covers every element and
// surplus threads simply do nothing.
template <typename T>
__global__ void
awkward_index_rpad_and_clip_axis1_kernel(
    T* tostarts,
    T* tostops,
    int64_t target,
    int64_t length) {
  // blockIdx/blockDim/gridDim components are 32-bit unsigned; widen BEFORE
  // multiplying so the products cannot wrap around on very large launches.
  const int64_t first =
      static_cast<int64_t>(blockIdx.x) * static_cast<int64_t>(blockDim.x) +
      static_cast<int64_t>(threadIdx.x);
  const int64_t stride =
      static_cast<int64_t>(gridDim.x) * static_cast<int64_t>(blockDim.x);

  for (int64_t i = first; i < length; i += stride) {
    // The products are computed in int64_t and then converted to T.
    tostarts[i] = static_cast<T>(i * target);
    tostops[i] = static_cast<T>((i + 1) * target);
  }
}

// Host-side launcher for awkward_index_rpad_and_clip_axis1_kernel.
//
// Launches the kernel so that, for every i in [0, length),
//   tostarts[i] = i * target   and   tostops[i] = (i + 1) * target,
// then blocks the host until the device has finished.
//
// Parameters:
//   tostarts, tostops  output arrays handed straight to the kernel; they must
//                      be writable from device code and hold at least
//                      `length` elements each.
//   target             spacing between consecutive offsets.
//   length             number of elements to fill.
//
// Returns success() in all cases.
template <typename T>
ERROR awkward_index_rpad_and_clip_axis1(
    T* tostarts,
    T* tostops,
    int64_t target,
    int64_t length) {
  // Empty input: there is nothing to write, so skip the launch entirely.
  // This also avoids relying on what blocks()/threads() return for a
  // non-positive length (those helpers are not visible here); a grid or block
  // with a zero-sized dimension is rejected by the CUDA runtime as an invalid
  // launch configuration.
  if (length <= 0) {
    return success();
  }

  // Launch geometry is delegated to the project's helpers.
  dim3 blocks_per_grid = blocks(length);
  dim3 threads_per_block = threads(length);

  awkward_index_rpad_and_clip_axis1_kernel<<<blocks_per_grid, threads_per_block>>>(
      tostarts,
      tostops,
      target,
      length);

  // Wait for the kernel to complete before returning to the caller.
  // NOTE(review): neither a launch error (cudaGetLastError) nor the status
  // returned by cudaDeviceSynchronize() is inspected, so device-side failures
  // are not reported through the returned ERROR — consider surfacing them via
  // the project's failure-reporting helper.
  cudaDeviceSynchronize();

  return success();
}
// Concrete 64-bit entry point: forwards its arguments unchanged to the
// awkward_index_rpad_and_clip_axis1 template instantiated with T = int64_t
// and returns whatever that call returns.
ERROR awkward_index_rpad_and_clip_axis1_64(
    int64_t* tostarts,
    int64_t* tostops,
    int64_t target,
    int64_t length) {
  return awkward_index_rpad_and_clip_axis1<int64_t>(tostarts, tostops, target, length);
}