File: manual_awkward_ListOffsetArray_rpad_axis1.cu

package info (click to toggle)
python-awkward 2.6.5-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 23,088 kB
  • sloc: python: 148,689; cpp: 33,562; sh: 432; makefile: 21; javascript: 8
file content (78 lines) | stat: -rw-r--r-- 2,143 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#define FILENAME(line) FILENAME_FOR_EXCEPTIONS_CUDA("src/cuda-kernels/manual_awkward_ListOffsetArray_rpad_axis1.cu", line)

#include "awkward/kernels.h"
#include "standard_parallel_algorithms.h"

template <typename T, typename C>
__global__
void awkward_ListOffsetArray_rpad_axis1_kernel(
  T* toindex,
  const C* fromoffsets,
  int64_t fromlength,
  int64_t target) {
  int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
  int64_t thread_idy = blockIdx.y * blockDim.y + threadIdx.y;

  if(thread_idx < fromlength) {
    int64_t rangeval = (T)(fromoffsets[thread_idx + 1] - fromoffsets[thread_idx]);

    if(thread_idy < rangeval) {
      toindex[thread_idx * target + thread_idy] = (T)fromoffsets[thread_idx] + thread_idy;
    }
    else if(thread_idy >= rangeval && thread_idy < target) {
      toindex[thread_idx * target + thread_idy] = -1;
    }
  }
}
    
template <typename T, typename C>
ERROR awkward_ListOffsetArray_rpad_axis1(
  T* toindex,
  const C* fromoffsets,
  int64_t fromlength,
  int64_t target) {
  dim3 blocks_per_grid = blocks_2d(fromlength, target);
  dim3 threads_per_block = threads_2d(fromlength, target);

  awkward_ListOffsetArray_rpad_axis1_kernel<T, C><<<blocks_per_grid, threads_per_block>>>(
      toindex,
      fromoffsets,
      fromlength,
      target);

  cudaDeviceSynchronize();
  return success();
}
ERROR awkward_ListOffsetArray32_rpad_axis1_64(
  int64_t* toindex,
  const int32_t* fromoffsets,
  int64_t fromlength,
  int64_t target) {
  return awkward_ListOffsetArray_rpad_axis1<int64_t, int32_t>(
    toindex,
    fromoffsets,
    fromlength,
    target);
}
ERROR awkward_ListOffsetArrayU32_rpad_axis1_64(
  int64_t* toindex,
  const uint32_t* fromoffsets,
  int64_t fromlength,
  int64_t target) {
  return awkward_ListOffsetArray_rpad_axis1<int64_t, uint32_t>(
    toindex,
    fromoffsets,
    fromlength,
    target);
}
ERROR awkward_ListOffsetArray64_rpad_axis1_64(
  int64_t* toindex,
  const int64_t* fromoffsets,
  int64_t fromlength,
  int64_t target) {
  return awkward_ListOffsetArray_rpad_axis1<int64_t, int64_t>(
    toindex,
    fromoffsets,
    fromlength,
    target);
}