1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
|
#define FILENAME(line) FILENAME_FOR_EXCEPTIONS_CUDA("src/cuda-kernels/manual_awkward_ListOffsetArray_rpad_axis1.cu", line)
#include "awkward/kernels.h"
#include "standard_parallel_algorithms.h"
template <typename T, typename C>
__global__
void awkward_ListOffsetArray_rpad_axis1_kernel(
T* toindex,
const C* fromoffsets,
int64_t fromlength,
int64_t target) {
int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
int64_t thread_idy = blockIdx.y * blockDim.y + threadIdx.y;
if(thread_idx < fromlength) {
int64_t rangeval = (T)(fromoffsets[thread_idx + 1] - fromoffsets[thread_idx]);
if(thread_idy < rangeval) {
toindex[thread_idx * target + thread_idy] = (T)fromoffsets[thread_idx] + thread_idy;
}
else if(thread_idy >= rangeval && thread_idy < target) {
toindex[thread_idx * target + thread_idy] = -1;
}
}
}
template <typename T, typename C>
ERROR awkward_ListOffsetArray_rpad_axis1(
T* toindex,
const C* fromoffsets,
int64_t fromlength,
int64_t target) {
dim3 blocks_per_grid = blocks_2d(fromlength, target);
dim3 threads_per_block = threads_2d(fromlength, target);
awkward_ListOffsetArray_rpad_axis1_kernel<T, C><<<blocks_per_grid, threads_per_block>>>(
toindex,
fromoffsets,
fromlength,
target);
cudaDeviceSynchronize();
return success();
}
ERROR awkward_ListOffsetArray32_rpad_axis1_64(
int64_t* toindex,
const int32_t* fromoffsets,
int64_t fromlength,
int64_t target) {
return awkward_ListOffsetArray_rpad_axis1<int64_t, int32_t>(
toindex,
fromoffsets,
fromlength,
target);
}
ERROR awkward_ListOffsetArrayU32_rpad_axis1_64(
int64_t* toindex,
const uint32_t* fromoffsets,
int64_t fromlength,
int64_t target) {
return awkward_ListOffsetArray_rpad_axis1<int64_t, uint32_t>(
toindex,
fromoffsets,
fromlength,
target);
}
ERROR awkward_ListOffsetArray64_rpad_axis1_64(
int64_t* toindex,
const int64_t* fromoffsets,
int64_t fromlength,
int64_t target) {
return awkward_ListOffsetArray_rpad_axis1<int64_t, int64_t>(
toindex,
fromoffsets,
fromlength,
target);
}
|