1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
|
#define FILENAME(line) FILENAME_FOR_EXCEPTIONS_CUDA("src/cuda-kernels/manual_awkward_NumpyArray_getitem_boolean_nonzero.cu", line)
#include "awkward/kernels.h"
#include "standard_parallel_algorithms.h"
__global__ void
awkward_NumpyArray_getitem_boolean_nonzero_filter_mask(
const int8_t* fromptr,
int64_t* filtered_index,
int64_t stride,
int64_t length) {
int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x;
if(thread_id < length && thread_id % stride == 0) {
if (fromptr[thread_id] != 0) {
filtered_index[thread_id] = 1;
}
}
}
template <typename T>
__global__ void
awkward_NumpyArray_getitem_boolean_nonzero_kernel(
T* toptr,
int64_t* prefixedsum_mask,
const int8_t* fromptr,
int64_t length,
int64_t stride) {
int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x;
if(thread_id < length && thread_id % stride == 0) {
if(fromptr[thread_id] != 0) {
toptr[prefixedsum_mask[thread_id] - 1] = thread_id;
}
}
}
template <typename T>
ERROR awkward_NumpyArray_getitem_boolean_nonzero(
T* toptr,
const int8_t* fromptr,
int64_t length,
int64_t stride) {
int64_t* res_temp;
int64_t* filtered_index;
dim3 blocks_per_grid = blocks(length);
dim3 threads_per_block = threads(length);
HANDLE_ERROR(cudaMalloc((void**)&res_temp, sizeof(int64_t) * length));
HANDLE_ERROR(cudaMalloc((void**)&filtered_index, sizeof(int64_t) * length));
HANDLE_ERROR(cudaMemset(filtered_index, 0, sizeof(int64_t) * length));
awkward_NumpyArray_getitem_boolean_nonzero_filter_mask<<<
blocks_per_grid,
threads_per_block>>>(fromptr, filtered_index, stride, length);
exclusive_scan<int64_t, int64_t>(res_temp, filtered_index, length);
awkward_NumpyArray_getitem_boolean_nonzero_kernel<<<blocks_per_grid,
threads_per_block>>>(
toptr,
res_temp,
fromptr,
length,
stride);
cudaDeviceSynchronize();
return success();
}
ERROR awkward_NumpyArray_getitem_boolean_nonzero_64(
int64_t* toptr,
const int8_t* fromptr,
int64_t length,
int64_t stride) {
return awkward_NumpyArray_getitem_boolean_nonzero<int64_t>(
toptr,
fromptr,
length,
stride);
}
|