1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
|
#include "caffe2/core/context_gpu.h"
#include "caffe2/operators/multi_class_accuracy_op.h"
#include "caffe2/utils/GpuAtomics.cuh"
#include "caffe2/utils/math.h"
namespace caffe2 {
namespace {
__global__ void MultiClassAccuracyKernel(const int N, const int D, const float* Xdata,
const int* labeldata, float* accuracies, int* amounts) {
CUDA_1D_KERNEL_LOOP(i, N) {
float maxval = Xdata[i * D];
int maxid = 0;
for (int j = 1; j < D; ++j) {
if (Xdata[i * D + j] > maxval) {
maxval = Xdata[i * D + j];
maxid = j;
}
}
int labelid = labeldata[i];
if (maxid == labelid) {
gpu_atomic_add(accuracies + labelid, static_cast<float>(1));
}
gpu_atomic_add(amounts + labelid, static_cast<int>(1));
}
}
__global__ void MultiClassAccuracyDivideKernel(
const int D, float* accuracies, const int* amounts) {
CUDA_1D_KERNEL_LOOP(i, D) {
if (amounts[i]) {
accuracies[i] /= amounts[i];
}
}
}
} // namespace
template <>
bool MultiClassAccuracyOp<float, CUDAContext>::RunOnDevice() {
auto& X = Input(PREDICTION);
auto& label = Input(LABEL);
TORCH_DCHECK_EQ(X.dim(), 2);
// amount, number of instances
int N = X.dim32(0);
// dimension, number of classes
int D = X.dim32(1);
TORCH_DCHECK_EQ(label.dim(), 1);
TORCH_DCHECK_EQ(label.dim32(0), N);
auto* Y0 = Output(0, {D}, at::dtype<float>());
auto* Y1 = Output(1, {D}, at::dtype<int>());
const float* Xdata = X.data<float>();
const int* labeldata = label.data<int>();
float* accuracies = Y0->template mutable_data<float>();
int* amounts = Y1->template mutable_data<int>();
math::Set<float, CUDAContext>(D, 0.0, accuracies, &context_);
math::Set<int, CUDAContext>(D, 0, amounts, &context_);
MultiClassAccuracyKernel<<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS,
0, context_.cuda_stream()>>>(
N, D, Xdata, labeldata, accuracies, amounts);
C10_CUDA_KERNEL_LAUNCH_CHECK();
MultiClassAccuracyDivideKernel<<<CAFFE_GET_BLOCKS(D), CAFFE_CUDA_NUM_THREADS,
0, context_.cuda_stream()>>>(
D, accuracies, amounts);
C10_CUDA_KERNEL_LAUNCH_CHECK();
return true;
}
REGISTER_CUDA_OPERATOR(
MultiClassAccuracy, MultiClassAccuracyOp<float, CUDAContext>);
} // namespace caffe2
|