1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
|
#ifndef CAFFE2_OPERATORS_INT8_GIVEN_TENSOR_FILL_OP_H_
#define CAFFE2_OPERATORS_INT8_GIVEN_TENSOR_FILL_OP_H_
#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
#include "caffe2/core/tensor_int8.h"
#include "caffe2/operators/filler_op.h"
#include "caffe2/utils/cast.h"
#include "caffe2/utils/math.h"
namespace caffe2 {
namespace int8 {
// Fills an int8 (uint8_t-backed) output tensor with byte values supplied via
// the "values" string argument, together with quantization parameters
// ("Y_scale", "Y_zero_point") and a static "shape".
class Int8GivenTensorFillOp final : public Operator<CPUContext> {
 public:
  template <class... Args>
  explicit Int8GivenTensorFillOp(Args&&... args)
      : Operator<CPUContext>(std::forward<Args>(args)...),
        scale_(this->template GetSingleArgument<float>("Y_scale", 1.0)),
        zero_point_(
            this->template GetSingleArgument<int32_t>("Y_zero_point", 0)),
        shape_(this->template GetRepeatedArgument<int64_t>("shape")) {
    // Decode the "values" argument once at construction; RunOnDevice only
    // copies the cached bytes into the output.
    ExtractValues();
  }

  bool RunOnDevice() override {
    auto* out = Outputs()[0]->template GetMutable<Int8TensorCPU>();
    // Quantization params are fixed operator arguments.
    out->scale = scale_;
    out->zero_point = zero_point_;
    ReinitializeTensor(&out->t, shape_, at::dtype<uint8_t>().device(CPU));
    return Fill(out);
  }

 private:
  // Copies the raw bytes of the "values" string argument into values_.
  void ExtractValues() {
    const auto src = this->template GetSingleArgument<string>("values", "");
    ReinitializeTensor(
        &values_,
        {static_cast<int64_t>(src.size())},
        at::dtype<uint8_t>().device(CPU));
    auto* dst = values_.template mutable_data<uint8_t>();
    for (size_t idx = 0; idx < src.size(); ++idx) {
      // Each char of the string is reinterpreted as one uint8 element.
      dst[idx] = static_cast<uint8_t>(src[idx]);
    }
  }

  // Copies the cached values into the (already shaped) output tensor.
  // Debug builds check that "shape" and "values" sizes agree.
  bool Fill(Int8TensorCPU* output) {
    const auto count = output->t.numel();
    TORCH_DCHECK_EQ(count, values_.numel())
        << "output size: " << count << " given size: " << values_.numel();
    // mutable_data is called unconditionally so the output dtype is set
    // even when the tensor is empty.
    auto* dst = output->t.template mutable_data<uint8_t>();
    const uint8_t* src = values_.template data<uint8_t>();
    if (count) {
      context_.template CopySameDevice<uint8_t>(count, src, dst);
    }
    return true;
  }

  float scale_;
  int32_t zero_point_;
  vector<int64_t> shape_;
  Tensor values_; // cached copy of the "values" argument, dtype uint8
};
// Fills an int32-backed quantized output tensor with values supplied via the
// repeated int "values" argument, together with quantization parameters
// ("Y_scale", "Y_zero_point") and a static "shape". Typically used for bias
// tensors accompanying int8 weights.
class Int8GivenIntTensorFillOp final : public Operator<CPUContext> {
 public:
  template <class... Args>
  explicit Int8GivenIntTensorFillOp(Args&&... args)
      : Operator<CPUContext>(std::forward<Args>(args)...),
        scale_(this->template GetSingleArgument<float>("Y_scale", 1.0)),
        zero_point_(
            this->template GetSingleArgument<int32_t>("Y_zero_point", 0)),
        shape_(this->template GetRepeatedArgument<int64_t>("shape")) {
    // Decode the "values" argument once at construction; RunOnDevice only
    // copies the cached values into the output.
    ExtractValues();
  }

  bool RunOnDevice() override {
    auto* output = Outputs()[0]->template GetMutable<Int8TensorCPU>();
    // Use ReinitializeTensor (with an explicit dtype) instead of the legacy
    // Resize(), matching Int8GivenTensorFillOp above.
    ReinitializeTensor(&output->t, shape_, at::dtype<int32_t>().device(CPU));
    output->scale = scale_;
    output->zero_point = zero_point_;
    return Fill(output);
  }

 private:
  // Copies the repeated int32 "values" argument into values_.
  void ExtractValues() {
    auto source_values = this->template GetRepeatedArgument<int32_t>("values");
    ReinitializeTensor(
        &values_,
        {static_cast<int64_t>(source_values.size())},
        at::dtype<int32_t>().device(CPU));
    auto* values_data = values_.template mutable_data<int32_t>();
    for (const auto i : c10::irange(source_values.size())) {
      values_data[i] = static_cast<int32_t>(source_values[i]);
    }
  }

  // Copies the cached values into the (already shaped) output tensor.
  // Debug builds check that "shape" and "values" sizes agree.
  bool Fill(Int8TensorCPU* output) {
    TORCH_DCHECK_EQ(output->t.numel(), values_.numel())
        << "output size: " << output->t.numel()
        << " given size: " << values_.numel();
    // mutable_data is called unconditionally so the output dtype is set
    // even when the tensor is empty.
    auto* data = output->t.template mutable_data<int32_t>();
    const auto* values_data = values_.template data<int32_t>();
    if (output->t.numel()) {
      context_.template CopySameDevice<int32_t>(
          output->t.numel(), values_data, data);
    }
    return true;
  }

  float scale_;
  int32_t zero_point_;
  vector<int64_t> shape_;
  Tensor values_; // cached copy of the "values" argument, dtype int32
};
} // namespace int8
} // namespace caffe2
#endif // CAFFE2_OPERATORS_INT8_GIVEN_TENSOR_FILL_OP_H_
|