1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
|
#include "caffe2/operators/lengths_reducer_rowwise_8bit_ops.h"
#include "c10/util/Registry.h"
namespace caffe2 {
// CPU registrations for the row-wise 8-bit conversion operators.
REGISTER_CPU_OPERATOR(
    Rowwise8BitQuantizedToFloat,
    Rowwise8BitQuantizedToFloatOp<CPUContext>);
REGISTER_CPU_OPERATOR(
    FloatToRowwiseQuantized8Bits,
    FloatToRowwiseQuantized8BitsOp<CPUContext>);

// CPU registrations for the SparseLengths*8BitsRowwise family. All four names
// map onto SparseLengths8BitsRowwiseOp; judging by the names they are
// registered under, the first flag selects the weighted variant and the second
// the mean variant (NOTE(review): confirm against the template declaration).
REGISTER_CPU_OPERATOR(
    SparseLengthsSum8BitsRowwise,
    SparseLengths8BitsRowwiseOp<CPUContext>);
REGISTER_CPU_OPERATOR(
    SparseLengthsWeightedSum8BitsRowwise,
    SparseLengths8BitsRowwiseOp<CPUContext, true>);
REGISTER_CPU_OPERATOR(
    SparseLengthsMean8BitsRowwise,
    SparseLengths8BitsRowwiseOp<CPUContext, false, true>);
REGISTER_CPU_OPERATOR(
    SparseLengthsWeightedMean8BitsRowwise,
    SparseLengths8BitsRowwiseOp<CPUContext, true, true>);
// Schema for SparseLengthsSum8BitsRowwise.
// Inputs by position: DATA, INDICES, LENGTHS, scale_bias. One output.
OPERATOR_SCHEMA(SparseLengthsSum8BitsRowwise)
    .NumInputs(4)
    .NumOutputs(1)
    // Value / length input positions handed to the schema's input fillers,
    // taken from the operator's own input enum.
    .ValueLengthInputFillers(
        SparseLengths8BitsRowwiseOp<CPUContext>::DATA,
        SparseLengths8BitsRowwiseOp<CPUContext>::LENGTHS)
    .SetDoc(R"DOC(
Variation of SparseLengthsSum operator, where DATA is
stored using 8bits. DATA was quantized with 8Bit row-wise
quantization (see doc to FloatToRowwiseQuantized8Bits operator). To
restore DATA from 8Bit, we use additional input that stores scales
and biases.
)DOC")
    .Input(
        0,
        "DATA",
        "uint8 tensor obtained with "
        "operator FloatToRowwiseQuantized8Bits")
    .Input(
        1,
        "INDICES",
        "Integer vector containing indices of the first "
        "dimension of DATA for the slices that are being aggregated")
    .Input(
        2,
        "LENGTHS",
        // The lengths partition INDICES into segments, so their sum is the
        // number of indices, not the number of rows in DATA. Wording follows
        // the non-quantized SparseLengthsSum documentation.
        "Non negative vector with sum of elements equal to INDICES length")
    .Input(
        3,
        "scale_bias",
        "Matrix of floats, each row r_i of which stores a pair "
        "s_i, b_i -- scale and bias for i-th row")
    .Output(0, "output", "output");
// Schema for SparseLengthsWeightedSum8BitsRowwise.
// Inputs by position: DATA, SCALARS, INDICES, LENGTHS, scale_bias. One output.
OPERATOR_SCHEMA(SparseLengthsWeightedSum8BitsRowwise)
    .NumInputs(5)
    .NumOutputs(1)
    // Value / length input positions handed to the schema's input fillers,
    // taken from the weighted instantiation's input enum. The flag is spelled
    // as a bool literal, so no modernize-use-bool-literals suppression is
    // needed.
    .ValueLengthInputFillers(
        SparseLengths8BitsRowwiseOp<CPUContext, true>::DATA,
        SparseLengths8BitsRowwiseOp<CPUContext, true>::LENGTHS)
    .SetDoc(R"DOC(
Variation of SparseLengthsWeightedSum operator, where
DATA is stored using 8bits. DATA was quantized with 8Bit row-wise
quantization (see doc to FloatToRowwiseQuantized8Bits operator). To
restore DATA from 8Bit, we use additional input that stores scales
and biases.
)DOC")
    .Input(
        0,
        "DATA",
        "uint8 tensor obtained with "
        "operator FloatToRowwiseQuantized8Bits")
    .Input(
        1,
        "SCALARS",
        "Scalar multipliers for the input slices. Must "
        "be a vector with the length matching the length of INDICES")
    .Input(
        2,
        "INDICES",
        "Integer vector containing indices of the first "
        "dimension of DATA for the slices that are being aggregated")
    .Input(
        3,
        "LENGTHS",
        // The lengths partition INDICES into segments, so their sum is the
        // number of indices, not the number of rows in DATA. Wording follows
        // the non-quantized SparseLengthsSum documentation.
        "Non negative vector with sum of elements equal to INDICES length")
    .Input(
        4,
        "scale_bias",
        "Matrix of floats, each row r_i of which stores a pair "
        "s_i, b_i -- scale and bias for i-th row")
    .Output(0, "output", "output");
// Schema for SparseLengthsMean8BitsRowwise.
// Inputs by position: DATA, INDICES, LENGTHS, scale_bias. One output.
OPERATOR_SCHEMA(SparseLengthsMean8BitsRowwise)
    .NumInputs(4)
    .NumOutputs(1)
    // Value / length input positions handed to the schema's input fillers,
    // taken from the mean instantiation's input enum. The flags are spelled
    // as bool literals, so no modernize-use-bool-literals suppression is
    // needed.
    .ValueLengthInputFillers(
        SparseLengths8BitsRowwiseOp<CPUContext, false, true>::DATA,
        SparseLengths8BitsRowwiseOp<CPUContext, false, true>::LENGTHS)
    .SetDoc(R"DOC(
Variation of SparseLengthsMean operator, where DATA is
stored using 8bits. DATA was quantized with 8Bit row-wise
quantization (see doc to FloatToRowwiseQuantized8Bits operator). To
restore DATA from 8Bit, we use additional input that stores scales
and biases.
)DOC")
    .Input(
        0,
        "DATA",
        "uint8 tensor obtained with "
        "operator FloatToRowwiseQuantized8Bits")
    .Input(
        1,
        "INDICES",
        "Integer vector containing indices of the first "
        "dimension of DATA for the slices that are being aggregated")
    .Input(
        2,
        "LENGTHS",
        // The lengths partition INDICES into segments, so their sum is the
        // number of indices, not the number of rows in DATA. Wording follows
        // the non-quantized SparseLengthsSum documentation.
        "Non negative vector with sum of elements equal to INDICES length")
    .Input(
        3,
        "scale_bias",
        "Matrix of floats, each row r_i of which stores a pair "
        "s_i, b_i -- scale and bias for i-th row")
    .Output(0, "output", "output");
// Schema for SparseLengthsWeightedMean8BitsRowwise.
// Inputs by position: DATA, SCALARS, INDICES, LENGTHS, scale_bias. One output.
OPERATOR_SCHEMA(SparseLengthsWeightedMean8BitsRowwise)
    .NumInputs(5)
    .NumOutputs(1)
    // Value / length input positions handed to the schema's input fillers,
    // taken from the weighted-mean instantiation's input enum. The flags are
    // spelled as bool literals, so no modernize-use-bool-literals suppression
    // is needed.
    .ValueLengthInputFillers(
        SparseLengths8BitsRowwiseOp<CPUContext, true, true>::DATA,
        SparseLengths8BitsRowwiseOp<CPUContext, true, true>::LENGTHS)
    .SetDoc(R"DOC(
Variation of SparseLengthsWeightedMean operator, where
DATA is stored using 8bits. DATA was quantized with 8Bit row-wise
quantization (see doc to FloatToRowwiseQuantized8Bits operator). To
restore DATA from 8Bit, we use additional input that stores scales
and biases.
)DOC")
    .Input(
        0,
        "DATA",
        "uint8 tensor obtained with "
        "operator FloatToRowwiseQuantized8Bits")
    .Input(
        1,
        "SCALARS",
        "Scalar multipliers for the input slices. Must "
        "be a vector with the length matching the length of INDICES")
    .Input(
        2,
        "INDICES",
        "Integer vector containing indices of the first "
        "dimension of DATA for the slices that are being aggregated")
    .Input(
        3,
        "LENGTHS",
        // The lengths partition INDICES into segments, so their sum is the
        // number of indices, not the number of rows in DATA. Wording follows
        // the non-quantized SparseLengthsSum documentation.
        "Non negative vector with sum of elements equal to INDICES length")
    .Input(
        4,
        "scale_bias",
        "Matrix of floats, each row r_i of which stores a pair "
        "s_i, b_i -- scale and bias for i-th row")
    .Output(0, "output", "output");
// Schema for FloatToRowwiseQuantized8Bits: one float input, two outputs
// (the quantized tensor and the per-row scale/bias matrix).
//
// No ValueLengthInputFillers is set here. This operator has a single dense
// input and no LENGTHS input, so the sparse operator's DATA/LENGTHS positions
// (LENGTHS is input 2 of SparseLengthsSum8BitsRowwise) do not exist for it.
OPERATOR_SCHEMA(FloatToRowwiseQuantized8Bits)
    .NumInputs(1)
    .NumOutputs(2)
    .SetDoc(R"DOC(
This operator applies 8Bit row-wise quantization to
input tensor and returns quantized tensor. Row wise quantization of
input tensor is the following process. We take tensor of size
(m_1, m_2,...,m_n), n >= 2, reshape it into matrix of size
(m_1, m_2 x... x m_n) and apply row-wise quantization. After this,
we compute scale_i = (max_i - min_i) / 255 and bias_i = min_i for
i-th row r_i of reshaped matrix, where min_i and max_i -- minimum
and maximum elements of i-th row, and quantize each element r_{ij} as
0 <= round((r_ij - bias_i) / scale_i) < 256. Instead of input tensor
we obtain uint8 tensor and auxiliary information as scale and bias to
restore input tensor (with losses).
)DOC")
    .Input(0, "input", "input")
    .Output(0, "quantized_input", "quantized_input")
    .Output(
        1,
        "scale_bias",
        "Matrix of floats, each row r_i of which stores a pair "
        "s_i, b_i");
// Schema for Rowwise8BitQuantizedToFloat: two inputs (the quantized tensor
// and its per-row scale/bias matrix) and one restored float output.
//
// No ValueLengthInputFillers is set here. This operator has no LENGTHS input,
// and the sparse operator's LENGTHS position (input 2 of
// SparseLengthsSum8BitsRowwise) is past the end of its two inputs.
OPERATOR_SCHEMA(Rowwise8BitQuantizedToFloat)
    .NumInputs(2)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Given uint8 tensor, quantized using 8bit row-wise
quantization, and auxiliary scales and biases, this operator
restores float tensor in the following way. We take input 8bits tensor
of size (m_1, m_2, ..., m_n), n >= 2, reshape it into matrix of size
(m_1, m_2 x... x m_n). We compute element r_{ij} of output matrix as
r_{ij} * s_i + b_i and after this we reshape this output matrix into
output tensor of size (m_1, m_2, ..., m_n).
)DOC")
    .Input(0, "quantized_input", "quantized_input")
    .Input(
        1,
        "scale_bias",
        "Matrix of floats, each row r_i of which stores a pair "
        "s_i, b_i -- scale and bias for i-th row")
    // The schema declares exactly one output, so it is documented at index 0.
    .Output(0, "output", "output");
// Every operator registered in this file is marked with NO_GRADIENT.
// Listed in the same order as the schemas above.
NO_GRADIENT(SparseLengthsSum8BitsRowwise);
NO_GRADIENT(SparseLengthsWeightedSum8BitsRowwise);
NO_GRADIENT(SparseLengthsMean8BitsRowwise);
NO_GRADIENT(SparseLengthsWeightedMean8BitsRowwise);
NO_GRADIENT(FloatToRowwiseQuantized8Bits);
NO_GRADIENT(Rowwise8BitQuantizedToFloat);
}
|