1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151
|
#include "dnnlowp.h"
#include <cmath>
#include <iostream>
#include <random>
#include <gtest/gtest.h>
#include "caffe2/core/logging.h"
using namespace std;
using namespace dnnlowp;
// Compares fbgemm::Requantize, applied to a whole buffer at once, against a
// per-element reference built from std::nearbyint and fbgemm::clamp (called
// with precision 8), over many randomly generated inputs.
TEST(Requantization, BatchRequantizationUnitTest) {
  constexpr int kNumTrials = 1024;
  constexpr int kNumElements = 77;

  // Random sources: the input magnitude bound and the output zero point.
  std::default_random_engine rng;
  std::uniform_int_distribution<int32_t> bound_dist(
      10, std::numeric_limits<int32_t>::max());
  std::uniform_int_distribution<int> zp_dist(0, 255);

  std::vector<int32_t> input(kNumElements);
  std::vector<uint8_t> reference(kNumElements), output(kNumElements);
  QuantizationFactory* factory = QuantizationFactory::GetDefaultInstance();

  for (int trial = 0; trial < kNumTrials; ++trial) {
    // Pick a fresh bound, then fill the input from [-bound, bound].
    const int32_t bound = bound_dist(rng);
    std::uniform_int_distribution<int32_t> value_dist(-bound, bound);
    for (int32_t& value : input) {
      value = value_dist(rng);
    }

    // With this multiplier |src * real_multiplier| reaches 255 / 1.5 = 170,
    // so after adding a zero point drawn from [0, 255] the result can fall
    // outside [0, 255]. That is intentional: it exercises saturation.
    const float real_multiplier = 255 / (1.5 * bound);

    TensorQuantizationParams out_qparams;
    out_qparams.zero_point = zp_dist(rng);
    out_qparams.precision = 8;
    RequantizationParams requant =
        factory->ChooseRequantizationMultiplier(real_multiplier, out_qparams);

    // Scalar reference result, computed in double precision.
    for (int k = 0; k < kNumElements; ++k) {
      const auto scaled =
          std::nearbyint(static_cast<double>(input[k]) * real_multiplier);
      reference[k] = fbgemm::clamp(out_qparams.zero_point + scaled, 8);
    }

    // Time the batch routine with the timestamp counter and log throughput.
    const unsigned long long tsc_start = __rdtsc();
    fbgemm::Requantize(input.data(), output.data(), kNumElements, requant);
    const unsigned long long tsc_stop = __rdtsc();
    const double throughput =
        static_cast<double>(kNumElements) / (tsc_stop - tsc_start);
    LOG(INFO) << throughput << " elements_per_cycle";

    // Every element must match the reference exactly.
    for (int k = 0; k < kNumElements; ++k) {
      EXPECT_EQ(static_cast<int>(reference[k]), static_cast<int>(output[k]))
          << "i " << trial << " j " << k << " src " << input[k]
          << " real_multiplier " << real_multiplier << " multiplier "
          << requant.multiplier << " right_shift " << requant.right_shift
          << " zero_point " << out_qparams.zero_point;
    }
  }
}
// Requantizes 32-bit values that carry no input quantization error down to
// 8 bits and checks that every element's error, measured after dequantizing
// the result, stays within dst_qparams.scale / 1.9.
TEST(Requantization, RequantizationUnitTest) {
  // Rescale from a random range [min1, max1] to a random range [min2, max2].
  // The ranges include 0, and the inputs are constructed so that they do not
  // carry any input quantization error.
  default_random_engine gen;
  QuantizationFactory* qfactory = QuantizationFactory::GetDefaultInstance();
  {
    // Test 31-bit to 8-bit scaling (the most common one, for example used
    // for the results of GEMM).
    // The destination quantization parameters are determined by the min/max
    // of the values. The source scale varies and its zero point is 0.
    uniform_real_distribution<float> src_scale_exponent_dist(-19, -1);
    uniform_real_distribution<float> dst_exponent_dist(0.1, 4);
    // Bits used in src_scale plus dst should be <= 23 so that there is no
    // input quantization error, because float has 23 bits of precision.
    uniform_real_distribution<float> negative_proportion_dist(0, 1);
    for (int i = 0; i < 256; ++i) {
      TensorQuantizationParams src_qparams;
      // Source scale is between 2^-19 and 2^-1.
      src_qparams.scale = powf(2, src_scale_exponent_dist(gen));
      src_qparams.zero_point = 0;
      src_qparams.precision = 31;

      // Destination range [min, max] has width dst_extend and straddles 0:
      // negative_proportion of it lies below zero.
      float dst_extend = powf(2, dst_exponent_dist(gen));
      float negative_proportion = negative_proportion_dist(gen);
      float min = -(dst_extend * negative_proportion);
      float max = dst_extend + min;
      TensorQuantizationParams dst_qparams =
          qfactory->ChooseQuantizationParams(min, max);
      // The destination scale is expected to be about dst_extend / 2^8, i.e.
      // between 2^0.1 / 2^8 and 2^4 / 2^8 = 2^-4 (this assumes the factory's
      // default precision is 8 bits -- confirm against QuantizationFactory).
      float real_multiplier = src_qparams.scale / dst_qparams.scale;
      RequantizationParams requantization_params =
          qfactory->ChooseRequantizationMultiplier(
              real_multiplier, dst_qparams);

      // Snap the sampling bounds to multiples of src_qparams.scale inside
      // [min, max] to avoid input quantization error due to clipping.
      uniform_real_distribution<float> value_dist(
          ceil(min / src_qparams.scale) * src_qparams.scale,
          floor(max / src_qparams.scale) * src_qparams.scale);

      float sum_sq = 0;
      float max_err = 0;
      constexpr int LEN = 1111;
      vector<int32_t> src_q(LEN);
      vector<float> src(LEN);
      for (int j = 0; j < LEN; ++j) {
        float src_orig = value_dist(gen);
        src_q[j] = fbgemm::Quantize<int32_t>(
            src_orig, 0, src_qparams.scale, 32, true /* signed*/);
        src[j] = fbgemm::Dequantize<int32_t>(src_q[j], src_qparams);
        // The dequantized value must round-trip exactly, i.e. it must not
        // have any quantization error.
        EXPECT_EQ(
            fbgemm::Quantize<int32_t>(src[j], 0, src_qparams.scale, 32, true),
            src_q[j]);
      }

      vector<uint8_t> dst_q(LEN);
      fbgemm::Requantize(
          src_q.data(), dst_q.data(), LEN, requantization_params);

      // Accumulate the error between each dequantized output and its exact
      // source value.
      for (int j = 0; j < LEN; ++j) {
        float dst = fbgemm::Dequantize<uint8_t>(dst_q[j], dst_qparams);
        float err = fabsf(dst - src[j]);
        sum_sq += err * err;
        max_err = std::max(max_err, err);
        EXPECT_LE(err, dst_qparams.scale / 1.9);
      }

      // Average over the actual number of elements (LEN) rather than a
      // hard-coded count that does not match the buffer size.
      LOG(INFO) << "src_scale " << src_qparams.scale << " dst_extend "
                << dst_extend << " real_multiplier " << real_multiplier
                << " avg_l2_err " << std::sqrt(sum_sq) / LEN << " max_err "
                << max_err << endl;
      // We shouldn't have an error bigger than the output quantization error.
      EXPECT_LE(max_err, dst_qparams.scale / 1.9);
    }
  }
}
|