1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229
|
# Owner(s): ["oncall: quantization"]
import torch
from torch import quantize_per_tensor
from torch.ao.quantization.observer import MinMaxObserver
from torch.ao.quantization.experimental.observer import APoTObserver
from torch.ao.quantization.experimental.quantizer import APoTQuantizer, quantize_APoT, dequantize_APoT
import unittest
import random
class TestQuantizer(unittest.TestCase):
r""" Tests quantize_APoT result on random 1-dim tensor
and hardcoded values for b, k by comparing to uniform quantization
(non-uniform quantization reduces to uniform for k = 1)
quantized tensor (https://pytorch.org/docs/stable/generated/torch.quantize_per_tensor.html)
* tensor2quantize: Tensor
* b: 8
* k: 1
"""
def test_quantize_APoT_rand_k1(self):
# generate random size of tensor2quantize between 1 -> 20
size = random.randint(1, 20)
# generate tensor with random fp values between 0 -> 1000
tensor2quantize = 1000 * torch.rand(size, dtype=torch.float)
apot_observer = APoTObserver(b=8, k=1)
apot_observer(tensor2quantize)
alpha, gamma, quantization_levels, level_indices = apot_observer.calculate_qparams(signed=False)
# get apot quantized tensor result
qtensor = quantize_APoT(tensor2quantize=tensor2quantize,
alpha=alpha,
gamma=gamma,
quantization_levels=quantization_levels,
level_indices=level_indices)
# get uniform quantization quantized tensor result
uniform_observer = MinMaxObserver()
uniform_observer(tensor2quantize)
scale, zero_point = uniform_observer.calculate_qparams()
uniform_quantized = quantize_per_tensor(input=tensor2quantize,
scale=scale,
zero_point=zero_point,
dtype=torch.quint8).int_repr()
qtensor_data = qtensor.data.int()
uniform_quantized_tensor = uniform_quantized.data.int()
self.assertTrue(torch.equal(qtensor_data, uniform_quantized_tensor))
r""" Tests quantize_APoT for k != 1.
Tests quantize_APoT result on random 1-dim tensor and hardcoded values for
b=4, k=2 by comparing results to hand-calculated results from APoT paper
https://arxiv.org/pdf/1909.13144.pdf
* tensor2quantize: Tensor
* b: 4
* k: 2
"""
def test_quantize_APoT_k2(self):
r"""
given b = 4, k = 2, alpha = 1.0, we know:
(from APoT paper example: https://arxiv.org/pdf/1909.13144.pdf)
quantization_levels = tensor([0.0000, 0.0208, 0.0417, 0.0625, 0.0833, 0.1250, 0.1667,
0.1875, 0.2500, 0.3333, 0.3750, 0.5000, 0.6667, 0.6875, 0.7500, 1.0000])
level_indices = tensor([ 0, 3, 12, 15, 2, 14, 8, 11, 10, 1, 13, 9, 4, 7, 6, 5]))
"""
# generate tensor with random fp values
tensor2quantize = torch.tensor([0, 0.0215, 0.1692, 0.385, 1, 0.0391])
observer = APoTObserver(b=4, k=2)
observer.forward(tensor2quantize)
alpha, gamma, quantization_levels, level_indices = observer.calculate_qparams(signed=False)
# get apot quantized tensor result
qtensor = quantize_APoT(tensor2quantize=tensor2quantize,
alpha=alpha,
gamma=gamma,
quantization_levels=quantization_levels,
level_indices=level_indices)
qtensor_data = qtensor.data.int()
# expected qtensor values calculated based on
# corresponding level_indices to nearest quantization level
# for each fp value in tensor2quantize
# e.g.
# 0.0215 in tensor2quantize nearest 0.0208 in quantization_levels -> 3 in level_indices
expected_qtensor = torch.tensor([0, 3, 8, 13, 5, 12], dtype=torch.int32)
self.assertTrue(torch.equal(qtensor_data, expected_qtensor))
r""" Tests dequantize_apot result on random 1-dim tensor
and hardcoded values for b, k.
Dequant -> quant an input tensor and verify that
result is equivalent to input
* tensor2quantize: Tensor
* b: 4
* k: 2
"""
def test_dequantize_quantize_rand_b4(self):
# make observer
observer = APoTObserver(4, 2)
# generate random size of tensor2quantize between 1 -> 20
size = random.randint(1, 20)
# make tensor2quantize: random fp values between 0 -> 1000
tensor2quantize = 1000 * torch.rand(size, dtype=torch.float)
observer.forward(tensor2quantize)
alpha, gamma, quantization_levels, level_indices = observer.calculate_qparams(signed=False)
# make mock apot_tensor
original_apot = quantize_APoT(tensor2quantize=tensor2quantize,
alpha=alpha,
gamma=gamma,
quantization_levels=quantization_levels,
level_indices=level_indices)
original_input = torch.clone(original_apot.data).int()
# dequantize apot_tensor
dequantize_result = dequantize_APoT(apot_tensor=original_apot)
# quantize apot_tensor
final_apot = quantize_APoT(tensor2quantize=dequantize_result,
alpha=alpha,
gamma=gamma,
quantization_levels=quantization_levels,
level_indices=level_indices)
result = final_apot.data.int()
self.assertTrue(torch.equal(original_input, result))
r""" Tests dequantize_apot result on random 1-dim tensor
and hardcoded values for b, k.
Dequant -> quant an input tensor and verify that
result is equivalent to input
* tensor2quantize: Tensor
* b: 12
* k: 4
"""
def test_dequantize_quantize_rand_b6(self):
# make observer
observer = APoTObserver(12, 4)
# generate random size of tensor2quantize between 1 -> 20
size = random.randint(1, 20)
# make tensor2quantize: random fp values between 0 -> 1000
tensor2quantize = 1000 * torch.rand(size, dtype=torch.float)
observer.forward(tensor2quantize)
alpha, gamma, quantization_levels, level_indices = observer.calculate_qparams(signed=False)
# make mock apot_tensor
original_apot = quantize_APoT(tensor2quantize=tensor2quantize,
alpha=alpha,
gamma=gamma,
quantization_levels=quantization_levels,
level_indices=level_indices)
original_input = torch.clone(original_apot.data).int()
# dequantize apot_tensor
dequantize_result = dequantize_APoT(apot_tensor=original_apot)
# quantize apot_tensor
final_apot = quantize_APoT(tensor2quantize=dequantize_result,
alpha=alpha,
gamma=gamma,
quantization_levels=quantization_levels,
level_indices=level_indices)
result = final_apot.data.int()
self.assertTrue(torch.equal(original_input, result))
r""" Tests for correct dimensions in dequantize_apot result
on random 3-dim tensor with random dimension sizes
and hardcoded values for b, k.
Dequant an input tensor and verify that
dimensions are same as input.
* tensor2quantize: Tensor
* b: 4
* k: 2
"""
def test_dequantize_dim(self):
# make observer
observer = APoTObserver(4, 2)
# generate random size of tensor2quantize between 1 -> 20
size1 = random.randint(1, 20)
size2 = random.randint(1, 20)
size3 = random.randint(1, 20)
# make tensor2quantize: random fp values between 0 -> 1000
tensor2quantize = 1000 * torch.rand(size1, size2, size3, dtype=torch.float)
observer.forward(tensor2quantize)
alpha, gamma, quantization_levels, level_indices = observer.calculate_qparams(signed=False)
# make mock apot_tensor
original_apot = quantize_APoT(tensor2quantize=tensor2quantize,
alpha=alpha,
gamma=gamma,
quantization_levels=quantization_levels,
level_indices=level_indices)
# dequantize apot_tensor
dequantize_result = dequantize_APoT(apot_tensor=original_apot)
self.assertEqual(original_apot.data.size(), dequantize_result.size())
def test_q_apot_alpha(self):
with self.assertRaises(NotImplementedError):
APoTQuantizer.q_apot_alpha(self)
# Script entry point: hand control to unittest's command-line runner, but
# only when this file is executed directly rather than imported.
if __name__ == '__main__':
    unittest.main()
|