import collections
import caffe2.python.hypothesis_test_util as hu
import hypothesis.strategies as st
import numpy as np
from caffe2.python import core, dyndep, utils, workspace
from caffe2.quantization.server import utils as dnnlowp_utils
from caffe2.quantization.server.dnnlowp_test_utils import check_quantized_results_close
from hypothesis import given

dyndep.InitOpsLibrary("//caffe2/caffe2/quantization/server:dnnlowp_ops")
workspace.GlobalInit(["caffe2", "--caffe2_omp_num_threads=11"])
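

# This test builds the same GroupNorm computation three ways (fp32 reference,
# DNNLOWP, and pre-quantized Int8GroupNorm) on random inputs and checks that
# the quantized outputs stay within the expected quantization error of the
# fp32 reference.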
class DNNLowPOpGroupNormTest(hu.HypothesisTestCase):
    @given(
        N=st.integers(0, 4),
        G=st.integers(2, 4),
        K=st.integers(2, 12),
        H=st.integers(4, 16),
        W=st.integers(4, 16),
        order=st.sampled_from(["NCHW", "NHWC"]),
        in_quantized=st.booleans(),
        out_quantized=st.booleans(),
        weight_quantized=st.booleans(),
        **hu.gcs_cpu_only
    )
    def test_dnnlowp_group_norm(
        self,
        N,
        G,
        K,
        H,
        W,
        order,
        in_quantized,
        out_quantized,
        weight_quantized,
        gc,
        dc,
    ):
        C = G * K

        X = np.random.rand(N, C, H, W).astype(np.float32) * 5.0 - 1.0
        if order == "NHWC":
            X = utils.NCHW2NHWC(X)

        gamma = np.random.rand(C).astype(np.float32) * 2.0 - 1.0
        beta = np.random.rand(C).astype(np.float32) - 0.5
        Output = collections.namedtuple("Output", ["Y", "op_type", "engine"])
        outputs = []

        op_engine_list = [
            ("GroupNorm", ""),
            ("GroupNorm", "DNNLOWP"),
            ("Int8GroupNorm", "DNNLOWP"),
        ]
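
        # The fp32 GroupNorm runs first so that later weight-quantized runs
        # can reuse its output range (outputs[0]) for static output
        # quantization.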
        for op_type, engine in op_engine_list:
            net = core.Net("test_net")

            do_quantize = "DNNLOWP" in engine and in_quantized
            do_dequantize = "DNNLOWP" in engine and out_quantized
            # Weight quantization is exercised only after the fp32 reference
            # has produced an output to derive quantization parameters from.
            do_quantize_weight = (
                engine == "DNNLOWP" and weight_quantized and len(outputs) > 0
            )
            if do_quantize:
                quantize = core.CreateOperator(
                    "Quantize", ["X"], ["X_q"], engine=engine, device_option=gc
                )
                net.Proto().op.extend([quantize])

            if do_quantize_weight:
                int8_given_tensor_fill, gamma_q_param = dnnlowp_utils.create_int8_given_tensor_fill(
                    gamma, "gamma_q"
                )
                net.Proto().op.extend([int8_given_tensor_fill])

                X_min = 0 if X.size == 0 else X.min()
                X_max = 0 if X.size == 0 else X.max()
                X_q_param = dnnlowp_utils.choose_quantization_params(X_min, X_max)
                int8_bias_tensor_fill = dnnlowp_utils.create_int8_bias_tensor_fill(
                    beta, "beta_q", X_q_param, gamma_q_param
                )
                net.Proto().op.extend([int8_bias_tensor_fill])
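
                # For reference: choose_quantization_params above conceptually
                # picks an affine uint8 mapping for [X_min, X_max], roughly
                #     scale = (X_max - X_min) / 255
                #     zero_point = round(-X_min / scale)
                # and create_int8_bias_tensor_fill quantizes beta using both
                # the input and gamma parameters, following the usual DNNLOWP
                # bias convention of bias_scale = X_scale * gamma_scale
                # (a sketch; the exact heuristics live in dnnlowp_utils).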

            group_norm = core.CreateOperator(
                op_type,
                [
                    "X_q" if do_quantize else "X",
                    "gamma_q" if do_quantize_weight else "gamma",
                    "beta_q" if do_quantize_weight else "beta",
                ],
                ["Y_q" if do_dequantize else "Y"],
                dequantize_output=0 if do_dequantize else 1,
                group=G,
                order=order,
                is_test=True,
                engine=engine,
                device_option=gc,
            )

            if do_quantize_weight:
                # When a quantized weight is provided, the output cannot be
                # rescaled dynamically from each batch's output range, so we
                # supply the output range observed from the fp32 reference
                # implementation instead.
                dnnlowp_utils.add_quantization_param_args(group_norm, outputs[0][0])
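                # E.g., given a fixed reference range [Y_min, Y_max], the op can
                # requantize as Y_q = round(Y / Y_scale) + Y_zero_point with
                # Y_scale about (Y_max - Y_min) / 255 for uint8 output
                # (a sketch of typical static requantization, not necessarily
                # the exact DNNLOWP formula).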

            net.Proto().op.extend([group_norm])

            if do_dequantize:
                dequantize = core.CreateOperator(
                    "Dequantize", ["Y_q"], ["Y"], engine=engine, device_option=gc
                )
                net.Proto().op.extend([dequantize])

            # Feed the fp32 inputs, run the net, and collect this engine's output.
            self.ws.create_blob("X").feed(X, device_option=gc)
            self.ws.create_blob("gamma").feed(gamma, device_option=gc)
            self.ws.create_blob("beta").feed(beta, device_option=gc)
            self.ws.run(net)
            outputs.append(
                Output(Y=self.ws.blobs["Y"].fetch(), op_type=op_type, engine=engine)
            )

        check_quantized_results_close(outputs, atol_scale=2.0)
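

if __name__ == "__main__":
    # Standard unittest entry point so the file can also be run directly;
    # the original setup presumably invokes it through its own test harness.
    import unittest

    unittest.main()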