File: fully_connected_dnnlowp_acc16_op_test.py

package info (click to toggle)
pytorch 1.13.1%2Bdfsg-4
links: PTS, VCS
area: main
in suites: bookworm
size: 139,252 kB
sloc: cpp: 1,100,274; python: 706,454; ansic: 83,052; asm: 7,618; java: 3,273; sh: 2,841; javascript: 612; makefile: 323; xml: 269; ruby: 185; yacc: 144; objc: 68; lex: 44
file content (234 lines) | stat: -rw-r--r-- 8,324 bytes
parent folder | download | duplicates (2)


import collections

import caffe2.python.hypothesis_test_util as hu
import hypothesis.strategies as st
import numpy as np
from caffe2.python import core, dyndep, workspace
from caffe2.quantization.server import utils as dnnlowp_utils
from caffe2.quantization.server.dnnlowp_test_utils import (
    check_quantized_results_close,
    run_conv_or_fc
)
from hypothesis import given


dyndep.InitOpsLibrary("//caffe2/caffe2/quantization/server:dnnlowp_ops")
workspace.GlobalInit(["caffe2", "--caffe2_omp_num_threads=11"])


class DNNLowPFullyConnectedAcc16OpTest(hu.HypothesisTestCase):
    # correctness test with no quantization error in inputs
    # fbgemm currently only supports N a multiple of 64
    @given(
        input_channels=st.sampled_from([32, 64]),
        output_channels=st.sampled_from([64, 128, 256]),
        batch_size=st.sampled_from([0, 32, 64, 128, 256]),
        in_quantized=st.booleans(),
        out_quantized=st.booleans(),
        **hu.gcs_cpu_only
    )
    def test_dnnlowp_fully_connected_acc16_int(
        self,
        input_channels,
        output_channels,
        batch_size,
        in_quantized,
        out_quantized,
        gc,
        dc,
    ):
        # X and W have scale 1, so exactly represented after quantization
        # This was made sure by having at least one 0 and one 255 for unsigned
        # 8-bit tensors, and at least one -128 and one 127 for signed 8-bit
        # tensors.
        # Since fbgemm_acc16 accumulates to 16-bit, To avoid overflow, we use
        # small numbers except for those 0, 255, -128, and 127, for this test
        # We also make sure 255, -128, or 127 are not multiplied together by
        # putting them in different input channels and the corresponding input
        # channel in other matrix is 0.
        # For example, we put 255 in input channel 1 in X, so we make the
        # corresponding input channel in W all zeros.
        X_min = -77
        X_max = X_min + 255
        X = np.round(np.random.rand(batch_size, input_channels) * 4 + X_min)
        X = X.astype(np.float32)
        X[:, 0] = X_min
        if batch_size != 0:
            X[0, 1] = X_max

        W_min = -100
        W_max = W_min + 255
        W = np.round(
            np.random.rand(output_channels, input_channels) * 4 - 2 + W_min + 128
        )
        W = W.astype(np.float32)
        W[0, 0] = W_min
        W[1, 0] = W_max
        W[:, 1] = W_min + 128

        # No input quantization error in bias
        b = np.round(np.random.randn(output_channels)).astype(np.float32)

        Output = collections.namedtuple("Output", ["Y", "op_type", "engine"])
        outputs = []

        op_engine_list = [
            ("FC", ""),
            ("FC", "DNNLOWP_ACC16"),
            ("Int8FC", "DNNLOWP_ACC16"),
        ]

        for op_type, engine in op_engine_list:
            net = core.Net("test_net")

            do_quantize = "DNNLOWP" in engine and in_quantized
            do_dequantize = "DNNLOWP" in engine and out_quantized

            if do_quantize:
                quantize = core.CreateOperator(
                    "Quantize", ["X"], ["X_q"], engine="DNNLOWP", device_option=gc
                )
                net.Proto().op.extend([quantize])

            fc = core.CreateOperator(
                op_type,
                ["X_q" if do_quantize else "X", "W", "b"],
                ["Y_q" if do_dequantize else "Y"],
                dequantize_output=(0 if do_dequantize else 1),
                engine=engine,
                device_option=gc,
            )
            net.Proto().op.extend([fc])

            if do_dequantize:
                dequantize = core.CreateOperator(
                    "Dequantize", ["Y_q"], ["Y"], engine="DNNLOWP", device_option=gc
                )
                net.Proto().op.extend([dequantize])

            run_conv_or_fc(
                self, None, net, X, W, b, op_type, engine, None, gc, outputs
            )

        check_quantized_results_close(outputs)

    @given(
        input_channels=st.sampled_from([2]),
        output_channels=st.sampled_from([4]),
        batch_size=st.sampled_from([0, 1]),
        nbits_in_non_outlier=st.sampled_from([0, 6]),
        in_quantized=st.booleans(),
        out_quantized=st.booleans(),
        prepack_weight=st.booleans(),
        **hu.gcs_cpu_only
    )
    def test_dnnlowp_fully_connected_acc16_outlier(
        self,
        input_channels,
        output_channels,
        batch_size,
        nbits_in_non_outlier,
        in_quantized,
        out_quantized,
        prepack_weight,
        gc,
        dc,
    ):
        # X and W have scale 1, so exactly represented after quantization
        # This was made sure by having at least one 0 and one 255 for unsigned
        # 8-bit tensors, and at least one -128 and one 127 for signed 8-bit
        # tensors.
        # Since fbgemm_acc16 accumulates to 16-bit, To avoid overflow, we use
        # small numbers except for those 0, 255, -128, and 127, for this test
        # We also make sure 255, -128, or 127 are not multiplied together by
        # putting them in different input channels and the corresponding input
        # channel in other matrix is 0.
        # For example, we put 255 in input channel 1 in X, so we make the
        # corresponding input channel in W all zeros.
        X_min = -77
        X_max = X_min + 255
        X = np.round(np.random.rand(batch_size, input_channels) * 4 + X_min)
        X = X.astype(np.float32)
        X[:, 0] = X_min
        if batch_size != 0:
            X[0, 1] = X_max

        W_min = -100
        W_max = W_min + 255
        W = np.round(
            np.random.rand(output_channels, input_channels) * 4 - 2 + W_min + 128
        )
        W = W.astype(np.float32)
        W[0, 0] = W_min
        W[1, 0] = W_max
        W[:, 1] = W_min + 128
        # No input quantization error in bias
        b = np.round(np.random.randn(output_channels)).astype(np.float32)
        Output = collections.namedtuple("Output", ["Y", "op_type", "engine"])
        outputs = []

        op_engine_list = [
            ("FC", ""),
            ("FC", "DNNLOWP_ACC16"),
            ("Int8FC", "DNNLOWP_ACC16"),
        ]

        for op_type, engine in op_engine_list:
            init_net = core.Net("test_init_net")
            net = core.Net("test_net")

            do_quantize = "DNNLOWP" in engine and in_quantized
            do_dequantize = "DNNLOWP" in engine and out_quantized
            do_prepack_weight = engine == "DNNLOWP" and prepack_weight

            if do_quantize:
                quantize = core.CreateOperator(
                    "Quantize", ["X"], ["X_q"], engine="DNNLOWP", device_option=gc
                )
                net.Proto().op.extend([quantize])

            X_min = 0 if X.size == 0 else X.min()
            X_max = 0 if X.size == 0 else X.max()
            x_q_param = dnnlowp_utils.choose_quantization_params(X_min, X_max)

            if do_prepack_weight:
                inputs = ["W"]
                if do_dequantize:
                    inputs += ["b"]
                pack = core.CreateOperator(
                    "Int8FCPackWeight",
                    inputs,
                    ["W_packed"],
                    in_scale=x_q_param.scale,
                    engine=engine,
                )
                init_net.Proto().op.extend([pack])

            fc = core.CreateOperator(
                op_type,
                [
                    "X_q" if do_quantize else "X",
                    "W_packed" if do_prepack_weight else "W",
                    "b",
                ],
                ["Y_q" if do_dequantize else "Y"],
                dequantize_output=(0 if do_dequantize else 1),
                engine=engine,
                nbits_in_non_outlier=nbits_in_non_outlier,
                device_option=gc,
            )
            net.Proto().op.extend([fc])

            if do_dequantize:
                dequantize = core.CreateOperator(
                    "Dequantize", ["Y_q"], ["Y"], engine="DNNLOWP", device_option=gc
                )
                net.Proto().op.extend([dequantize])

            run_conv_or_fc(
                self, init_net, net, X, W, b, op_type, engine, None, gc, outputs
            )

        check_quantized_results_close(outputs)