File: finalize.h

#pragma once

#include <torch/csrc/jit/api/module.h>
#include <torch/csrc/jit/ir/ir.h>
#include <torch/csrc/jit/passes/quantization/quantization_type.h>

namespace torch {
namespace jit {

/** \brief Backend-specific pass to fuse dequantize - op - quantize call
 * sequences into quantized_op calls.
 *
 * Right now this fusion targets the fbgemm backend and only works for the
 * quantized conv and linear ops; we'll extend it to more ops and more
 * backends in the future.
 *
 * Currently supported fusions:
 * q(conv2d(dq(a), dq(w), dq(b))) --> to_nchw(fbgemm_conv2d(prepack(to_nhwc(a)),
 *                                                          prepack(to_nhwc(w)),
 *                                                          prepack(to_nhwc(b))))
 *
 * q(linear(dq(a), dq(w), dq(b))) --> to_nchw(fbgemm_linear(prepack(to_nhwc(a)),
 *                                                          prepack(to_nhwc(w)),
 *                                                          prepack(to_nhwc(b))))
 *
 * \param graph the graph we want to apply the fusion to
 */
TORCH_API void QuantFusion(
    std::shared_ptr<Graph>& graph,
    QuantType quant_type = QuantType::STATIC);
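
// A minimal usage sketch (not part of this header; the surrounding pipeline
// is an assumption): given a scripted module whose graph already contains
// quantize/dequantize calls, the fusion can be run on a method's graph as:
//
//   std::shared_ptr<Graph> graph = module.get_method("forward").graph();
//   InsertPrepackUnpack(graph);
//   QuantFusion(graph, QuantType::STATIC);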

/** \brief Insert prepack and unpack function calls in the graph.
 *
 *  We want to add pack/unpack functions for the quantized weight because
 * later we want to fold the packed weight into an attribute of the module,
 * in order to reduce the cost of packing the weight on the fly in quantized
 * models.
 *
 *  Each quantized op has its corresponding prepack/unpack function;
 *  right now, we only need to do prepack/unpack for quantized::linear
 * and quantized::conv2d.
 */
TORCH_API void InsertPrepackUnpack(std::shared_ptr<Graph>& graph);
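
// Schematically (a rough sketch in TorchScript IR notation; value names are
// illustrative, not taken from the implementation), a linear with a
// quantized weight
//
//   %w_dequant = aten::dequantize(%w_quant)
//   %r = aten::linear(%a_dequant, %w_dequant, %b)
//
// has its weight use rewritten to
//
//   %packed_params = quantized::linear_prepack(%w_quant, %b)
//   %w_unpacked, %b_unpacked = quantized::linear_unpack(%packed_params)
//   %w_dequant = aten::dequantize(%w_unpacked)
//   %r = aten::linear(%a_dequant, %w_dequant, %b_unpacked)
//
// so that a later pass can fold %packed_params into a module attribute.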

/** \brief Insert pack and unpack function calls in all graphs
 *   of the module.
 *
 *   Goes through the graphs of all the methods of all child modules
 *   and calls InsertPrepackUnpack on each graph.
 */
TORCH_API void InsertPrepackUnpack(Module& module);
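
// A sketch of the traversal this overload performs (an assumption based on
// the comment above, not the verbatim implementation):
//
//   for (const Module& m : module.modules()) {
//     for (const Method& method : m.get_methods()) {
//       auto g = method.graph();
//       InsertPrepackUnpack(g);
//     }
//   }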

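/** \brief Run the finalization steps that make a quantized module ready for
 *  inference (a summary based on the passes declared above): insert
 *  prepack/unpack calls, apply QuantFusion for the given quant_type, and
 *  fold the quantized prepacking ops, keeping the attributes listed in
 *  preserved_attrs.
 */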
TORCH_API Module Finalize(
    Module& module,
    QuantType quant_type = QuantType::STATIC,
    const std::vector<std::string>& preserved_attrs =
        std::vector<std::string>());

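/** \brief Fold the outputs of quantized prepacking ops (e.g.
 *  quantized::linear_prepack) into attributes of the module, so that weights
 *  are packed once ahead of time instead of on the fly at inference (see the
 *  rationale in InsertPrepackUnpack above).
 */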
TORCH_API void FoldQuantizedPrepackingOps(Module& module);

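/** \brief Variant of Finalize used for on-device post-training quantization
 *  (PTQ); it finalizes the graph of the method named method_name rather than
 *  forward (a reading of the signature, not a verbatim description of the
 *  implementation).
 */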
TORCH_API Module FinalizeOnDevicePTQ(
    Module& module,
    QuantType quant_type,
    const std::string& method_name);
} // namespace jit
} // namespace torch