File: example_quant_recipe.yaml

package info (click to toggle)
compressed-tensors 0.9.4-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 908 kB
  • sloc: python: 7,543; makefile: 32
file content (32 lines) | stat: -rw-r--r-- 1,110 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# Quantization recipe: one stage (`test_stage`) whose `quant_modifiers`
# group holds a single `QuantizationModifier` entry.
test_stage:
  quant_modifiers:
    QuantizationModifier:
      # NOTE(review): the entries below look like one fully-qualified layer
      # path followed by module/op type names; presumably the modifier skips
      # anything matching them -- confirm against the recipe consumer.
      ignore:
        - model.layers.0.mlp.down_proj
        - LlamaRotaryEmbedding
        - LlamaRMSNorm
        - SiLU
        - MatMulLeftInput_QK
        - MatMulRightInput_QK
        - MatMulOutput_QK
        - MatMulLeftInput_PV
        - MatMulRightInput_PV
        - MatMulOutput_PV
      # Settings keyed by `Linear` and `Embedding`.
      # NOTE(review): presumably keyed by module type -- confirm.
      scheme_overrides:
        # Linear: weights and input activations are both 8-bit with the
        # "tensor" strategy; weights are symmetric, input activations are
        # not. Output activations are explicitly null.
        Linear:
          weights:
            num_bits: 8
            symmetric: true
            strategy: "tensor"
          input_activations:
            num_bits: 8
            symmetric: false
            strategy: "tensor"
          output_activations: null
        # Embedding: only weights are configured (8-bit, symmetric,
        # "tensor" strategy); both activation entries are explicitly null.
        Embedding:
          weights:
            num_bits: 8
            symmetric: true
            strategy: "tensor"
          input_activations: null
          output_activations: null