File: quantization_utils.h

package info (click to toggle)

chromium 139.0.7258.127-1

links: PTS, VCS
area: main
in suites:
size: 6,122,068 kB
sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36

file content (71 lines) | stat: -rw-r--r-- 2,886 bytes

parent folder | download | duplicates (8)

// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_LANGUAGE_DETECTION_CORE_QUANTIZATION_UTILS_H_
#define COMPONENTS_LANGUAGE_DETECTION_CORE_QUANTIZATION_UTILS_H_

#include <cmath>
#include <cstdint>

namespace language_detection {

// Quantizes the floating point value `x` into an unsigned integer with
// `num_bits` of precision.
//
// The endpoints of the float range saturate:
//   - `min_val` (and all values below it) map to Quantized 0.
//   - `max_val` (and all values above it) map to Quantized
//     (1 << num_bits) - 1.
//
// `num_bits` must be greater than 1, and less than 32.
//
// NOTE(review): only the declaration is visible in this header; how values
// strictly between `min_val` and `max_val` are rounded is decided by the
// definition -- confirm there before relying on a particular rounding mode.
uint32_t FloatToQuantized(float x, float min_val, float max_val, int num_bits);

// Dequantizes the quantized value `x` (stored with `num_bits` of precision)
// back into a floating point value.
//
// `min_val` and `max_val` describe the same float range that is used when
// quantizing with `FloatToQuantized`, i.e. the range under which `min_val`
// (and all values below it) map to Quantized 0 and `max_val` (and all values
// above it) map to Quantized (1 << num_bits) - 1.
//
// NOTE(review): presumably Quantized 0 dequantizes to (approximately)
// `min_val` and Quantized (1 << num_bits) - 1 to (approximately) `max_val`;
// the exact values depend on the "nudged" parameters computed by the
// definition -- confirm there.
//
// `num_bits` must be greater than 1, and less than 32.
float QuantizedToFloat(uint32_t x, float min_val, float max_val, int num_bits);

// Params required for quantizing / dequantizing a given value.
//
// These are populated by the `GetQuantizationParams` method, and are
// used internally by the `QuantizedToFloat` and `FloatToQuantized`
// methods. A caller can also compute them once, cache them, and reuse them
// when invoking the `QuantizedToFloatWithQuantParams` method repeatedly on
// values from the same tensor.
struct QuantizationParams {
  // Multiplier applied to a quantized value when dequantizing (see
  // `QuantizedToFloatWithQuantParams`).
  float nudged_scale;
  // Offset added after scaling when dequantizing (see
  // `QuantizedToFloatWithQuantParams`).
  float nudged_min;
  // NOTE(review): presumably the float value corresponding to the largest
  // quantized value; it is not read anywhere in this header -- confirm
  // against the definition of `GetQuantizationParams`.
  float nudged_max;
  // NOTE(review): presumably the largest quantized value,
  // (1 << num_bits) - 1; it is not read anywhere in this header -- confirm
  // against the definition of `GetQuantizationParams`.
  uint32_t quant_max_uint32;
};

// Computes the params required for quantization / dequantization of values
// in the float range described by `min_val` and `max_val`, using `num_bits`
// of precision.
//
// This is the first part of the `FloatToQuantized` and
// `QuantizedToFloat` methods. It is useful to call it once when a large
// number of values from the same tensor need to be quantized or dequantized,
// and then reuse the returned `QuantizationParams` (for example with
// `QuantizedToFloatWithQuantParams`).
//
// `num_bits` must be greater than 1, and less than 32.
QuantizationParams GetQuantizationParams(float min_val,
                                         float max_val,
                                         int num_bits);

// Dequantizes `x` with precomputed `params`, as obtained from the
// `GetQuantizationParams` method.
//
// This is the second part of the `QuantizedToFloat` method. Once the scale
// and min have been computed for a tensor, calling this directly avoids
// recomputing them for every quantized value.
//
// Returns `x` scaled by `params.nudged_scale` and offset by
// `params.nudged_min`.
inline float QuantizedToFloatWithQuantParams(uint32_t x,
                                             const QuantizationParams& params) {
  const float scale = params.nudged_scale;
  const float offset = params.nudged_min;
  // Kept as a single multiply-add expression; `x` is converted to float
  // before the multiplication.
  return static_cast<float>(x) * scale + offset;
}

}  // namespace language_detection

#endif  // COMPONENTS_LANGUAGE_DETECTION_CORE_QUANTIZATION_UTILS_H_