File: fixed_divisor.h

package info (click to toggle)
pytorch 1.13.1%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 139,252 kB
  • sloc: cpp: 1,100,274; python: 706,454; ansic: 83,052; asm: 7,618; java: 3,273; sh: 2,841; javascript: 612; makefile: 323; xml: 269; ruby: 185; yacc: 144; objc: 68; lex: 44
file content (132 lines) | stat: -rw-r--r-- 3,532 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#ifndef CAFFE2_UTILS_FIXED_DIVISOR_H_
#define CAFFE2_UTILS_FIXED_DIVISOR_H_

#include <cstdint>
#include <cstdio>
#include <cstdlib>

// See Note [hip-clang differences to hcc]

#if defined(__CUDA_ARCH__) || defined(__HIP_ARCH__) || defined(__HIP__) || \
    (defined(__clang__) && defined(__CUDA__))
#define FIXED_DIVISOR_DECL inline __host__ __device__
#else
#define FIXED_DIVISOR_DECL inline
#endif

namespace caffe2 {

// Utility class for quickly calculating quotients and remainders for
// a known integer divisor
template <typename T>
class FixedDivisor {};

// Works for any positive divisor, 1 to INT_MAX. One 64-bit
// multiplication and one 64-bit shift is used to calculate the
// result.
template <>
class FixedDivisor<std::int32_t> {
 public:
  FixedDivisor() = default;

  explicit FixedDivisor(const std::int32_t d) : d_(d) {
#if !defined(USE_ROCM)
    CalcSignedMagic();
#endif // USE_ROCM
  }

  FIXED_DIVISOR_DECL std::int32_t d() const {
    return d_;
  }

#if !defined(USE_ROCM)
  FIXED_DIVISOR_DECL std::uint64_t magic() const {
    return magic_;
  }

  FIXED_DIVISOR_DECL int shift() const {
    return shift_;
  }
#endif // USE_ROCM

  /// Calculates `q = n / d`.
  FIXED_DIVISOR_DECL std::int32_t Div(const std::int32_t n) const {
#if defined(USE_ROCM)
    return n / d_;
#else // USE_ROCM
    // In lieu of a mulhi instruction being available, perform the
    // work in uint64
    return (int32_t)((magic_ * (uint64_t)n) >> shift_);
#endif // USE_ROCM
  }

  /// Calculates `r = n % d`.
  FIXED_DIVISOR_DECL std::int32_t Mod(const std::int32_t n) const {
    return n - d_ * Div(n);
  }

  /// Calculates `q = n / d` and `r = n % d` together.
  FIXED_DIVISOR_DECL void
  DivMod(const std::int32_t n, std::int32_t* q, int32_t* r) const {
    *q = Div(n);
    *r = n - d_ * *q;
  }

 private:
#if !defined(USE_ROCM)
  // Calculates magic multiplicative value and shift amount for calculating `q =
  // n / d` for signed 32-bit integers.
  // Implementation taken from Hacker's Delight section 10.
  void CalcSignedMagic() {
    if (d_ == 1) {
      magic_ = UINT64_C(0x1) << 32;
      shift_ = 32;
      return;
    }

    const std::uint32_t two31 = UINT32_C(0x80000000);
    const std::uint32_t ad = std::abs(d_);
    const std::uint32_t t = two31 + ((uint32_t)d_ >> 31);
    const std::uint32_t anc = t - 1 - t % ad; // Absolute value of nc.
    std::uint32_t p = 31; // Init. p.
    std::uint32_t q1 = two31 / anc; // Init. q1 = 2**p/|nc|.
    std::uint32_t r1 = two31 - q1 * anc; // Init. r1 = rem(2**p, |nc|).
    std::uint32_t q2 = two31 / ad; // Init. q2 = 2**p/|d|.
    std::uint32_t r2 = two31 - q2 * ad; // Init. r2 = rem(2**p, |d|).
    std::uint32_t delta = 0;
    do {
      ++p;
      q1 <<= 1; // Update q1 = 2**p/|nc|.
      r1 <<= 1; // Update r1 = rem(2**p, |nc|).
      if (r1 >= anc) { // (Must be an unsigned
        ++q1; // comparison here).
        r1 -= anc;
      }
      q2 <<= 1; // Update q2 = 2**p/|d|.
      r2 <<= 1; // Update r2 = rem(2**p, |d|).
      if (r2 >= ad) { // (Must be an unsigned
        ++q2; // comparison here).
        r2 -= ad;
      }
      delta = ad - r2;
    } while (q1 < delta || (q1 == delta && r1 == 0));
    std::int32_t magic = q2 + 1;
    if (d_ < 0) {
      magic = -magic;
    }
    shift_ = p;
    magic_ = (std::uint64_t)(std::uint32_t)magic;
  }
#endif // USE_ROCM

  std::int32_t d_ = 1;

#if !defined(USE_ROCM)
  std::uint64_t magic_;
  int shift_;
#endif // USE_ROCM
};

} // namespace caffe2

#endif // CAFFE2_UTILS_FIXED_DIVISOR_H_