1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
|
#ifndef CAFFE2_UTILS_FIXED_DIVISOR_H_
#define CAFFE2_UTILS_FIXED_DIVISOR_H_
#include <cstdint>
#include <cstdio>
#include <cstdlib>
// See Note [hip-clang differences to hcc]
#if defined(__CUDA_ARCH__) || defined(__HIP_ARCH__) || defined(__HIP__) || \
(defined(__clang__) && defined(__CUDA__))
#define FIXED_DIVISOR_DECL inline __host__ __device__
#else
#define FIXED_DIVISOR_DECL inline
#endif
namespace caffe2 {
// Utility class for quickly calculating quotients and remainders for
// a known integer divisor
template <typename T>
class FixedDivisor {};
// Works for any positive divisor, 1 to INT_MAX. One 64-bit
// multiplication and one 64-bit shift is used to calculate the
// result.
template <>
class FixedDivisor<std::int32_t> {
public:
FixedDivisor() = default;
explicit FixedDivisor(const std::int32_t d) : d_(d) {
#if !defined(USE_ROCM)
CalcSignedMagic();
#endif // USE_ROCM
}
FIXED_DIVISOR_DECL std::int32_t d() const {
return d_;
}
#if !defined(USE_ROCM)
FIXED_DIVISOR_DECL std::uint64_t magic() const {
return magic_;
}
FIXED_DIVISOR_DECL int shift() const {
return shift_;
}
#endif // USE_ROCM
/// Calculates `q = n / d`.
FIXED_DIVISOR_DECL std::int32_t Div(const std::int32_t n) const {
#if defined(USE_ROCM)
return n / d_;
#else // USE_ROCM
// In lieu of a mulhi instruction being available, perform the
// work in uint64
return (int32_t)((magic_ * (uint64_t)n) >> shift_);
#endif // USE_ROCM
}
/// Calculates `r = n % d`.
FIXED_DIVISOR_DECL std::int32_t Mod(const std::int32_t n) const {
return n - d_ * Div(n);
}
/// Calculates `q = n / d` and `r = n % d` together.
FIXED_DIVISOR_DECL void
DivMod(const std::int32_t n, std::int32_t* q, int32_t* r) const {
*q = Div(n);
*r = n - d_ * *q;
}
private:
#if !defined(USE_ROCM)
// Calculates magic multiplicative value and shift amount for calculating `q =
// n / d` for signed 32-bit integers.
// Implementation taken from Hacker's Delight section 10.
void CalcSignedMagic() {
if (d_ == 1) {
magic_ = UINT64_C(0x1) << 32;
shift_ = 32;
return;
}
const std::uint32_t two31 = UINT32_C(0x80000000);
const std::uint32_t ad = std::abs(d_);
const std::uint32_t t = two31 + ((uint32_t)d_ >> 31);
const std::uint32_t anc = t - 1 - t % ad; // Absolute value of nc.
std::uint32_t p = 31; // Init. p.
std::uint32_t q1 = two31 / anc; // Init. q1 = 2**p/|nc|.
std::uint32_t r1 = two31 - q1 * anc; // Init. r1 = rem(2**p, |nc|).
std::uint32_t q2 = two31 / ad; // Init. q2 = 2**p/|d|.
std::uint32_t r2 = two31 - q2 * ad; // Init. r2 = rem(2**p, |d|).
std::uint32_t delta = 0;
do {
++p;
q1 <<= 1; // Update q1 = 2**p/|nc|.
r1 <<= 1; // Update r1 = rem(2**p, |nc|).
if (r1 >= anc) { // (Must be an unsigned
++q1; // comparison here).
r1 -= anc;
}
q2 <<= 1; // Update q2 = 2**p/|d|.
r2 <<= 1; // Update r2 = rem(2**p, |d|).
if (r2 >= ad) { // (Must be an unsigned
++q2; // comparison here).
r2 -= ad;
}
delta = ad - r2;
} while (q1 < delta || (q1 == delta && r1 == 0));
std::int32_t magic = q2 + 1;
if (d_ < 0) {
magic = -magic;
}
shift_ = p;
magic_ = (std::uint64_t)(std::uint32_t)magic;
}
#endif // USE_ROCM
std::int32_t d_ = 1;
#if !defined(USE_ROCM)
std::uint64_t magic_;
int shift_;
#endif // USE_ROCM
};
} // namespace caffe2
#endif // CAFFE2_UTILS_FIXED_DIVISOR_H_
|