1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
|
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
/*
* Perform a narrowing division: 128 / 64 -> 64, and 64 / 32 -> 32.
* The dividend's low and high words are given by \p numhi and \p numlo, respectively.
* The divisor is given by \p den.
* \return the quotient, and the remainder by reference in \p r, if not null.
* If the quotient would require more than 64 bits, or if denom is 0, then return the max value
* for both quotient and remainder.
*
* These functions are released into the public domain, where applicable, or the CC0 license.
*/
uint64_t divllu(uint64_t numhi, uint64_t numlo, uint64_t den, uint64_t *r)
{
// We work in base 2**32.
// A uint32 holds a single digit. A uint64 holds two digits.
// Our numerator is conceptually [num3, num2, num1, num0].
// Our denominator is [den1, den0].
const uint64_t b = (1ull << 32);
// The high and low digits of our computed quotient.
uint32_t q1;
uint32_t q0;
// The normalization shift factor.
int shift;
// The high and low digits of our denominator (after normalizing).
// Also the low 2 digits of our numerator (after normalizing).
uint32_t den1;
uint32_t den0;
uint32_t num1;
uint32_t num0;
// A partial remainder.
uint64_t rem;
// The estimated quotient, and its corresponding remainder (unrelated to true remainder).
uint64_t qhat;
uint64_t rhat;
// Variables used to correct the estimated quotient.
uint64_t c1;
uint64_t c2;
// Check for overflow and divide by 0.
if (numhi >= den) {
if (r != NULL)
*r = ~0ull;
return ~0ull;
}
// Determine the normalization factor. We multiply den by this, so that its leading digit is at
// least half b. In binary this means just shifting left by the number of leading zeros, so that
// there's a 1 in the MSB.
// We also shift numer by the same amount. This cannot overflow because numhi < den.
// The expression (-shift & 63) is the same as (64 - shift), except it avoids the UB of shifting
// by 64. The funny bitwise 'and' ensures that numlo does not get shifted into numhi if shift is 0.
// clang 11 has an x86 codegen bug here: see LLVM bug 50118. The sequence below avoids it.
shift = __builtin_clzll(den);
den <<= shift;
numhi <<= shift;
numhi |= (numlo >> (-shift & 63)) & (-(int64_t)shift >> 63);
numlo <<= shift;
// Extract the low digits of the numerator and both digits of the denominator.
num1 = (uint32_t)(numlo >> 32);
num0 = (uint32_t)(numlo & 0xFFFFFFFFu);
den1 = (uint32_t)(den >> 32);
den0 = (uint32_t)(den & 0xFFFFFFFFu);
// We wish to compute q1 = [n3 n2 n1] / [d1 d0].
// Estimate q1 as [n3 n2] / [d1], and then correct it.
// Note while qhat may be 2 digits, q1 is always 1 digit.
qhat = numhi / den1;
rhat = numhi % den1;
c1 = qhat * den0;
c2 = rhat * b + num1;
if (c1 > c2)
qhat -= (c1 - c2 > den) ? 2 : 1;
q1 = (uint32_t)qhat;
// Compute the true (partial) remainder.
rem = numhi * b + num1 - q1 * den;
// We wish to compute q0 = [rem1 rem0 n0] / [d1 d0].
// Estimate q0 as [rem1 rem0] / [d1] and correct it.
qhat = rem / den1;
rhat = rem % den1;
c1 = qhat * den0;
c2 = rhat * b + num0;
if (c1 > c2)
qhat -= (c1 - c2 > den) ? 2 : 1;
q0 = (uint32_t)qhat;
// Return remainder if requested.
if (r != NULL)
*r = (rem * b + num0 - q0 * den) >> shift;
return ((uint64_t)q1 << 32) | q0;
}
uint32_t divlu(uint32_t numhi, uint32_t numlo, uint32_t den, uint32_t *r)
{
// We work in base 2**32.
// A uint16 holds a single digit. A uint32 holds two digits.
// Our numerator is conceptually [num3, num2, num1, num0].
// Our denominator is [den1, den0].
const uint32_t b = (1ull << 16);
// The high and low digits of our computed quotient.
uint16_t q1;
uint16_t q0;
// The normalization shift factor.
int shift;
// The high and low digits of our denominator (after normalizing).
// Also the low 2 digits of our numerator (after normalizing).
uint16_t den1;
uint16_t den0;
uint16_t num1;
uint16_t num0;
// A partial remainder.
uint32_t rem;
// The estimated quotient, and its corresponding remainder (unrelated to true remainder).
uint32_t qhat;
uint32_t rhat;
// Variables used to correct the estimated quotient.
uint32_t c1;
uint32_t c2;
// Check for overflow and divide by 0.
if (numhi >= den) {
if (r != NULL)
*r = ~0u;
return ~0u;
}
// Determine the normalization factor. We multiply den by this, so that its leading digit is at
// least half b. In binary this means just shifting left by the number of leading zeros, so that
// there's a 1 in the MSB.
// We also shift numer by the same amount. This cannot overflow because numhi < den.
// The expression (-shift & 31) is the same as (32 - shift), except it avoids the UB of shifting
// by 32. The funny bitwise 'and' ensures that numlo does not get shifted into numhi if shift is 0.
// clang 11 has an x86 codegen bug here: see LLVM bug 50118. The sequence below avoids it.
shift = __builtin_clz(den);
den <<= shift;
numhi <<= shift;
numhi |= (numlo >> (-shift & 31)) & (-(int32_t)shift >> 31);
numlo <<= shift;
// Extract the low digits of the numerator and both digits of the denominator.
num1 = (uint16_t)(numlo >> 16);
num0 = (uint16_t)(numlo & 0xFFFFu);
den1 = (uint16_t)(den >> 16);
den0 = (uint16_t)(den & 0xFFFFu);
// We wish to compute q1 = [n3 n2 n1] / [d1 d0].
// Estimate q1 as [n3 n2] / [d1], and then correct it.
// Note while qhat may be 2 digits, q1 is always 1 digit.
qhat = numhi / den1;
rhat = numhi % den1;
c1 = qhat * den0;
c2 = rhat * b + num1;
if (c1 > c2)
qhat -= (c1 - c2 > den) ? 2 : 1;
q1 = (uint16_t)qhat;
// Compute the true (partial) remainder.
rem = numhi * b + num1 - q1 * den;
// We wish to compute q0 = [rem1 rem0 n0] / [d1 d0].
// Estimate q0 as [rem1 rem0] / [d1] and correct it.
qhat = rem / den1;
rhat = rem % den1;
c1 = qhat * den0;
c2 = rhat * b + num0;
if (c1 > c2)
qhat -= (c1 - c2 > den) ? 2 : 1;
q0 = (uint16_t)qhat;
// Return remainder if requested.
if (r != NULL)
*r = (rem * b + num0 - q0 * den) >> shift;
return ((uint32_t)q1 << 16) | q0;
}
|