1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158
|
//===-- udivmodti4.c - Implement __udivmodti4 -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements __udivmodti4 for the compiler_rt library.
//
//===----------------------------------------------------------------------===//
#include "int_lib.h"
#ifdef CRT_HAS_128BIT
// Returns the 128 bit division result by 64 bit. Result must fit in 64 bits.
// Remainder stored in r.
// Taken and adjusted from libdivide libdivide_128_div_64_to_64 division
// fallback. For a correctness proof see the reference for this algorithm
// in Knuth, Volume 2, section 4.3.1, Algorithm D.
UNUSED
static inline du_int udiv128by64to64default(du_int u1, du_int u0, du_int v,
du_int *r) {
const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
const du_int b = (1ULL << (n_udword_bits / 2)); // Number base (32 bits)
du_int un1, un0; // Norm. dividend LSD's
du_int vn1, vn0; // Norm. divisor digits
du_int q1, q0; // Quotient digits
du_int un64, un21, un10; // Dividend digit pairs
du_int rhat; // A remainder
si_int s; // Shift amount for normalization
s = __builtin_clzll(v);
if (s > 0) {
// Normalize the divisor.
v = v << s;
un64 = (u1 << s) | (u0 >> (n_udword_bits - s));
un10 = u0 << s; // Shift dividend left
} else {
// Avoid undefined behavior of (u0 >> 64).
un64 = u1;
un10 = u0;
}
// Break divisor up into two 32-bit digits.
vn1 = v >> (n_udword_bits / 2);
vn0 = v & 0xFFFFFFFF;
// Break right half of dividend into two digits.
un1 = un10 >> (n_udword_bits / 2);
un0 = un10 & 0xFFFFFFFF;
// Compute the first quotient digit, q1.
q1 = un64 / vn1;
rhat = un64 - q1 * vn1;
// q1 has at most error 2. No more than 2 iterations.
while (q1 >= b || q1 * vn0 > b * rhat + un1) {
q1 = q1 - 1;
rhat = rhat + vn1;
if (rhat >= b)
break;
}
un21 = un64 * b + un1 - q1 * v;
// Compute the second quotient digit.
q0 = un21 / vn1;
rhat = un21 - q0 * vn1;
// q0 has at most error 2. No more than 2 iterations.
while (q0 >= b || q0 * vn0 > b * rhat + un0) {
q0 = q0 - 1;
rhat = rhat + vn1;
if (rhat >= b)
break;
}
*r = (un21 * b + un0 - q0 * v) >> s;
return q1 * b + q0;
}
static inline du_int udiv128by64to64(du_int u1, du_int u0, du_int v,
du_int *r) {
#if defined(__x86_64__)
du_int result;
__asm__("divq %[v]"
: "=a"(result), "=d"(*r)
: [ v ] "r"(v), "a"(u0), "d"(u1));
return result;
#else
return udiv128by64to64default(u1, u0, v, r);
#endif
}
// Effects: if rem != 0, *rem = a % b
// Returns: a / b
COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem) {
const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;
utwords dividend;
dividend.all = a;
utwords divisor;
divisor.all = b;
utwords quotient;
utwords remainder;
if (divisor.all > dividend.all) {
if (rem)
*rem = dividend.all;
return 0;
}
// When the divisor fits in 64 bits, we can use an optimized path.
if (divisor.s.high == 0) {
remainder.s.high = 0;
if (dividend.s.high < divisor.s.low) {
// The result fits in 64 bits.
quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
divisor.s.low, &remainder.s.low);
quotient.s.high = 0;
} else {
// First, divide with the high part to get the remainder in dividend.s.high.
// After that dividend.s.high < divisor.s.low.
quotient.s.high = dividend.s.high / divisor.s.low;
dividend.s.high = dividend.s.high % divisor.s.low;
quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
divisor.s.low, &remainder.s.low);
}
if (rem)
*rem = remainder.all;
return quotient.all;
}
// 0 <= shift <= 63.
si_int shift =
__builtin_clzll(divisor.s.high) - __builtin_clzll(dividend.s.high);
divisor.all <<= shift;
quotient.s.high = 0;
quotient.s.low = 0;
for (; shift >= 0; --shift) {
quotient.s.low <<= 1;
// Branch free version of.
// if (dividend.all >= divisor.all)
// {
// dividend.all -= divisor.all;
// carry = 1;
// }
const ti_int s =
(ti_int)(divisor.all - dividend.all - 1) >> (n_utword_bits - 1);
quotient.s.low |= s & 1;
dividend.all -= divisor.all & s;
divisor.all >>= 1;
}
if (rem)
*rem = dividend.all;
return quotient.all;
}
#endif // CRT_HAS_128BIT
|