1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
|
/* mpfr_log_ui -- compute natural logarithm of an unsigned long
Copyright 2014-2019 Free Software Foundation, Inc.
Contributed by the AriC and Caramba projects, INRIA.
This file is part of the GNU MPFR Library.
The GNU MPFR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.
The GNU MPFR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
#define MPFR_NEED_LONGLONG_H
#include "mpfr-impl.h"
/* FIXME: mpfr_log_ui is much slower than mpfr_log on some values of n,
e.g. about 4 times as slow for n around ULONG_MAX/3 on an
x86_64 Linux machine, for 10^6 bits of precision. The reason is that
for say n=6148914691236517205 and prec=10^6, the value of T computed
has more than 50M bits, which is much more than needed. Indeed the
binary splitting algorithm for series with a finite radius of convergence
gives rationals of size n*log(n) for a target precision n. One might
truncate the rationals inside the algorithm, but then the error analysis
should be redone. */
/* Cf http://www.ginac.de/CLN/binsplit.pdf: the Taylor series of log(1+x)
up to order N for x=p/2^k is T/(B*Q).
P[0] <- (-p)^(n2-n1) [with opposite sign when n1=1]
q <- k*(n2-n1) [corresponding to Q[0] = 2^q]
B[0] <- n1 * (n1+1) * ... * (n2-1)
T[0] <- B[0]*Q[0] * S(n1,n2)
where S(n1,n2) = -sum((-x)^(i-n1+1)/i, i=n1..n2-1)
Assumes p is odd or zero, and -1/3 <= x = p/2^k <= 1/3.
*/
static void
S (mpz_t *P, unsigned long *q, mpz_t *B, mpz_t *T, unsigned long n1,
unsigned long n2, long p, unsigned long k, int need_P)
{
MPFR_ASSERTD (n1 < n2);
MPFR_ASSERTD (p == 0 || ((unsigned long) p & 1) != 0);
if (n2 == n1 + 1)
{
mpz_set_si (P[0], (n1 == 1) ? p : -p);
*q = k;
mpz_set_ui (B[0], n1);
/* T = B*Q*S where S = P/(B*Q) thus T = P */
mpz_set (T[0], P[0]);
/* since p is odd (or zero), there is no common factor 2 between
P and Q, or T and B */
}
else
{
unsigned long m = (n1 / 2) + (n2 / 2) + (n1 & 1UL & n2), q1;
/* m = floor((n1+n2)/2) */
MPFR_ASSERTD (n1 < m && m < n2);
S (P, q, B, T, n1, m, p, k, 1);
S (P + 1, &q1, B + 1, T + 1, m, n2, p, k, need_P);
/* T0 <- T0*B1*Q1 + P0*B0*T1 */
mpz_mul (T[1], T[1], P[0]);
mpz_mul (T[1], T[1], B[0]);
mpz_mul (T[0], T[0], B[1]);
/* Q[1] = 2^q1 */
mpz_mul_2exp (T[0], T[0], q1); /* mpz_mul (T[0], T[0], Q[1]) */
mpz_add (T[0], T[0], T[1]);
if (need_P)
mpz_mul (P[0], P[0], P[1]);
*q += q1; /* mpz_mul (Q[0], Q[0], Q[1]) */
mpz_mul (B[0], B[0], B[1]);
/* there should be no common factors 2 between P, Q and T,
since P is odd (or zero) */
}
}
int
mpfr_log_ui (mpfr_ptr x, unsigned long n, mpfr_rnd_t rnd_mode)
{
unsigned long k;
mpfr_prec_t w; /* working precision */
mpz_t three_n, *P, *B, *T;
mpfr_t t, q;
int inexact;
unsigned long N, lgN, i, kk;
long p;
MPFR_GROUP_DECL(group);
MPFR_TMP_DECL(marker);
MPFR_ZIV_DECL(loop);
MPFR_SAVE_EXPO_DECL (expo);
if (n <= 2)
{
if (n == 0)
{
MPFR_SET_INF (x);
MPFR_SET_NEG (x);
MPFR_SET_DIVBY0 ();
MPFR_RET (0); /* log(0) is an exact -infinity */
}
else if (n == 1)
{
MPFR_SET_ZERO (x);
MPFR_SET_POS (x);
MPFR_RET (0); /* only "normal" case where the result is exact */
}
/* now n=2 */
return mpfr_const_log2 (x, rnd_mode);
}
/* here n >= 3 */
/* Argument reduction: compute k such that 2/3 <= n/2^k < 4/3,
i.e., 2^(k+1) <= 3n < 2^(k+2).
FIXME: we could do better by considering n/(2^k*3^i*5^j),
which reduces the maximal distance to 1 from 1/3 to 1/8,
thus needing about 1.89 less terms in the Taylor expansion of
the reduced argument. Then log(2^k*3^i*5^j) can be computed
using a combination of log(16/15), log(25/24) and log(81/80),
see Section 6.5 of "A Fortran Multiple-Precision Arithmetic Package",
Richard P. Brent, ACM Transactions on Mathematical Software, 1978. */
mpz_init_set_ui (three_n, n);
mpz_mul_ui (three_n, three_n, 3);
k = mpz_sizeinbase (three_n, 2) - 2;
MPFR_ASSERTD (k >= 2);
mpz_clear (three_n);
/* The reduced argument is n/2^k - 1 = (n-2^k)/2^k.
Compute p = n-2^k. One has: |p| = |n-2^k| < 2^k/3 < n/2 <= LONG_MAX,
so that p and -p both fit in a long. */
if (k < sizeof (unsigned long) * CHAR_BIT)
n -= 1UL << k;
/* n is now the value of p mod ULONG_MAX+1 */
p = n > LONG_MAX ? - (long) - n : (long) n;
MPFR_TMP_MARK(marker);
w = MPFR_PREC(x) + MPFR_INT_CEIL_LOG2 (MPFR_PREC(x)) + 10;
MPFR_GROUP_INIT_2(group, w, t, q);
MPFR_SAVE_EXPO_MARK (expo);
kk = k;
if (p != 0)
while ((p % 2) == 0) /* replace p/2^kk by (p/2)/2^(kk-1) */
{
p /= 2;
kk --;
}
MPFR_ZIV_INIT (loop, w);
for (;;)
{
mpfr_t tmp;
unsigned int err;
unsigned long q0;
/* we need at most w/log2(2^kk/|p|) terms for an accuracy of w bits */
mpfr_init2 (tmp, 32);
mpfr_set_ui (tmp, (p > 0) ? p : -p, MPFR_RNDU);
mpfr_log2 (tmp, tmp, MPFR_RNDU);
mpfr_ui_sub (tmp, kk, tmp, MPFR_RNDD);
MPFR_ASSERTN (w <= ULONG_MAX);
mpfr_ui_div (tmp, w, tmp, MPFR_RNDU);
N = mpfr_get_ui (tmp, MPFR_RNDU);
if (N < 2)
N = 2;
lgN = MPFR_INT_CEIL_LOG2 (N) + 1;
mpfr_clear (tmp);
P = (mpz_t *) MPFR_TMP_ALLOC (3 * lgN * sizeof (mpz_t));
B = P + lgN;
T = B + lgN;
for (i = 0; i < lgN; i++)
{
mpz_init (P[i]);
mpz_init (B[i]);
mpz_init (T[i]);
}
S (P, &q0, B, T, 1, N, p, kk, 0);
/* mpz_mul (Q[0], B[0], Q[0]); */
/* mpz_mul_2exp (B[0], B[0], q0); */
mpfr_set_z (t, T[0], MPFR_RNDN); /* t = P[0] * (1 + theta_1) */
mpfr_set_z (q, B[0], MPFR_RNDN); /* q = B[0] * (1 + theta_2) */
mpfr_mul_2exp (q, q, q0, MPFR_RNDN); /* B[0]*Q[0] */
mpfr_div (t, t, q, MPFR_RNDN); /* t = T[0]/(B[0]*Q[0])*(1 + theta_3)^3
= log(n/2^k) * (1 + theta_4)^4
for |theta_i| < 2^(-w) */
/* argument reconstruction: add k*log(2) */
mpfr_const_log2 (q, MPFR_RNDN);
mpfr_mul_ui (q, q, k, MPFR_RNDN);
mpfr_add (t, t, q, MPFR_RNDN);
for (i = 0; i < lgN; i++)
{
mpz_clear (P[i]);
mpz_clear (B[i]);
mpz_clear (T[i]);
}
/* The maximal error is 5 ulps for P/Q, since |(1+/-u)^4 - 1| < 5*u
for u < 2^(-12), k ulps for k*log(2), and 1 ulp for the addition,
thus at most k+6 ulps.
Note that there might be some cancellation in the addition: the worst
case is when log(1 + p/2^kk) = log(2/3) ~ -0.405, and with n=3 which
gives k=2, thus we add 2*log(2) = 1.386. Thus in the worst case we
have an exponent decrease of 1, which accounts for +1 in the error. */
err = MPFR_INT_CEIL_LOG2 (k + 6) + 1;
if (MPFR_LIKELY (MPFR_CAN_ROUND (t, w - err, MPFR_PREC(x), rnd_mode)))
break;
MPFR_ZIV_NEXT (loop, w);
MPFR_GROUP_REPREC_2(group, w, t, q);
}
MPFR_ZIV_FREE (loop);
inexact = mpfr_set (x, t, rnd_mode);
MPFR_GROUP_CLEAR(group);
MPFR_TMP_FREE(marker);
MPFR_SAVE_EXPO_FREE (expo);
return mpfr_check_range (x, inexact, rnd_mode);
}
|