File: fbar.tex

\defmodule {fbar}

This module is similar to {\tt fdist}, except that it provides procedures
to compute or approximate the complementary distribution function of $X$,
which we define as $\bar F(x) = P[X\ge x]$, instead of $F(x)=P[X\le x]$.
Note that with our definition of $\bar F$, one has
$\bar F(x) = 1 - F(x)$ for continuous distributions and
$\bar F(x) = 1 - F(x-1)$ for discrete distributions over the integers.
This is non-standard but we find it convenient.

For more details about the specific distributions, 
see the module {\tt fdist}.
When $F(x)$ is very close to 1, these procedures generally give much
more precise values of $\bar F(x)$ than computing $1-F(x)$ with a
procedure from {\tt fdist}, because the subtraction cancels most of the
significant digits of the tail probability.
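
For example, in the upper tail of the standard normal distribution, the
following small sketch (illustrative only; it assumes that
{\tt fdist\_Normal2} in module {\tt fdist} has the same signature as
{\tt fbar\_Normal2} declared below) shows the difference:
\code

#include <stdio.h>
#include <testu01/fdist.h>   /* assumed to declare fdist_Normal2 */
#include <testu01/fbar.h>

int main (void)
{
   double x = 9.0;
   /* F(9) differs from 1 by about 1.1e-19, far below the spacing of
      doubles near 1, so the subtraction below loses all significant
      digits and typically prints 0. */
   printf ("1 - F(x)  = %.17g\n", 1.0 - fdist_Normal2 (x));
   /* The complementary function computes the tail directly. */
   printf ("Fbar(x)   = %.17g\n", fbar_Normal2 (x));
   return 0;
}
\endcode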


\bigskip
\hrule
\code\hide
/* fbar.h for ANSI C */
#ifndef FBAR_H
#define FBAR_H
\endhide
#include <testu01/gdef.h>
#include <testu01/fmass.h>
\endcode

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\guisec{Continuous distributions}

\code

double fbar_Unif (double x);
\endcode
  \tab
  Returns $1-x$ for $x \in [0, 1]$, 1 for $x<0$, and 0 for $x>1$.
  This is the complementary uniform distribution function over $[0, 1]$.
 \endtab
\code


double fbar_Expon (double x);
\endcode
  \tab
  Returns the complementary standard exponential distribution function,
  $\bar F(x) = e^{- x}$ for $x>0$, and 1 for $x\le 0$.
%  This is the complementary exponential distribution with mean $\mu$.\\
%    Restriction: {\tt mu} = $\mu > 0$.
 \endtab
\code


double fbar_Weibull (double alpha, double x);
\endcode
  \tab
  Returns the complementary standard Weibull distribution function 
  with shape parameter $\alpha$ \cite{tJOH95a}, defined by 
  $\bar F(x) = e^{- x^\alpha}$ for $x>0$ and 1 for $x\le 0$.
  Restriction: $\alpha > 0$.
 \endtab
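
Since the tail has this simple closed form, a quick sanity check is easy
(a sketch with illustrative values):
\code

#include <stdio.h>
#include <math.h>
#include <testu01/fbar.h>

int main (void)
{
   double alpha = 1.5, x = 2.0;
   /* Both lines should print the same value, exp(-x^alpha). */
   printf ("%.10g\n", fbar_Weibull (alpha, x));
   printf ("%.10g\n", exp (-pow (x, alpha)));
   return 0;
}
\endcode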
\code


double fbar_Logistic (double x);
\endcode
  \tab
  Returns $\bar F(x) = 1/(1 + e^{x})$, the complementary standard 
  logistic distribution function evaluated at $x$ \cite{tJOH95b}.
 \endtab
\code


double fbar_Pareto (double c, double x);
\endcode
  \tab
  Returns $\bar F(x) = 1/x^c$ for $x\ge 1$ and 1 for $x < 1$, 
  which is the complementary standard Pareto
  distribution function \cite{tJOH95a}.
  Restriction: $c > 0$.
 \endtab
\code


double fbar_Normal1 (double x);
\endcode
  \tab  
  Returns an approximation of $1 - \Phi(x)$, 
  where $\Phi$ is the standard normal distribution function,
  with mean 0 and variance 1. 
  Uses a Chebyshev series giving 16 decimal digits of
  precision \cite{tSCH78a}.
 \endtab
\code


double fbar_Normal2 (double x);
\endcode
  \tab  
  Returns an approximation of $1 - \Phi(x)$,  where $\Phi$ is the standard
  normal distribution function, with mean 0 and variance 1. 
   Uses the fast table-lookup method of Marsaglia et al.\ \cite{rMAR94b}.
  Gives 15 decimal digits of precision.
  This function is approximately 1.3 times faster than {\tt fbar\_Normal1}.
 \endtab
\code


#ifdef HAVE_ERF
   double fbar_Normal3 (double x);
#endif
\endcode
  \tab
  Returns an approximation of $1 - \Phi(x)$, 
  where $\Phi$ is the standard normal distribution function,
  with mean 0 and variance 1. 
  Uses the {\tt erfc} function from the standard Unix C library. The macro
  {\tt HAVE\_ERF} from {\tt mylib/gdef} must be defined. This function
  is twice as fast as {\tt fbar\_Normal2}.
 \endtab
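
As a small sketch of how one might select among these variants at
compile time (illustrative only):
\code

#include <testu01/gdef.h>   /* defines HAVE_ERF when erfc is available */
#include <testu01/fbar.h>

/* Upper tail of the standard normal: use erfc when available,
   otherwise fall back on the table-based method. */
double normal_bar (double x)
{
#ifdef HAVE_ERF
   return fbar_Normal3 (x);    /* uses erfc from the C library */
#else
   return fbar_Normal2 (x);    /* table-based method */
#endif
}
\endcode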
\code


double fbar_BiNormal1 (double x, double y, double rho, int ndig);
\endcode
  \tab  
  Returns the value $u$ of the upper standard bivariate normal distribution,
  given by
\begin{eqnarray}
     u &=&  \frac{1}{2\pi\sqrt{1 - \rho^2}} \int^{\infty}_x
              \int^{\infty}_y e^{-T} dy\, dx  \label{eq.binormal3} \\[5pt]
     T &=& \frac{x^2 -2\rho x y + y^2}{2(1-\rho^2)}, \nonumber
\end{eqnarray}
  where $\rho = {}$\texttt{rho} is the correlation between $x$ and $y$, and
  \texttt{ndig} is the number of decimal digits of accuracy.
  It calls the function {\tt fdist\_BiNormal1}. The absolute error
  is expected to be smaller than $10^{-d}$, where $d={}$\texttt{ndig}.
  Restriction: \texttt{ndig}${} \le 15$.
 \endtab
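
For instance, a call requesting about 10 decimal digits of accuracy
(the numerical values are illustrative only):
\code

#include <stdio.h>
#include <testu01/fbar.h>

int main (void)
{
   /* Upper bivariate normal probability P[X >= 0.5, Y >= -1.0]
      with correlation rho = 0.3. */
   double u = fbar_BiNormal1 (0.5, -1.0, 0.3, 10);
   printf ("u = %.12g\n", u);
   return 0;
}
\endcode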
\code


double fbar_BiNormal2 (double x, double y, double rho);
\endcode
  \tab  
  Returns the value of the upper standard bivariate normal distribution as 
  defined in (\ref{eq.binormal3}) above.
  It calls the function {\tt fdist\_BiNormal2} (see the description in
   module {\tt fdist}). The function gives
   an absolute error less than $5 \cdot 10^{-16}$. 
 \endtab
\code


double fbar_ChiSquare1 (long N, double x);
\endcode
  \tab  
    Returns $\bar F(x)$, the complementary
   chi-square distribution function with
   $N$ degrees of freedom.
  Uses the approximation given in \cite[p.116]{tKEN80a} for $N\le 1000$,
  and the normal approximation for $N > 1000$. Gives no more than 4
  decimals of precision for $N > 1000$.
 \endtab
\code


double fbar_ChiSquare2 (long N, int d, double x);
\endcode
  \tab  Returns $\bar F(x)$, the complementary chi-square distribution
  function with $N$ degrees of freedom, by calling
  {\tt fbar\_Gamma (N/2, d, x/2)}.
  The function will do its best to return $d$ decimal digits
  of precision (but there is no guarantee).
  Restrictions:  $N>0$ and $0 < d \le 15$.
 \endtab
\code


double fbar_Gamma (double a, int d, double x);
\endcode
  \tab
  Returns an approximation \cite{tBAT70a} of the complementary {\em gamma\/}
  distribution function with parameter $a$.
  The function tries to return $d$ decimal digits of precision.
%, but there is no guarantee.  
  For $a$ not too large (e.g., $a \le 1000$),
  $d$ gives a good idea of the precision attained.
%% For d = 16, gives at least 13 decimals of precision for $a \le 1000$,
%% and at least 10 decimals  for $1000 < a \le 100000$.
   Restrictions:  $a>0$ and  $0 < d \le 15$.
  \endtab
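
Since {\tt fbar\_ChiSquare2 (N, d, x)} is simply a call to
{\tt fbar\_Gamma (N/2, d, x/2)}, the two calls in the following sketch
(with illustrative arguments) should return the same value:
\code

#include <stdio.h>
#include <testu01/fbar.h>

int main (void)
{
   long N = 10;
   double x = 15.99;
   /* Complementary chi-square with N degrees of freedom, and the
      equivalent gamma call, both asking for 12 decimal digits. */
   printf ("%.12g\n", fbar_ChiSquare2 (N, 12, x));
   printf ("%.12g\n", fbar_Gamma (N / 2.0, 12, x / 2.0));
   return 0;
}
\endcode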
\code


double fbar_KS1 (long n, double x);
\endcode
\tab Returns the complementary Kolmogorov-Smirnov distribution 
$\bar F(x) = P[D_n \ge x]$ in a form that is more precise in the upper tail,
using the program described in \cite{LECz09}. 
It returns at least 10 decimal digits of precision everywhere for all
 $n \le 400$,
 at least 6 decimal digits of precision for $400 < n \le 200000$,
and a few correct digits (1 to 5) for $n > 200000$.
 Restrictions:  $n\ge 1$ and $0 \le x \le 1$.
\endtab
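
A typical use is to turn an observed Kolmogorov-Smirnov statistic into a
$p$-value (illustrative numbers):
\code

#include <stdio.h>
#include <testu01/fbar.h>

int main (void)
{
   long n = 100;        /* sample size */
   double dn = 0.17;    /* observed value of the statistic D_n */
   /* p-value P[D_n >= dn] under the null hypothesis */
   printf ("p-value = %.10g\n", fbar_KS1 (n, dn));
   return 0;
}
\endcode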
\code


double fbar_KSPlus (long n, double x);
\endcode 
\tab Returns the complementary Kolmogorov-Smirnov+ distribution 
$\bar F(x) = P[D_n^+ \ge x]$ in a form that is more precise in the upper
tail. It should return at least 8 decimal digits of precision everywhere. 
% It becomes more precise as $x$ or $n$ increase.
 Restrictions:  $n>0$ and $0 \le x \le 1$.
\endtab
\code


double fbar_LogNormal (double mu, double sigma, double x);

double fbar_JohnsonSB (double alpha, double beta, double a, double b,
                       double x);

double fbar_JohnsonSU (double alpha, double beta, double x);

double fbar_CramerMises (long n, double x);

double fbar_WatsonU (long n, double x);

double fbar_WatsonG (long n, double x);

double fbar_AndersonDarling (long n, double x);
\endcode
  \tab  These functions return the complementary distribution function
   $P[X\ge x]$ of the corresponding distributions.
   See the descriptions of the respective functions in \texttt{fdist}.
 \endtab


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\guisec{Discrete distributions}

\code

double fbar_Geometric (double p, long s);
\endcode
  \tab Returns the complementary distribution function of 
   a geometric random variable $X$ with parameter $p$, 
   $\bar F(s) = P[X\ge s] = (1-p)^s$ 
   for $s\ge 0$.
  Restriction: $0 \le p \le 1$.
 \endtab
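
With the convention $\bar F(s) = P[X\ge s]$ used in this module, the
probability mass at $s$ itself is included; for the geometric case the
two lines of the following sketch (illustrative values) should print the
same number, $(1-p)^s = 0.421875$:
\code

#include <stdio.h>
#include <math.h>
#include <testu01/fbar.h>

int main (void)
{
   double p = 0.25;
   long s = 3;
   printf ("%.10g\n", fbar_Geometric (p, s));
   printf ("%.10g\n", pow (1.0 - p, (double) s));
   return 0;
}
\endcode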
\code


double fbar_Poisson1 (double lambda, long s);
\endcode
  \tab  Returns the complementary distribution function $P[X\ge s]$
   for a Poisson random variable $X$ with parameter $\lambda$.
   Computes and adds the non-negligible terms in the tail.
   Restriction: $\lambda > 0$.
 \endtab
\code


double fbar_Poisson2 (fmass_INFO W, long s);
\endcode
 \tab  Returns the complementary Poisson distribution function,
  using the structure {\tt W} which must have been created previously
  by calling {\tt fmass\_CreatePoisson} with the desired $\lambda$.
%  Restriction: only integral values of $s$ are meaningful. 
 \endtab
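
The following sketch shows the intended usage pattern: build the
structure once, evaluate the tail repeatedly, then free it.  It assumes
that module {\tt fmass} provides a destructor {\tt fmass\_DeletePoisson}
matching {\tt fmass\_CreatePoisson}; the numerical values are
illustrative.
\code

#include <stdio.h>
#include <testu01/fmass.h>
#include <testu01/fbar.h>

int main (void)
{
   double lambda = 50.0;
   long s;
   /* Precompute the Poisson terms once, then reuse the structure. */
   fmass_INFO W = fmass_CreatePoisson (lambda);
   for (s = 40; s <= 60; s += 10)
      printf ("P[X >= %ld] = %.10g\n", s, fbar_Poisson2 (W, s));
   fmass_DeletePoisson (W);   /* assumed destructor in fmass */
   return 0;
}
\endcode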
\code


double fbar_Binomial2 (fmass_INFO W, long s);
\endcode
  \tab  Returns the complementary distribution function $P[X\ge s]$
  for a binomial random variable $X$,
  using the structure {\tt W} which must have been created previously
  by calling {\tt fmass\_CreateBinomial} with the desired values of
  $n$ and $p$.
 \endtab
\code


double fbar_NegaBin2 (fmass_INFO W, long s);
\endcode
  \tab  Returns the complementary distribution function $P[X\ge s]$
  for a negative binomial random variable $X$,
  using the structure {\tt W}
  which must have been created previously
  by calling {\tt fmass\_CreateNegaBin} with the desired values of
  $n$ and $p$.
 \endtab
\code


double fbar_Scan (long N, double d, long m);
\endcode
 \tab Returns $P[S_N(d) \ge m]$, where $S_N(d)$ is the scan statistic
  (see \cite{tGLA89a} and {\tt gofs\_Scan}), defined as 
  \eq
    S_N(d) = \sup_{0\le y\le 1-d} \eta[y,\,y+d],     \eqlabel{eq:scan}
  \endeq
  where $d$ is a constant in $(0, 1)$,
  $\eta[y,\,y+d]$ is the number of observations falling inside 
  the interval $[y, y+d]$, from a sample of $N$ i.i.d.\ $U(0,1)$
  random variables.
  One has (see \cite {tAND95b}),
  \begin {eqnarray}
   P[S_N(d) \ge m]
    &\approx& \left({m\over d}-N-1\right) b(m)
              + 2 \sum_{i=m}^{N} b(i)            \eqlabel{DistScan1} \\[6pt]
    &\approx& 2(1-\Phi(\theta\kappa)) + \theta\kappa
              {\exp[-\theta^2\kappa^2 /2] \over d \sqrt{2\pi}}
                                                 \eqlabel {DistScan2}
  \end {eqnarray}
   where $\Phi$ is the standard normal distribution function,
  \begin {eqnarray*}
   b(i)    &=& {N \choose i} d^i (1-d)^{N-i}, \\[4pt]
   \theta  &=& \sqrt{\frac d{1-d}}, \\[4pt]
   \kappa  &=& \frac m{d \sqrt{N}} - \sqrt{N}.
  \end {eqnarray*}
  For $d \le 1/2$, (\ref{DistScan1}) is exact for $m > N/2$,
  but only an approximation otherwise.
  The approximation (\ref{DistScan2}) is good when
  $N d^2$ is large or when $d > 0.3$ and $N>50$.
  In other cases, this implementation sometimes uses the approximation
  proposed by Glaz \cite{tGLA89a}.
  For more information, see \cite {tAND95b,tGLA89a,tWAL87a}.
  The approximation returned by this function is generally good when
  it is close to 0, but is not very reliable when it exceeds, say, 0.4.
%%
\ifdetailed  %%%%%%
  If $m \le (N + 1)d$, the function returns 1. 
  Otherwise, if $Nd \le 10$, it returns the approximation given by 
  Glaz \cite{tGLA89a}.  
  If $Nd > 10$, it computes (\ref{DistScan2}) or (\ref{DistScan1}) 
  and returns the result if it does not exceed 0.4, otherwise it computes
  the approximation from \cite{tGLA89a}, returns it if it is less than 1.0, 
% inside the interval $(0.4, 1.0)$, 
  and returns 1.0 otherwise.
 \hpierre{Even if it 0.40001 in the first approximation, and 0.3999 
   in the second one?}
 \hrichard{That is what is programmed.  Probably, in that case, the
   approximation is so bad that we decided to return 1.  Should we
   return 0 instead??}
 \hpierre{That makes no sense.  I do not see why the approximation would
   be ``so bad'' in the case cited with 0.3999, yet suddenly good when
   the second approximation returns 0.4.
   It seems to me that in such a case we should return something
   around 0.4.
   And what is troubling is that by returning 0 or 1 as the p-value,
   one will believe that the null hypothesis is rejected!}
  The relative error can 
  reach 10\%\ when $Nd \le 10$ or when the returned value
  is less than 0.4.  For $m > Nd$ and $Nd > 10$, a returned value 
  that exceeds $0.4$ should be regarded as unreliable.
  For $m = 3$, the returned values are totally unreliable.
  (There may be an error in the original formulae in \cite{tGLA89a}).
\fi  %%%%  detailed
  Restrictions: $N \ge 2$  and $d \le 1/2$.\\
 \endtab
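
For example, to obtain the approximate $p$-value of an observed scan
statistic (illustrative values):
\code

#include <stdio.h>
#include <testu01/fbar.h>

int main (void)
{
   long N = 100;       /* number of observations */
   double d = 0.05;    /* window width, must satisfy d <= 1/2 */
   long m = 12;        /* observed value of the scan statistic */
   double pval = fbar_Scan (N, d, m);
   /* Values above about 0.4 should be regarded as unreliable. */
   printf ("P[S_N(d) >= %ld] is approximately %.6g\n", m, pval);
   return 0;
}
\endcode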

\code
\hide
#endif
\endhide
\endcode