1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91
|
% Kolmogorov–Smirnov test
% Asymptotically correct. Stephens 1974
private define ks_test_prob (n, d)
{
variable sn = sqrt(n);
variable factor = sn + 0.12 + 0.11/sn;
return 1-smirnov_cdf (sn * d);
}
define ks_test ()
{
variable d_ref = NULL;
if (_NARGS == 2)
d_ref = ();
else if (_NARGS != 1)
usage ("p = ks_test (CDF [,&D]); %% 1-sample KS test\n",
+ " Here CDF are the expected CDFs at the corresponding random points.");
variable cdf = ();
cdf = __tmp(cdf)[array_sort(cdf)];
variable n = length (cdf);
variable nn = 1.0*n;
variable dplus = max ([1:n]/nn - cdf);
variable dminus = max (cdf-[0:n-1]/nn);
variable d = max ([dplus, dminus]);
if (d_ref != NULL)
@d_ref = d;
return ks_test_prob (n, d);
}
% We want ks_test2_prob to return P(D_mn >= d), where d is the observed value.
% It is known that d can only take on values c/mn where c, m, and n are integers.
% So set d=c/mn.
% kim_jennrich_cdf returns P(D_mn <= c/mn)
% But we want P(D_mn >= c/mn) = 1-P(D_mn < c/mn)
% P(D_mn <= (c-1)/mn) <= P(D_mn < c/mn) <= P(D_mn <= c/mn)
% P(D_mn <= (c-1)/mn) <= P(D_mn < c/mn) <= P(D_mn < c/mn) + P(D_mn==c/mn)
% P(D_mn <= (c-1)/mn) <= P(D_mn < c/mn) + P(D_mn==c/mn)
%
% Since D_mn can only take on values c/mn, it follows that
% P(D_mn < c/mn) = P(D_mn <= (c-1)/mn)
%
private define ks_test2_prob ()
{
%if (_NARGS != 3)
% usage ("p = %s(m, n, d); %% P(D_mn >= d)", _function_name ());
variable d, m, n; (m, n, d) = ();
% See the above note for why 1 is subtracted for the first argument of
% kim_jennrich.
variable fm = double (m);
if (fm * n <= 10000.0)
return 1.0 - kim_jennrich_cdf (m, n, int (d*m*n + 0.5) - 1);
% Use asymptotic forms.
return ks_test_prob ((fm*n)/(fm+n), d);
}
define ks_test2 ()
{
variable d_ref = NULL;
if (_NARGS == 3)
d_ref = ();
else if (_NARGS != 2)
usage ("p = %s(X1, X2 [,&D]); %% Two-sample KS test", _function_name ());
variable xm, xn; (xm, xn) = ();
variable x = [xn, xm];
variable n = length (xn);
variable m = length (xm);
variable mn = m + n;
variable c = Int_Type[mn];
c[[0:n-1]] = 1;
variable i = array_sort (x);
x = x[i];
c = c[i]; c = cumsum (__tmp(c));
variable dmn = (c/n - [1:mn]/(mn*1.0));
variable factor = mn/(m*1.0);
variable dplus = factor * max(dmn);
variable dminus = factor * min(dmn);
variable d = max([dplus, -dminus]);
if (d_ref != NULL)
@d_ref = d;
return ks_test2_prob (m, n, d);
}
|