1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
|
"""
Performs a two-sided Kolmogorov-Smirnov test that the provided
sample comes from the given probability distribution function.
"""
from __future__ import print_function
from past.builtins import xrange
import numpy
import pyferret
import pyferret.stats
import scipy.stats
def ferret_init(id):
"""
Initialization for the stats_kstest1 PyEF
"""
axes_values = [ pyferret.AXIS_DOES_NOT_EXIST ] * pyferret.MAX_FERRET_NDIM
axes_values[0] = pyferret.AXIS_CUSTOM
false_influences = [ False ] * pyferret.MAX_FERRET_NDIM
retdict = { "numargs": 3,
"descript": "Returns two-sided Kolmogorov-Smirnov test stat. and prob. " \
"that sample comes from a pop. with given prob. distrib.",
"axes": axes_values,
"argnames": ( "SAMPLE", "PDNAME", "PDPARAMS", ),
"argdescripts": ( "Sample data array",
"Name of a continuous probability distribution",
"Parameters for this continuous probability distribution"),
"argtypes": ( pyferret.FLOAT_ARRAY, pyferret.STRING_ONEVAL, pyferret.FLOAT_ARRAY, ),
"influences": ( false_influences, false_influences, false_influences, ),
}
return retdict
def ferret_custom_axes(id):
"""
Define custom axis of the stats_kstest1 Ferret PyEF
"""
axis_defs = [ None ] * pyferret.MAX_FERRET_NDIM
axis_defs[0] = ( 1, 2, 1, "KS,P", False )
return axis_defs
def ferret_compute(id, result, resbdf, inputs, inpbdfs):
"""
Performs a two-sided Kolmogorov-Smirnov test that the provided sample
comes from a population with the given probability distribution function.
The sample is given in inputs[0], the probability distribution function
name is given in inputs[1] (a string), and the "standard" parameters for
this probability distribution function are given in inputs[2]. The test
statistic value and two-tailed probability are returned in result.
Undefined data given in inputs[0] are removed before performing the test.
"""
# get the scipy.stats distribution name from the given distribution name
if inputs[1] == None:
raise ValueError("The name of a probability distribution function not given")
distscipyname = pyferret.stats.getdistname(inputs[1])
if distscipyname == None:
raise ValueError("Unknown or unsupported probability distribution function %s" % inputs[1])
# get the scipy.stats distribution parameters from the given "standard" parameters
if inputs[2] == None:
raise ValueError("Paramaters for the probability distribution function not given")
distscipyparams = pyferret.stats.getdistparams(distscipyname, inputs[2].reshape(-1))
if distscipyparams == None:
raise ValueError("Unknown or unsupported (for params) probability distribution function %s" % inputs[1])
# get the valid sample values
badmask = ( numpy.fabs(inputs[0] - inpbdfs[0]) < 1.0E-5 )
badmask = numpy.logical_or(badmask, numpy.isnan(inputs[0]))
goodmask = numpy.logical_not(badmask)
values = inputs[0][goodmask]
# perform the test and assign the results
fitparams = scipy.stats.kstest(values, distscipyname, distscipyparams)
result[:] = resbdf
# Kolmogorov-Smirnov test statistic
result[0] = fitparams[0]
# probability
result[1] = fitparams[1]
#
# The rest of this is just for testing this module at the command line
#
if __name__ == "__main__":
# make sure ferret_init and ferret_custom_axes do not have problems
info = ferret_init(0)
info = ferret_custom_axes(0)
# Set the seed to reproduce a problematic distribution
# import numpy.random
# numpy.random.seed(3333333)
# Get a random sample from the compared distribution and from another distribution
ydim = 200
zdim = 150
mu = 5.0
sigma = 0.5
rvsc = scipy.stats.norm(mu, sigma).rvs(ydim * zdim)
rvsu = scipy.stats.uniform(loc=(mu + 3.0 * sigma), scale=(3.0 * sigma)).rvs(ydim * zdim)
# setup for the call to ferret_compute
distname = "norm"
distparams = numpy.array([mu, sigma], dtype=numpy.float64)
inpbdfs = numpy.array([-9999.0, -1.0, -2.0], dtype=numpy.float64)
resbdf = numpy.array([-8888.0], dtype=numpy.float64)
sampc = numpy.empty((1, ydim, zdim, 1, 1, 1), dtype=numpy.float64, order='F')
sampu = numpy.empty((1, ydim, zdim, 1, 1, 1), dtype=numpy.float64, order='F')
index = 0
for j in xrange(ydim):
for k in xrange(zdim):
if (index % 71) == 3:
sampc[0, j, k, 0, 0, 0] = inpbdfs[0]
sampu[0, j, k, 0, 0, 0] = inpbdfs[0]
else:
sampc[0, j, k, 0, 0, 0] = rvsc[index]
sampu[0, j, k, 0, 0, 0] = rvsu[index]
index += 1
resultc = -7777.0 * numpy.ones((2, 1, 1, 1, 1, 1), dtype=numpy.float64, order='F')
resultu = -7777.0 * numpy.ones((2, 1, 1, 1, 1, 1), dtype=numpy.float64, order='F')
# call ferret_compute with data from the distribution and check the results
ferret_compute(0, resultc, resbdf, (sampc, distname, distparams), inpbdfs)
resultc = resultc.reshape(-1)
print ("from same dist result: %s" % str(resultc))
if (resultc[0] < 0.00) or (resultc[0] > 0.01) or \
(resultc[1] < 0.10) or (resultc[1] > 1.00):
raise ValueError("Unexpected result")
# call ferret_compute with data from a different distribution and check the results
ferret_compute(0, resultu, resbdf, (sampu, distname, distparams), inpbdfs)
resultu = resultu.reshape(-1)
print ("from diff dist result: %s" % str(resultu))
if (resultu[0] < 0.99) or (resultu[0] > 1.00) or \
(resultu[1] < 0.00) or (resultu[1] > 0.01):
raise ValueError("Unexpected result")
# All successful
print ("Success")
|