1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165
|
"""
Returns an array containing the order statistic medians for a given
probability distribution along the X axis in the first Y axis value,
and ordered response data of a given sample along the X axis in the
second Y axis value. The order statistic medians are the percent
probability function values of the probability distribution at
regularily-spaced intervals. The ordered response data is the sorted
sample values. If the sample comes from a probability distribution
of the type given, the plot of the ordered response data against the
order statistic medians should give a straight line whose slope is
the offset from, and whose intercept is the scaling of, the given
distribution. Thus, the slope, intercept, and correlation coefficient
(r) of this fitted line are returned and the first three X elements
of the third Y axis value.
"""
from __future__ import print_function
import numpy
import scipy.stats
import pyferret
import pyferret.stats
def ferret_init(id):
"""
Initialization for the stats_probplotvals Ferret PyEF
"""
axes_values = [ pyferret.AXIS_DOES_NOT_EXIST ] * pyferret.MAX_FERRET_NDIM
axes_values[0] = pyferret.AXIS_CUSTOM
axes_values[1] = pyferret.AXIS_CUSTOM
false_influences = [ False ] * pyferret.MAX_FERRET_NDIM
retdict = { "numargs": 3,
"descript": "Returns [j=1] order statistic medians, " \
"[j=2] ordered response data, and " \
"[j=3] slope, intercept, and corr. coeff. of fitted line",
"axes": axes_values,
"argnames": ("SAMPLE", "PDNAME", "PDPARAMS"),
"argdescripts": ("Sample values for the ordered response data",
"Name of a continuous probability distribution for the order statistic medians",
"Parameters for this continuous probability distribution"),
"argtypes": (pyferret.FLOAT_ARRAY, pyferret.STRING_ONEVAL, pyferret.FLOAT_ARRAY),
"influences": (false_influences, false_influences, false_influences),
}
return retdict
def ferret_custom_axes(id):
"""
Custom axis information for stats_probplot_values Ferret PyEF
"""
size = 1
for axis in ( pyferret.X_AXIS, pyferret.Y_AXIS, pyferret.Z_AXIS,
pyferret.T_AXIS, pyferret.E_AXIS, pyferret.F_AXIS ):
axis_info = pyferret.get_axis_info(id, pyferret.ARG1, axis)
# Note: axes normal to the data have size = -1
num = axis_info.get("size", -1)
if num > 1:
size *= num
axis_defs = [ None ] * pyferret.MAX_FERRET_NDIM
axis_defs[0] = (1, size, 1, "VALUE_NUM", False, )
axis_defs[1] = (1, 3, 1, "OSM,ORD,P", False, )
return axis_defs
def ferret_compute(id, result, resbdf, inputs, inpbdfs):
"""
Assigns to result[:,0] the order statistic medians for
the probability distribution named in inputs[1] with
parameters given in inputs[2]. Assigns to result[:,1]
the ordered response data of the sample values given in
inputs[0]. Assigns to result[:3,2] the slope, intercept,
and correlation coefficient of the line fitted to a plot
of result[:,1] against result[:,0]. Undefined values
in inputs[0] are removed at the beginning of this
computation.
"""
distribname = inputs[1]
distname = pyferret.stats.getdistname(distribname)
if distname is None:
raise ValueError("Unknown probability function %s" % distribname)
distribparams = inputs[2].reshape(-1)
distparams = pyferret.stats.getdistparams(distname, distribparams)
if distparams is None:
raise ValueError("Unknown (for params) probability function %s" % distribname)
sample = inputs[0].reshape(-1)
badmask = ( numpy.fabs(sample - inpbdfs[0]) < 1.0E-5 )
goodmask = numpy.logical_not(numpy.logical_or(badmask, numpy.isnan(sample)))
ppdata = scipy.stats.probplot(sample[goodmask], distparams, distname, fit=1)
result[:] = resbdf
result[goodmask,0,0,0,0,0] = ppdata[0][0]
result[goodmask,1,0,0,0,0] = ppdata[0][1]
result[:3,2,0,0,0,0] = ppdata[1]
#
# The rest of this is just for testing this module at the command line
#
if __name__ == "__main__":
# make sure ferret_init does not have problems
info = ferret_init(0)
# Sample from a normal distribution
ydim = 23
zdim = 13
inpundefval = -1.0E+10
outundefval = -2.0E+10
# select a random sample from a normal distribution
size = ydim * zdim
sample = scipy.stats.norm(5.0, 2.0).rvs(size)
ordata = numpy.sort(sample)
# compare to the standard normal distribution (mu = 0.0, sigma = 1.0)
uvals = numpy.empty(size)
uvals[-1] = numpy.power(0.5, 1.0 / size)
uvals[0] = 1.0 - uvals[-1]
uvals[1:-1] = (numpy.arange(2.0, size-0.5, 1.0) - 0.3175) / (size + 0.365)
osmeds = scipy.stats.norm(0.0, 1.0).ppf(uvals)
# set up for a call to ferret_compute
pfname = "norm"
pfparams = numpy.array([0.0, 1.0], dtype=numpy.float64)
inpbdfs = numpy.array([inpundefval, 0.0, 0.0], dtype=numpy.float64)
resbdf = numpy.array([outundefval], dtype=numpy.float64)
inputarr = numpy.empty((1, ydim + 1, zdim + 1, 1, 1, 1), dtype=numpy.float64, order='F')
expected = numpy.empty(((ydim + 1) * (zdim + 1), 3, 1, 1, 1, 1), dtype=numpy.float64, order='F')
n = 0
index = 0
for j in range(ydim + 1):
for k in range(zdim + 1):
if (k == j) or (k == j+1) or (n >= size):
inputarr[0, j, k, 0, 0, 0] = inpbdfs[0]
expected[index, 0, 0, 0, 0, 0] = resbdf[0]
expected[index, 1, 0, 0, 0, 0] = resbdf[0]
else:
inputarr[0, j, k, 0, 0, 0] = sample[n]
expected[index, 0, 0, 0, 0, 0] = osmeds[n]
expected[index, 1, 0, 0, 0, 0] = ordata[n]
n += 1
index += 1
if n < size:
raise ValueError("Unexpected result: not all values assigned")
fitdata = scipy.stats.linregress(osmeds, ordata)
expected[:3,2,0,0,0,0] = fitdata[:3]
expected[3:,2,0,0,0,0] = resbdf[0]
result = -888888.0 * numpy.ones(((ydim + 1) * (zdim + 1), 3, 1, 1, 1, 1), dtype=numpy.float64, order='F')
# call ferret_compute and check the results
ferret_compute(0, result, resbdf, (inputarr, pfname, pfparams), inpbdfs)
if not numpy.allclose(result, expected):
if not numpy.allclose(result[:,0,0,0,0,0], expected[:,0,0,0,0,0]):
print("Expected[:,0,0,0,0,0] =\n%s" % str(expected[:,0,0,0,0,0]))
print("Result[:,0,0,0,0,0] =\n%s" % str(result[:,0,0,0,0,0]))
if not numpy.allclose(result[:,1,0,0,0,0], expected[:,1,0,0,0,0]):
print("Expected[:,1,0,0,0,0] =\n%s" % str(expected[:,1,0,0,0,0]))
print("Result[:,1,0,0,0,0] =\n%s" % str(result[:,1,0,0,0,0]))
if not numpy.allclose(result[:3,2,0,0,0,0], expected[:3,2,0,0,0,0]):
print("Expected[:3,2,0,0,0,0] =\n%s" % str(expected[:3,2,0,0,0,0]))
print("Result[:3,2,0,0,0,0] =\n%s" % str(result[:3,2,0,0,0,0]))
raise ValueError("Unexpected result")
# All successful
print("Success")
|