1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
|
"""
Returns parameters resulting from a linear regression of one set
of given data against another set of given data.
"""
from __future__ import print_function
import math
import numpy
import pyferret
import scipy.stats
def ferret_init(id):
"""
Initialization for the stats_linregress python-backed ferret external function
"""
axes_values = [ pyferret.AXIS_DOES_NOT_EXIST ] * pyferret.MAX_FERRET_NDIM
axes_values[0] = pyferret.AXIS_CUSTOM
false_influences = [ False ] * pyferret.MAX_FERRET_NDIM
retdict = { "numargs": 2,
"descript": "Returns slope, intercept, correlation coeff (r), and num good pts for a linear regression",
"axes": axes_values,
"argnames": ( "XVALS", "YVALS", ),
"argdescripts": ( "Abscissa values for the linear regression fit",
"Ordinate values for the linear regression fit", ),
"argtypes": ( pyferret.FLOAT_ARRAY, pyferret.FLOAT_ARRAY, ),
"influences": ( false_influences, false_influences, ),
}
return retdict
def ferret_custom_axes(id):
"""
Define the limits and (unit)name of the custom axis of the
array containing the returned parameters. The values returned
are from the scipy.stats.linregress function: slope, intercept,
and correlation coefficient; plus the number of defined points
used in the fitting. The probability of zero slope and
standard error of the estimate computed by linregress are not
returned. (The standard error of the estimate returned is
incorrect. The value returned is close but larger than the
square of the correct value.)
"""
axis_defs = [ None ] * pyferret.MAX_FERRET_NDIM
axis_defs[0] = ( 1, 4, 1, "M,B,R,N", False, )
return axis_defs
def ferret_compute(id, result, resbdf, inputs, inpbdfs):
"""
Assigns result with parameters for the linear regression of the
ordinate values given inputs[1] to the abscissa values given in
inputs[0]. If the input arrays inputs[0] and inputs[1] do not
have the same shape (the same lengths of each of the four axes),
then then must each have only one defined non-singular axis with
matching lengths. Result is assigned the slope, intercept, and
sample correlation coefficient from scipy.stats.linregress as
well as the number of defined points used in the fitting. The
probability of zero slope and standard error of the estimate
computed by linregress are not returned. (The standard error of
the estimate returned is incorrect. The value returned is close
but larger than the square of the correct value.)
"""
if inputs[0].shape != inputs[1].shape :
shp0 = inputs[0].squeeze().shape
shp1 = inputs[1].squeeze().shape
if (len(shp0) > 1) or (len(shp1) > 1) or (shp0 != shp1):
raise ValueError("XVALS and YVALS must either have identical dimensions or "\
"both have only one defined non-singular axis of the same length")
abscissa = inputs[0].reshape(-1)
ordinate = inputs[1].reshape(-1)
badmaska = ( numpy.fabs(abscissa - inpbdfs[0]) < 1.0E-5 )
badmaska = numpy.logical_or(badmaska, numpy.isnan(abscissa))
badmasko = ( numpy.fabs(ordinate - inpbdfs[1]) < 1.0E-5 )
badmasko = numpy.logical_or(badmasko, numpy.isnan(ordinate))
goodmask = numpy.logical_not(numpy.logical_or(badmaska, badmasko))
xvals = numpy.array(abscissa[goodmask], dtype=numpy.float64)
numpts = len(xvals)
if numpts < 2:
raise ValueError("Not enough defined points in common in XVALS and YVALS")
yvals = numpy.array(ordinate[goodmask], dtype=numpy.float64)
fitparams = scipy.stats.linregress(xvals, yvals)
result[:] = resbdf
# slope
result[0] = fitparams[0]
# intercept
result[1] = fitparams[1]
# correlation coefficient
result[2] = fitparams[2]
# ignore the probability of zero coefficient (fitparams[3]) and
# the incorrect standard error of the estimate (fitparams[4])
# number of good pts
result[3] = numpts
#
# The rest of this is just for testing this module at the command line
#
if __name__ == "__main__":
# just make sure these calls don't throw errors
info = ferret_init(0)
info = ferret_custom_axes(0)
# create some data to fit
slope = -5.0
intercept = 15.0
xvals = numpy.arange(0.0, 10.0, 0.01)
fuzz = scipy.stats.norm(0.0, 0.01).rvs(1000)
yvals = slope * xvals + intercept + fuzz
inpbdfs = numpy.array([-9999.0, -8888.0], dtype=numpy.float64)
resbdf = numpy.array([-7777.0], dtype=numpy.float64)
abscissa = numpy.empty((1, 1000, 1, 1, 1, 1), dtype=numpy.float64, order='F')
ordinate = numpy.empty((1, 1, 1000, 1, 1, 1), dtype=numpy.float64, order='F')
goodvals = numpy.empty((1000,), dtype=bool)
index = 0
numgood = 0
for j in range(1000):
if (index % 53) == 13:
abscissa[0, j, 0, 0, 0, 0] = inpbdfs[0]
else:
abscissa[0, j, 0, 0, 0, 0] = xvals[j]
if (index % 73) == 13:
ordinate[0, 0, j, 0, 0, 0] = inpbdfs[1]
else:
ordinate[0, 0, j, 0, 0, 0] = yvals[j]
if ((index % 53) == 13) or ((index % 73) == 13):
goodvals[index] = False
else:
goodvals[index] = True
numgood += 1
index += 1
result = -5555.0 * numpy.ones((4,), dtype=numpy.float64)
ferret_compute(0, result, resbdf, ( abscissa, ordinate, ), inpbdfs)
xvals = xvals[goodvals]
xave = xvals.mean()
yvals = yvals[goodvals]
yave = yvals.mean()
m = (xvals * yvals - xave * yave).sum() / (xvals * xvals - xave * xave).sum()
b = yave - m * xave
yxdeltas = yvals - (m * xvals + b)
yxsumsq = (yxdeltas * yxdeltas).sum()
# std_err_est = math.sqrt(yxsumsq / float(numgood - 2))
ydeltas = yvals - yave
ysumsq = (ydeltas * ydeltas).sum()
rsq = 1.0 - (yxsumsq / ysumsq)
r = math.sqrt(rsq)
if m < 0.0:
r *= -1.0
expected = numpy.array([m, b, r, numgood], dtype=numpy.float64)
if not numpy.allclose(result, expected, rtol=1.0E-5, atol=1.0E-5):
raise ValueError("Linear regression fail\nexpected params:\n%s\nfound params:\n%s" % \
(str(expected), str(result)))
# All successful
print("Success")
|