1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
|
#!/usr/bin/env python3
"""Performs unweighted linear regression. Can be invoked from command line by
providing data pairs through stdin.
Nathan Baker, 2003"""
import math
from sys import stdin, stdout, stderr
def fit(data):
""" Accepts list of x,y pairs as input. Returns dictionary of resuls """
outdict = {}
# Get means
xmean = 0
ymean = 0
ndata = len(data)
for pair in data:
x = pair[0]
y = pair[1]
xmean = xmean + x
ymean = ymean + y
xmean = xmean/float(ndata)
ymean = ymean/float(ndata)
outdict["x mean"] = xmean
outdict["y mean"] = ymean
# Get variances
sxx = 0
syy = 0
sxy = 0
for pair in data:
x = pair[0]
y = pair[1]
sxx = sxx + (x-xmean)*(x-xmean)
syy = syy + (y-ymean)*(y-ymean)
sxy = sxy + (x-xmean)*(y-ymean)
covxx = sxx/float(ndata)
covyy = syy/float(ndata)
covxy = sxy/float(ndata)
outdict["xx covariance"] = covxx
outdict["xy covariance"] = covxy
outdict["yy covariance"] = covyy
# Slope
b = sxy/sxx
outdict["slope"] = b
# Intercept
a = ymean - b*xmean
outdict["intercept"] = a
# Correlation coefficient
r2 = sxy*sxy/sxx/syy
outdict["correlation coefficient (r^2)"] = r2
# Residual variance
s2 = (syy - b*sxy)/(float(ndata)-2)
s = math.sqrt(s2)
outdict["residual variance (s^2)"] = s2
# Slope error
eb = s/math.sqrt(sxx)
outdict["slope error"] = eb
# Intercept error
ea = s*math.sqrt((1/float(ndata)) + xmean*xmean/sxx)
outdict["intercept error"] = ea
return outdict
def main():
"""Main driver; reads from stdin"""
infile = stdin
stdout.write("Reading data from %s...\n" % infile.name)
data = []
while (1):
line = infile.readline()
if line == "":
break
line.strip()
words = line.split()
try:
pair1 = float(words[0])
pair2 = float(words[1])
data.append((pair1, pair2))
except Exception:
stderr.write("Ignoring unparseable line: %s\n" % line)
stdout.write("Read %d data points.\n" % len(data))
fitdict = fit(data)
keys = fitdict.keys()
keys.sort()
stdout.write("\nRESULTS:\n")
for key in keys:
stdout.write("%s: %g\n" % (key, fitdict[key]))
if __name__ == "__main__":
main()
|