File: fit.py

package info (click to toggle)
apbs 3.4.1-7
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 199,188 kB
  • sloc: ansic: 284,988; cpp: 60,416; fortran: 44,896; xml: 13,895; sh: 13,838; python: 8,105; yacc: 2,922; makefile: 1,428; f90: 989; objc: 448; lex: 294; awk: 266; sed: 205; java: 134; csh: 79
file content (102 lines) | stat: -rw-r--r-- 2,415 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/env python3
"""Performs unweighted linear regression.  Can be invoked from command line by
providing data pairs through stdin.

Nathan Baker, 2003"""

import math
from sys import stdin, stdout, stderr


def fit(data):
    """ Accepts list of x,y pairs as input.  Returns dictionary of resuls """
    outdict = {}

    # Get means
    xmean = 0
    ymean = 0
    ndata = len(data)
    for pair in data:
        x = pair[0]
        y = pair[1]
        xmean = xmean + x
        ymean = ymean + y
    xmean = xmean/float(ndata)
    ymean = ymean/float(ndata)
    outdict["x mean"] = xmean
    outdict["y mean"] = ymean

    # Get variances
    sxx = 0
    syy = 0
    sxy = 0
    for pair in data:
        x = pair[0]
        y = pair[1]
        sxx = sxx + (x-xmean)*(x-xmean)
        syy = syy + (y-ymean)*(y-ymean)
        sxy = sxy + (x-xmean)*(y-ymean)
    covxx = sxx/float(ndata)
    covyy = syy/float(ndata)
    covxy = sxy/float(ndata)
    outdict["xx covariance"] = covxx
    outdict["xy covariance"] = covxy
    outdict["yy covariance"] = covyy

    # Slope
    b = sxy/sxx
    outdict["slope"] = b

    # Intercept
    a = ymean - b*xmean
    outdict["intercept"] = a

    # Correlation coefficient
    r2 = sxy*sxy/sxx/syy
    outdict["correlation coefficient (r^2)"] = r2

    # Residual variance
    s2 = (syy - b*sxy)/(float(ndata)-2)
    s = math.sqrt(s2)
    outdict["residual variance (s^2)"] = s2

    # Slope error
    eb = s/math.sqrt(sxx)
    outdict["slope error"] = eb

    # Intercept error
    ea = s*math.sqrt((1/float(ndata)) + xmean*xmean/sxx)
    outdict["intercept error"] = ea

    return outdict


def main():
    """Main driver; reads from stdin"""
    infile = stdin
    stdout.write("Reading data from %s...\n" % infile.name)

    data = []
    while (1):
        line = infile.readline()
        if line == "":
            break
        line.strip()
        words = line.split()
        try:
            pair1 = float(words[0])
            pair2 = float(words[1])
            data.append((pair1, pair2))
        except Exception:
            stderr.write("Ignoring unparseable line:  %s\n" % line)
    stdout.write("Read %d data points.\n" % len(data))
    fitdict = fit(data)
    keys = fitdict.keys()
    keys.sort()
    stdout.write("\nRESULTS:\n")
    for key in keys:
        stdout.write("%s:  %g\n" % (key, fitdict[key]))


if __name__ == "__main__":
    main()