File: LinearRegression.py

package info (click to toggle)
pymca 5.8.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 44,392 kB
  • sloc: python: 155,456; ansic: 15,843; makefile: 116; sh: 73; xml: 55
file content (179 lines) | stat: -rw-r--r-- 6,697 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
#/*##########################################################################
#
# The PyMca X-Ray Fluorescence Toolkit
#
# Copyright (c) 2004-2016 European Synchrotron Radiation Facility
#
# This file is part of the PyMca X-ray Fluorescence Toolkit developed at
# the ESRF by the Software group.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
#############################################################################*/
__author__ = "V.A. Sole - ESRF Data Analysis"
__contact__ = "sole@esrf.fr"
__license__ = "MIT"
__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"

import numpy
from numpy.linalg import inv
import sys

def linregress(x, y, sigmay=None, full_output=False):
    """
    Linear fit to a straight line following P.R. Bevington:

    "Data Reduction and Error Analysis for the Physical Sciences"

    It tries to be an improved version of scipystats.linregress

    Parameters
    ----------
    x, y : array_like
        two sets of measurements.  They are flattened and must contain the
        same number of values.

    sigmay : array_like, scalar or None
        The uncertainty on the y values.  If None, every point gets unit
        uncertainty.  A single value is applied to all the points.  Points
        with an uncertainty of exactly zero are given a weight of one.

    full_output : bool
        If true, a dictionary with additional results is appended to the
        returned tuple.

    Returns
    -------
    slope : float
        slope of the regression line
    intercept : float
        intercept of the regression line
    r_value : float
        correlation coefficient, limited to the interval [-1, 1]

    if full_output is true, an additional dictionary is returned with the keys

    slope, intercept, r_value: same values as above

    sigma_slope: uncertainty on the slope

    sigma_intercept: uncertainty on the intercept

    variance: float
        unweighted sum of the squared residuals divided by (N - 2).
        It is set to 0.0 when there are fewer than three points.

    stderr: float
        square root of the variance

    Raises
    ------
    ValueError
        if x and y (or sigmay, when it has more than one value) do not
        contain the same number of values.
    """
    x = numpy.asarray(x, dtype=numpy.float64).flatten()
    y = numpy.asarray(y, dtype=numpy.float64).flatten()
    N = y.size
    if x.size != N:
        # without this check a one-element input would silently broadcast
        raise ValueError("x and y must contain the same number of values")
    if sigmay is None:
        sigmay = numpy.ones((N,), dtype=y.dtype)
    else:
        sigmay = numpy.asarray(sigmay, dtype=numpy.float64).flatten()
        if sigmay.size == 1:
            # expand a common uncertainty so that the sum of the weights
            # counts every point, consistently with the other sums
            sigmay = sigmay[0] * numpy.ones((N,), dtype=numpy.float64)
        elif sigmay.size != N:
            raise ValueError("sigmay must contain one value or as many as y")
    # (sigmay == 0) adds one to the denominator where sigma is zero, so
    # those points get unit weight instead of an infinite one
    w = 1.0 / (sigmay * sigmay + (sigmay == 0))

    # weighted sums
    S = w.sum()
    Sx = (w * x).sum()
    Sy = (w * y).sum()
    Sxx = (w * x * x).sum()
    Sxy = (w * x * y).sum()
    Syy = (w * y * y).sum()

    # SSxx is identical to delta in Bevington book
    delta = SSxx = S * Sxx - Sx * Sx
    SSxy = S * Sxy - Sx * Sy
    SSyy = S * Syy - Sy * Sy

    intercept = (Sxx * Sy - Sx * Sxy) / delta
    slope = SSxy / delta

    r_value = SSxy / numpy.sqrt(SSxx * SSyy)
    # rounding errors can push the coefficient slightly out of [-1, 1];
    # plain comparisons are used so that a nan result is left untouched
    if r_value > 1.0:
        r_value = 1.0
    if r_value < -1.0:
        r_value = -1.0

    if not full_output:
        return slope, intercept, r_value

    # calculate the variance (N - 2 degrees of freedom)
    if N < 3:
        variance = 0.0
    else:
        variance = ((y - intercept - slope * x) ** 2).sum() / (N - 2)

    ddict = {}
    ddict["variance"] = variance
    ddict["stderr"] = numpy.sqrt(variance)
    ddict["slope"] = slope
    ddict["intercept"] = intercept
    ddict["r_value"] = r_value
    ddict["sigma_intercept"] = numpy.sqrt(Sxx / delta)
    ddict["sigma_slope"] = numpy.sqrt(S / delta)
    return slope, intercept, r_value, ddict

def _print_line_parameters(slope, sigma_slope, intercept, sigma_intercept):
    """Print a slope and an intercept followed by their uncertainties."""
    print("SLOPE = ", slope, " +/- ", sigma_slope)
    print("INTERCEPT = ", intercept, " +/- ", sigma_intercept)


def main(argv=None):
    """
    Demonstration and command line entry point.

    Parameters
    ----------
    argv : list or None
        If None, two built-in data sets (Bevington tables 6-2 and 6-1) are
        fitted with linregress and with PyMca5.PyMcaMath.linalg.lstsq and
        both results are printed for comparison.

        If it contains more than one item, argv[1] is the name of a text
        file read with numpy.loadtxt using its default settings.  The first
        column is used as x, the second as y and, if present, the third as
        the uncertainty on y.  The linregress results are printed.

        Otherwise a usage message is printed.
    """
    if argv is None:
        # Bevington data of Table 6-2
        x = [0, 15, 30, 45, 60, 75, 90, 105, 120, 135]
        y = [106, 80, 98, 75, 74, 73, 49, 38, 37, 22]
        # uncertainties taken as the square root of the y values
        sigmay = numpy.sqrt(numpy.array(y))
        slope, intercept, r, ddict = linregress(x, y, sigmay=sigmay, full_output=True)
        print("WEIGHTED DATA")
        print("LINREGRESS results")
        _print_line_parameters(ddict["slope"], ddict["sigma_slope"],
                               ddict["intercept"], ddict["sigma_intercept"])
        # imported here so that the module itself does not depend on it
        from PyMca5.PyMcaMath.linalg import lstsq
        # design matrix: first column multiplies the slope, second the intercept
        derivatives = numpy.zeros((len(y), 2))
        derivatives[:, 0] = numpy.array(x, dtype=numpy.float64)
        derivatives[:, 1] = 1.0
        print("LEAST SQUARES RESULT")
        result = lstsq(derivatives, y, sigma_b=sigmay, weight=1, uncertainties=True)
        _print_line_parameters(result[0][0], result[1][0],
                               result[0][1], result[1][1])
        print("\n\n")

        # Bevington data of Table 6-1
        x = [1, 2, 3, 4, 5, 6, 7, 8, 9]
        y = [15.6, 17.5, 36.6, 43.8, 58.2, 61.6, 64.2, 70.4, 98.8]
        print("UNWEIGHTED DATA")
        slope, intercept, r, ddict = linregress(x, y, sigmay=None, full_output=True)
        print("LINREGRESS results")
        _print_line_parameters(ddict["slope"], ddict["sigma_slope"],
                               ddict["intercept"], ddict["sigma_intercept"])
        derivatives = numpy.zeros((len(y), 2))
        derivatives[:, 0] = numpy.array(x, dtype=numpy.float64)
        derivatives[:, 1] = 1.0
        print("LEAST SQUARES RESULT")
        result = lstsq(derivatives, y, sigma_b=None, weight=0, uncertainties=True)
        _print_line_parameters(result[0][0], result[1][0],
                               result[0][1], result[1][1])
        print("\n\n")
    elif len(argv) > 1:
        # assume we have got a two (or three) column text file that
        # numpy.loadtxt can read with its default settings
        data = numpy.loadtxt(argv[1])
        x = data[:, 0]
        y = data[:, 1]
        if data.shape[1] > 2:
            sigmay = data[:, 2]
        else:
            sigmay = None
        slope, intercept, r, ddict = linregress(x, y,
                                                sigmay=sigmay,
                                                full_output=True)
        print("LINREGRESS results")
        _print_line_parameters(ddict["slope"], ddict["sigma_slope"],
                               ddict["intercept"], ddict["sigma_intercept"])
    else:
        # the message used to show the name of a different module
        print("LinearRegression [csv_file_name]")
        return

# Script entry point. sys.argv is always passed, so from the command line
# main() never receives None: the built-in demonstration branch is only
# reached by calling main() without arguments from Python.
if __name__ == "__main__":
    main(sys.argv)