File: pm_fitting.py

package info (click to toggle)
python-biopython 1.68%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 46,860 kB
  • ctags: 13,237
  • sloc: python: 160,306; xml: 93,216; ansic: 9,118; sql: 1,208; makefile: 155; sh: 63
file content (133 lines) | stat: -rw-r--r-- 3,925 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# Copyright 2014-2016 by Marco Galardini.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
"""Growth curves fitting and parameters extraction for phenotype data

This module provides functions to perform sigmoid functions fitting to
Phenotype Microarray data. This module depends on scipy curve_fit function.
If not available, a warning is raised.

Functions:
logistic           Logistic growth model.
gompertz           Gompertz growth model.
richards           Richards growth model.
guess_plateau      Guess the plateau point to improve sigmoid fitting.
guess_lag          Guess the lag point to improve sigmoid fitting.
fit                Sigmoid functions fit.
get_area           Calculate the area under the PM curve."""

import numpy as np

try:
    from scipy.optimize.minpack import curve_fit
    from scipy.integrate import trapz
except ImportError:
    from Bio import MissingPythonDependencyError
    raise MissingPythonDependencyError(
        'Install scipy to extract curve parameters.')


def logistic(x, A, u, d, v, y0):
    """Logistic growth model

    Proposed in Zwietering et al., 1990 (PMID: 16348228)
    """
    y = (A / (1 + np.exp((((4 * u) / A) * (d - x)) + 2))) + y0
    return y


def gompertz(x, A, u, d, v, y0):
    """Gompertz growth model

    Proposed in Zwietering et al., 1990 (PMID: 16348228)
    """
    y = (A * np.exp(-np.exp((((u * np.e) / A) * (d - x)) + 1))) + y0
    return y


def richards(x, A, u, d, v, y0):
    """Gompertz growth model (equivalent to Stannard)

    Proposed in Zwietering et al., 1990 (PMID: 16348228)
    """
    y = (A * pow(1 + (v + (np.exp(1 + v) * np.exp((u / A) *
                                                  (1 + v) * (1 + (1 / v)) * (d - x)))), -(1 / v))) + y0
    return y


def guess_lag(x, y):
    """Given two axes returns a guess of the lag point.

    The lag point is defined as the x point where the difference in y
    with the next point is higher then the mean differences between
    the points plus one standard deviation. If such point is not found
    or x and y have different lengths the function returns zero.
    """
    if len(x) != len(y):
        return 0

    diffs = []
    indexes = range(len(x))

    for i in indexes:
        if i + 1 not in indexes:
            continue
        diffs.append(y[i + 1] - y[i])
    diffs = np.array(diffs)

    flex = x[-1]
    for i in indexes:
        if i + 1 not in indexes:
            continue
        if (y[i + 1] - y[i]) > (diffs.mean() + (diffs.std())):
            flex = x[i]
            break

    return flex


def guess_plateau(x, y):
    """Given two axes returns a guess of the plateau point.

    The plateau point is defined as the x point where the y point
    is near one standard deviation of the differences between the y points to
    the maximum y value. If such point is not found or x and y have
    different lengths the function returns zero.
    """
    if len(x) != len(y):
        return 0

    diffs = []
    indexes = range(len(y))

    for i in indexes:
        if i + 1 not in indexes:
            continue
        diffs.append(y[i + 1] - y[i])
    diffs = np.array(diffs)

    ymax = y[-1]
    for i in indexes:
        if y[i] > (ymax - diffs.std()) and y[i] < (ymax + diffs.std()):
            ymax = y[i]
            break

    return ymax


def fit(function, x, y):
    """Fit the provided functrion to the x and y values.

    The function parameters and the parameters covariance."""
    # Compute guesses for the parameters
    # This is necessary to get significant fits
    p0 = [guess_plateau(x, y), 4.0, guess_lag(x, y), 0.1, min(y)]

    params, pcov = curve_fit(function, x, y, p0=p0)
    return params, pcov


def get_area(y, x):
    """Get the area under the curve"""
    return trapz(y=y, x=x)