File: test_fit.py

package info (click to toggle)
python-scipy 0.10.1%2Bdfsg2-1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 42,232 kB
  • sloc: cpp: 224,773; ansic: 103,496; python: 85,210; fortran: 79,130; makefile: 272; sh: 43
file content (70 lines) | stat: -rw-r--r-- 2,619 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# NOTE: contains only one test, _est_cont_fit, that is renamed so that
#       nose doesn't run it
# I put this here for the record and for the case when someone wants to
# verify the quality of fit
# with current parameters: relatively small sample size, default starting values
#       Ran 84 tests in 401.797s
#       FAILED (failures=15)


import numpy.testing as npt
import numpy as np

from scipy import stats

from test_continuous_basic import distcont

# this is not a proper statistical test for convergence, but only
# verifies that the estimate and true values don't differ by too much
n_repl1 = 1000 # sample size for first run
n_repl2 = 5000 # sample size for second run, if first run fails
thresh_percent = 0.25 # percent of true parameters for fail cut-off
thresh_min = 0.75  # minimum difference estimate - true to fail test

#distcont = [['genextreme', (3.3184017469423535,)]]

def _est_cont_fit():
    # this tests the closeness of the estimated parameters to the true
    # parameters with fit method of continuous distributions
    # Note: is slow, some distributions don't converge with sample size <= 10000

    for distname, arg in distcont:
        yield check_cont_fit, distname,arg


def check_cont_fit(distname,arg):
    distfn = getattr(stats, distname)
    rvs = distfn.rvs(size=n_repl1,*arg)
    est = distfn.fit(rvs)  #,*arg) # start with default values

    truearg = np.hstack([arg,[0.0,1.0]])
    diff = est-truearg

    txt = ''
    diffthreshold = np.max(np.vstack([truearg*thresh_percent,
                    np.ones(distfn.numargs+2)*thresh_min]),0)
    # threshold for location
    diffthreshold[-2] = np.max([np.abs(rvs.mean())*thresh_percent,thresh_min])

    if np.any(np.isnan(est)):
        raise AssertionError('nan returned in fit')
    else:
        if np.any((np.abs(diff) - diffthreshold) > 0.0):
##            txt = 'WARNING - diff too large with small sample'
##            print 'parameter diff =', diff - diffthreshold, txt
            rvs = np.concatenate([rvs,distfn.rvs(size=n_repl2-n_repl1,*arg)])
            est = distfn.fit(rvs) #,*arg)
            truearg = np.hstack([arg,[0.0,1.0]])
            diff = est-truearg
            if np.any((np.abs(diff) - diffthreshold) > 0.0):
                txt  = 'parameter: %s\n' % str(truearg)
                txt += 'estimated: %s\n' % str(est)
                txt += 'diff     : %s\n' % str(diff)
                raise AssertionError('fit not very good in %s\n' % distfn.name + txt)



if __name__ == "__main__":
    import nose
    #nose.run(argv=['', __file__])
    nose.runmodule(argv=[__file__,'-s'], exit=False)