File: statistics.py

package info (click to toggle)
python-expyriment 0.7.0%2Bgit34-g55a4e7e-3.2
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 1,504 kB
  • ctags: 2,094
  • sloc: python: 12,766; makefile: 150
file content (153 lines) | stat: -rw-r--r-- 3,102 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
"""
The statistics module.

This module contains miscellaneous statistical functions for expyriment.

"""

# Module metadata. The backslash inside the first literal continues the
# string onto the next line, so both authors end up in one single string.
__author__ = 'Florian Krause <florian@expyriment.org>, \
Oliver Lindemann <oliver@expyriment.org>'
__version__ = '0.7.0'
__revision__ = '55a4e7e'
__date__ = 'Wed Mar 26 14:33:37 2014 +0100'


def sum(data):
    """Returns the sum of data.

    The function ignores all elements that cannot be added to the running
    total (e.g. strings or None) and returns None if no element could be
    added. In contrast to standard math and numpy functions, this function
    is robust against type violations.

    Parameters
    ----------
    data : list
        list of numerical data

    Returns
    -------
    out : float or None
        sum of all elements that could be added (the running total starts
        at the integer 0), or None if no element could be added

    """

    total = 0
    elem_found = False
    for v in data:
        try:
            total += v
        except (TypeError, ValueError):
            # Only skip elements that do not support the addition. A bare
            # `except` would also swallow KeyboardInterrupt/SystemExit.
            continue
        elem_found = True
    return total if elem_found else None


def mode(data):
    """Returns the mode, that is, the most frequent value in data.

    Notes
    -----
    If several values share the highest frequency, the one that comes first
    when iterating over the frequency table is returned.

    Parameters
    ----------
    data : list
        list of numerical data

    Returns
    -------
    out : float or None
        the most frequent value, or None if data is empty

    """

    freq = frequence_table(data)
    if not freq:
        # max() raises ValueError on an empty table; report "no mode" instead.
        return None
    # max() returns the first key with the highest count.
    return max(freq, key=freq.get)

def mean(data):
    """Returns the mean of data.

    Notes
    -----
    The function ignores all elements that cannot be added to the running
    total (e.g. strings or None) and returns None if no element could be
    added. In contrast to standard math and numpy functions, this function
    is robust against type violations.

    Parameters
    ----------
    data : list
        list of numerical data

    Returns
    -------
    out : float or None
        arithmetic mean of all elements that could be added, or None if no
        element could be added

    """

    total = 0
    cnt = 0
    for v in data:
        try:
            total += v
        except (TypeError, ValueError):
            # Only skip elements that do not support the addition. A bare
            # `except` would also swallow KeyboardInterrupt/SystemExit.
            continue
        cnt += 1
    if cnt == 0:
        return None
    # Convert explicitly so integer input never triggers integer division.
    return float(total) / float(cnt)

def median(data):
    """Returns the median of data.

    Notes
    -----
    The function ignores all non-numerical elements in the data and returns
    None if no numerical element has been found. In contrast to standard math
    and numpy functions, this function is robust against type violations.

    Parameters
    ----------
    data : list
        list of numerical data

    Returns
    -------
    out : float or None
        the middle element for an odd number of numerical elements, the
        mean of the two middle elements (as float) for an even number, or
        None if there is no numerical element

    """

    # Imported locally to keep this function self-contained.
    # numbers.Real covers int, float and (on Python 2) long, so no
    # reference to the Python-2-only name `long` is needed.
    import numbers

    values = sorted(elem for elem in data if isinstance(elem, numbers.Real))
    n = len(values)
    if n == 0:
        return None
    # Floor division keeps the index an integer on Python 2 and Python 3.
    mid = n // 2
    if n % 2 == 1:
        return values[mid]
    return float(values[mid - 1] + values[mid]) / 2.0

def frequence_table(data):
    """Counts how often each value occurs in the data.

    Parameters
    ----------
    data : list
        list of numerical data (the elements are used as dictionary keys)

    Returns
    -------
    out : dict
        `dict.keys` : values, `dict.values` : frequencies


    """

    counts = {}
    for value in data:
        if value in counts:
            counts[value] += 1
        else:
            # First occurrence of this value.
            counts[value] = 1
    return counts