File: stutil.py

package info (click to toggle)
openstructure 2.11.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 206,240 kB
  • sloc: cpp: 188,571; python: 36,686; ansic: 34,298; fortran: 3,275; sh: 312; xml: 146; makefile: 29
file content (76 lines) | stat: -rw-r--r-- 1,831 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import math

def Mean(xs):
  """
  Return the arithmetic mean of the values in xs.

  The sum of all values is converted to float before it is divided by the
  number of values.

  :param xs: sized collection of numbers (len() and sum() are applied to it)
  :raises RuntimeError: if xs is empty
  """
  count=len(xs)
  if count<1:
    raise RuntimeError("Can't calculate mean of empty sequence")
  total=sum(xs)
  return float(total)/count

def Median(xs):
  """
  Return the median of the values in xs.

  The input is left untouched, a sorted copy is used. For an odd number of
  values the middle element is returned as is; for an even number of values
  the two middle elements are added and divided by 2.0.

  :param xs: sized collection of mutually comparable numbers
  :raises RuntimeError: if xs is empty
  """
  count=len(xs)
  if count==0:
    raise RuntimeError("Can't calculate median of empty sequence")
  ordered=sorted(xs)
  # index of the middle element (odd count) or of the lower of the two
  # middle elements (even count)
  lower_mid=(count-1)//2
  if count%2==1:
    return ordered[lower_mid]
  return (ordered[lower_mid]+ordered[lower_mid+1])/2.0

def StdDev(xs):
  """
  Calculate the standard deviation of dataset, dividing the sum of squared
  deviations from the mean by n (not n-1)

            | sum[(xi-<x>)^2] |
  sigma=sqrt|-----------------|
            |        n        |

  where <x> is the mean of xs as returned by Mean().
  """
  center=Mean(xs)
  squared_devs=[(value-center)**2 for value in xs]
  variance=sum(squared_devs)/len(xs)
  return math.sqrt(variance)

def Min(xs):
  """
  Return the smallest value of dataset, as determined by the builtin min()
  """
  smallest=min(xs)
  return smallest

def Max(xs):
  """
  Return the largest value of dataset, as determined by the builtin max()
  """
  largest=max(xs)
  return largest

def Correl(xs, ys):
  """
  Calculates the Pearson correlation coefficient between xs and ys as

                sum[(xi-<x>)*(yi-<y>)]
  r=-------------------------------------------
    sqrt(sum[(xi-<x>)^2])*sqrt(sum[(yi-<y>)^2])

  where <x> and <y> are the means of dataset xs and ys as returned by Mean().

  :raises RuntimeError: if xs and ys differ in length or contain exactly one
                        value each

  If all values of xs or all values of ys are identical, the denominator is
  zero; no special handling is done for that case and, for plain Python
  numbers, the division raises ZeroDivisionError.
  """
  if len(xs)!=len(ys):
    raise RuntimeError("Can't calculate correl. Sequence lengths do not match.")
  if len(xs)==1:
    raise RuntimeError("Can't calculate correl of sequences with length 1.")
  mean_x=Mean(xs)
  mean_y=Mean(ys)
  cross_term=0.0
  sq_sum_x=0.0
  sq_sum_y=0.0
  for x, y in zip(xs, ys):
    dev_x=x-mean_x
    dev_y=y-mean_y
    cross_term+=dev_x*dev_y
    sq_sum_x+=dev_x**2
    sq_sum_y+=dev_y**2
  denominator=math.sqrt(sq_sum_x)*math.sqrt(sq_sum_y)
  return cross_term/denominator

def Histogram(xs, bounds, num_bins):
  """
  Count how many values of xs fall into each of num_bins equally sized bins

  The range from bounds[0] to bounds[1] is divided into num_bins bins of
  equal width. Bin i collects the values x for which
  floor((x-bounds[0])*num_bins/(bounds[1]-bounds[0])) equals i, so for
  bounds[0]<bounds[1] every bin includes its lower edge and excludes its
  upper edge. Values outside of the range, including values equal to
  bounds[1], are silently ignored.

  :param xs: iterable of numbers
  :param bounds: pair of numbers delimiting the histogram range; the two
                 values must differ, otherwise the bin width calculation
                 divides by zero
  :param num_bins: number of bins
  :returns: list with num_bins integer counts
  """
  bins=[0]*num_bins
  # number of bins per unit of x, i.e. the inverse of the bin width
  d=1.0*num_bins/(bounds[1]-bounds[0])
  for x in xs:
    # floor() instead of plain int(): int() truncates towards zero, which
    # maps values up to one bin width outside of bounds[0] to index 0 and
    # wrongly counts them in the first bin.
    index=int(math.floor((x-bounds[0])*d))
    if 0<=index<num_bins:
      bins[index]+=1
  return bins