File: utils.py

package info (click to toggle)
python-scipy 0.6.0-12
  • links: PTS, VCS
  • area: main
  • in suites: lenny
  • size: 32,016 kB
  • ctags: 46,675
  • sloc: cpp: 124,854; ansic: 110,614; python: 108,664; fortran: 76,260; objc: 424; makefile: 384; sh: 10
file content (64 lines) | stat: -rw-r--r-- 1,871 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#! /usr/bin/env python
# Last Change: Mon Jul 09 05:00 PM 2007 J

# Various utilities for examples 

import numpy as N
from numpy.testing import set_package_path, restore_path

from scikits.learn.datasets import oldfaithful, pendigits, iris

def get_faithful():
    """Return the Old Faithful data as a float array with two columns.

    Records are read from oldfaithful.load().  Every record whose first
    field is one of the codes 'L', 'M' or 'S' is dropped.  For the
    remaining records, column 0 of the result is the first field converted
    through int(), and column 1 is the second field converted to float.

    NOTE(review): the original docstring described the columns as
    (duration, waiting), while the local variable names in the code say
    (waiting, duration).  The record layout is not visible from this file;
    confirm against the dataset definition before relying on the order.
    """
    data = oldfaithful.load()

    # Keep only the records whose first field is not a letter code; those
    # cannot be converted to a number below.
    kept = [(rec[0], rec[1]) for rec in data
            if rec[0] not in ('L', 'M', 'S')]

    # The builtin float is used as dtype: the N.float alias meant exactly the
    # same thing but no longer exists in recent numpy releases.
    waiting = N.array([int(first) for first, _ in kept], dtype=float)
    duration = N.array([second for _, second in kept], dtype=float)

    # Turn both vectors into columns and glue them side by side.
    return N.concatenate((waiting[:, N.newaxis], duration[:, N.newaxis]), 1)

def get_pendigits():
    """Return the training part of the pendigits dataset as a tuple (x, y).

    The dataset is obtained from pendigits.load(); the returned values are
    the 'x' and 'y' entries stored under its 'training' key (presumably the
    samples and their labels -- confirm against the dataset definition)."""
    training = pendigits.load()['training']
    return training['x'], training['y']

def scale(data, mode = 'sym'):
    """Linearly scale each column of data, in place.

    data is modified in place, so it should be a floating point numpy array.
    The minimum and maximum are taken along axis 0 (i.e. per column).

    mode selects the target range:
        - 'sym' (default): each column is mapped onto [-1, 1]
        - 'right': each column is mapped onto [0, 1]

    Returns the translation factor t and scaling factor s. You can retrieve
    the original values with data = s * scaled + t.

    A constant column (max == min) has no range to scale: it is only
    translated (it becomes all zeros) and its scaling factor is reported as
    1, so that the relation above still holds.

    Raises ValueError if mode is not recognized; data is left untouched in
    that case."""
    n = N.min(data, 0)
    m = N.max(data, 0)
    if mode == 'sym':
        # Center on the middle of the range, scale by half the range
        t = n + 0.5 * (m - n)
        s = 0.5 * (m - n)
    elif mode == 'right':
        # Shift the minimum to 0, scale by the full range
        t = n
        s = m - n
    else:
        raise ValueError("Mode %s not recognized" % mode)

    # Dividing by a zero range would fill constant columns with NaN; use a
    # neutral factor of 1 for them instead.
    s = N.where(s == 0, 1, s)

    data -= t
    data /= s
    return t, s

if __name__ == '__main__':
    # Small demo: scale the same random data with both modes and show the
    # results.  scale() works in place, hence the copy.
    a = N.random.randn(10, 2)
    b = a.copy()
    scale(a)
    # print(x) with a single argument gives the same output whether print is
    # a statement or a function, so the module loads on either interpreter.
    print(a)
    scale(b, 'right')
    print(b)