File: common.py

package info (click to toggle)
scikit-learn 0.11.0-2%2Bdeb7u1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 13,900 kB
  • sloc: python: 34,740; ansic: 8,860; cpp: 8,849; pascal: 230; makefile: 211; sh: 14
file content (28 lines) | stat: -rw-r--r-- 839 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
"""
Common utilities for testing clustering.

"""

import numpy as np


###############################################################################
# Generate sample data

def generate_clustered_data(seed=0, n_clusters=3, n_features=2,
                            n_samples_per_cluster=20, std=.4):
    """Generate Gaussian blobs around fixed, off-origin cluster centers.

    Parameters
    ----------
    seed : int
        Seed of the ``np.random.RandomState`` used to draw the noise.
    n_clusters : int
        Number of clusters to generate. At most 4, the number of rows in
        the built-in table of centers.
    n_features : int
        Number of leading coordinates of each center to keep. At most 4,
        the number of columns in the built-in table of centers.
    n_samples_per_cluster : int
        Number of samples drawn for each cluster.
    std : float
        Factor applied to the standard-normal noise added to each center.

    Returns
    -------
    X : ndarray, shape (n_clusters * n_samples_per_cluster, n_features)
        The samples, ordered cluster by cluster.

    Raises
    ------
    ValueError
        If ``n_clusters`` or ``n_features`` exceeds what the built-in
        table of centers provides.
    """
    prng = np.random.RandomState(seed)

    # the data is deliberately shifted away from zero to check clustering
    # algorithm robustness w.r.t. non-centered data
    means = np.array([[1,  1, 1, 0],
                      [-1, -1, 0, 1],
                      [1, -1, 1, 1],
                      [-1, 1, 1, 0],
                      ]) + 10

    # Fail early with an explicit message instead of an IndexError halfway
    # through the loop or an opaque broadcasting error.
    max_clusters, max_features = means.shape
    if n_clusters > max_clusters:
        raise ValueError("n_clusters=%r is not supported: at most %d "
                         "cluster centers are defined"
                         % (n_clusters, max_clusters))
    if n_features > max_features:
        raise ValueError("n_features=%r is not supported: cluster centers "
                         "have at most %d features"
                         % (n_features, max_features))

    # Seed the list with an empty (0, n_features) array so that the result
    # keeps its 2-D shape even when no cluster is requested.
    blocks = [np.empty((0, n_features))]
    for i in range(n_clusters):
        noise = std * prng.randn(n_samples_per_cluster, n_features)
        blocks.append(means[i][:n_features] + noise)

    # Concatenate once rather than re-copying the growing array each turn.
    return np.concatenate(blocks, axis=0)