File: plot_sgd_weighted_classes.py

package info (click to toggle)
scikit-learn 0.11.0-2%2Bdeb7u1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 13,900 kB
  • sloc: python: 34,740; ansic: 8,860; cpp: 8,849; pascal: 230; makefile: 211; sh: 14
file content (56 lines) | stat: -rw-r--r-- 1,430 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
"""
================================================
SGD: Separating hyperplane with weighted classes
================================================

Fit linear SVMs with and without class weighting.
Allows to handle problems with unbalanced classes.

"""
print __doc__

import numpy as np
import pylab as pl
from sklearn.linear_model import SGDClassifier

# we create 40 separable points
np.random.seed(0)
n_samples_1 = 1000
n_samples_2 = 100
X = np.r_[1.5 * np.random.randn(n_samples_1, 2),
          0.5 * np.random.randn(n_samples_2, 2) + [2, 2]]
y = np.array([0] * (n_samples_1) + [1] * (n_samples_2), dtype=np.float64)
idx = np.arange(y.shape[0])
np.random.shuffle(idx)
X = X[idx]
y = y[idx]
mean = X.mean(axis=0)
std = X.std(axis=0)
X = (X - mean) / std

# fit the model and get the separating hyperplane
clf = SGDClassifier(n_iter=100, alpha=0.01)
clf.fit(X, y)

w = clf.coef_.ravel()
a = -w[0] / w[1]
xx = np.linspace(-5, 5)
yy = a * xx - clf.intercept_ / w[1]


# get the separating hyperplane using weighted classes
wclf = SGDClassifier(n_iter=100, alpha=0.01, class_weight={1: 10})
wclf.fit(X, y)

ww = wclf.coef_.ravel()
wa = -ww[0] / ww[1]
wyy = wa * xx - wclf.intercept_ / ww[1]

# plot separating hyperplanes and samples
h0 = pl.plot(xx, yy, 'k-', label='no weights')
h1 = pl.plot(xx, wyy, 'k--', label='with weights')
pl.scatter(X[:, 0], X[:, 1], c=y, cmap=pl.cm.Paired)
pl.legend()

pl.axis('tight')
pl.show()