1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
|
"""
==================================
Comparing various online solvers
==================================
An example showing how different online solvers perform
on the hand-written digits dataset.
"""
# Author: Rob Zinkov <rob at zinkov dot com>
# License: BSD 3 clause
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier, Perceptron
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.linear_model import LogisticRegression
heldout = [0.95, 0.90, 0.75, 0.50, 0.01]
rounds = 20
digits = datasets.load_digits()
X, y = digits.data, digits.target
classifiers = [
("SGD", SGDClassifier()),
("ASGD", SGDClassifier(average=True)),
("Perceptron", Perceptron()),
("Passive-Aggressive I", PassiveAggressiveClassifier(loss='hinge',
C=1.0)),
("Passive-Aggressive II", PassiveAggressiveClassifier(loss='squared_hinge',
C=1.0)),
("SAG", LogisticRegression(solver='sag', tol=1e-1, C=1.e4 / X.shape[0]))
]
xx = 1. - np.array(heldout)
for name, clf in classifiers:
print("training %s" % name)
rng = np.random.RandomState(42)
yy = []
for i in heldout:
yy_ = []
for r in range(rounds):
X_train, X_test, y_train, y_test = \
train_test_split(X, y, test_size=i, random_state=rng)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
yy_.append(1 - np.mean(y_pred == y_test))
yy.append(np.mean(yy_))
plt.plot(xx, yy, label=name)
plt.legend(loc="upper right")
plt.xlabel("Proportion train")
plt.ylabel("Test Error Rate")
plt.show()
|