1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
|
import matplotlib.pyplot as plt
import numba
import numpy as np
from sklearn.datasets import load_digits, make_classification
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline
from umap import *
print('import done')

# Embed the scikit-learn digits dataset with UMAP and save a scatter plot.
# Only the training portion of the (label-stratified) split is embedded and
# plotted, so the held-out arrays are discarded instead of being bound to
# names. Dataset-specific names are used so this section does not share
# variables with the unrelated classification experiment further down.
digits = load_digits()
digits_train, _, digits_labels, _ = train_test_split(
    digits.data, digits.target, stratify=digits.target, random_state=1000
)

# Manhattan distance is used both for the input space (metric) and for the
# embedding space (output_metric); verbose=True makes UMAP log its progress.
trans = UMAP(
    n_neighbors=5,
    random_state=42,
    metric="manhattan",
    output_metric="manhattan",
    verbose=True,
).fit(digits_train)

# Plot the first two embedding coordinates, coloured by digit label. The plot
# is drawn on an explicit figure that is closed once it has been written to
# disk: pyplot keeps every figure it creates alive until it is closed, and the
# grid searches that follow would otherwise run with it still held in memory.
fig, ax = plt.subplots()
ax.scatter(
    trans.embedding_[:, 0],
    trans.embedding_[:, 1],
    c=digits_labels,
    cmap="Spectral",
)
fig.savefig('foo.png')
plt.close(fig)
# Synthetic two-class problem: 1000 samples with 300 features, 250 of which
# are informative and none redundant or repeated.
dataset_options = {
    "n_samples": 1000,
    "n_features": 300,
    "n_informative": 250,
    "n_redundant": 0,
    "n_repeated": 0,
    "n_classes": 2,
    "random_state": 1212,
}
x, y = make_classification(**dataset_options)

# Hold out 20% of the samples for the final accuracy report.
print('Splitting into training and testing data ...')
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Baseline: grid-search the regularisation strength C of a linear SVM over
# the powers of ten from 10**-3 up to 10**3, fitting on the raw features.
print('Run linear SVM algorithm')
params_grid = {"C": [10 ** exponent for exponent in range(-3, 4)]}
svc = LinearSVC(dual=False, random_state=123)
clf = GridSearchCV(svc, params_grid)
clf.fit(X_train, y_train)

raw_accuracy = clf.score(X_test, y_test)
print("Accuracy on the test set with raw data: {:.3f}".format(raw_accuracy))
# Second experiment: reduce the features with UMAP before the linear SVM and
# grid-search the UMAP and SVM hyper-parameters jointly.
print('Transform with UMAP, then run SVM')
umap = UMAP(random_state=456)
pipeline = Pipeline(steps=[("umap", umap), ("svc", svc)])

# Candidate values; the "<step>__<param>" keys below route each list to the
# matching pipeline step.
neighbor_candidates = [5, 20]
component_candidates = [15, 25, 50]
c_candidates = [10 ** exponent for exponent in range(-3, 4)]
params_grid_pipeline = {
    "umap__n_neighbors": neighbor_candidates,
    "umap__n_components": component_candidates,
    "svc__C": c_candidates,
}

clf_pipeline = GridSearchCV(pipeline, params_grid_pipeline)
clf_pipeline.fit(X_train, y_train)

# Report accuracy on the same held-out test set used for the raw-data run.
umap_accuracy = clf_pipeline.score(X_test, y_test)
report_template = "Accuracy on the test set with UMAP transformation: {:.3f}"
print(report_template.format(umap_accuracy))
|