1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
|
"""
=================================================================
Displaying Pipelines
=================================================================
The default configuration for displaying a pipeline in a Jupyter Notebook is
`'diagram'` where `set_config(display='diagram')`. To deactivate HTML representation,
use `set_config(display='text')`.
To see more detailed steps in the visualization of the pipeline, click on the
steps in the pipeline.
"""
# %%
# Displaying a Pipeline with a Preprocessing Step and Classifier
################################################################################
# This section constructs a :class:`~sklearn.pipeline.Pipeline` with a preprocessing
# step, :class:`~sklearn.preprocessing.StandardScaler`, and classifier,
# :class:`~sklearn.linear_model.LogisticRegression`, and displays its visual
# representation.
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn import set_config
steps = [
("preprocessing", StandardScaler()),
("classifier", LogisticRegression()),
]
pipe = Pipeline(steps)
# %%
# To visualize the diagram, the default is `display='diagram'`.
set_config(display="diagram")
pipe # click on the diagram below to see the details of each step
# %%
# To view the text pipeline, change to `display='text'`.
set_config(display="text")
pipe
# %%
# Put back the default display
set_config(display="diagram")
# %%
# Displaying a Pipeline Chaining Multiple Preprocessing Steps & Classifier
################################################################################
# This section constructs a :class:`~sklearn.pipeline.Pipeline` with multiple
# preprocessing steps, :class:`~sklearn.preprocessing.PolynomialFeatures` and
# :class:`~sklearn.preprocessing.StandardScaler`, and a classifier step,
# :class:`~sklearn.linear_model.LogisticRegression`, and displays its visual
# representation.
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LogisticRegression
steps = [
("standard_scaler", StandardScaler()),
("polynomial", PolynomialFeatures(degree=3)),
("classifier", LogisticRegression(C=2.0)),
]
pipe = Pipeline(steps)
pipe # click on the diagram below to see the details of each step
# %%
# Displaying a Pipeline and Dimensionality Reduction and Classifier
################################################################################
# This section constructs a :class:`~sklearn.pipeline.Pipeline` with a
# dimensionality reduction step, :class:`~sklearn.decomposition.PCA`,
# a classifier, :class:`~sklearn.svm.SVC`, and displays its visual
# representation.
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.decomposition import PCA
steps = [("reduce_dim", PCA(n_components=4)), ("classifier", SVC(kernel="linear"))]
pipe = Pipeline(steps)
pipe # click on the diagram below to see the details of each step
# %%
# Displaying a Complex Pipeline Chaining a Column Transformer
################################################################################
# This section constructs a complex :class:`~sklearn.pipeline.Pipeline` with a
# :class:`~sklearn.compose.ColumnTransformer` and a classifier,
# :class:`~sklearn.linear_model.LogisticRegression`, and displays its visual
# representation.
import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
numeric_preprocessor = Pipeline(
steps=[
("imputation_mean", SimpleImputer(missing_values=np.nan, strategy="mean")),
("scaler", StandardScaler()),
]
)
categorical_preprocessor = Pipeline(
steps=[
(
"imputation_constant",
SimpleImputer(fill_value="missing", strategy="constant"),
),
("onehot", OneHotEncoder(handle_unknown="ignore")),
]
)
preprocessor = ColumnTransformer(
[
("categorical", categorical_preprocessor, ["state", "gender"]),
("numerical", numeric_preprocessor, ["age", "weight"]),
]
)
pipe = make_pipeline(preprocessor, LogisticRegression(max_iter=500))
pipe # click on the diagram below to see the details of each step
# %%
# Displaying a Grid Search over a Pipeline with a Classifier
################################################################################
# This section constructs a :class:`~sklearn.model_selection.GridSearchCV`
# over a :class:`~sklearn.pipeline.Pipeline` with
# :class:`~sklearn.ensemble.RandomForestClassifier` and displays its visual
# representation.
import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
numeric_preprocessor = Pipeline(
steps=[
("imputation_mean", SimpleImputer(missing_values=np.nan, strategy="mean")),
("scaler", StandardScaler()),
]
)
categorical_preprocessor = Pipeline(
steps=[
(
"imputation_constant",
SimpleImputer(fill_value="missing", strategy="constant"),
),
("onehot", OneHotEncoder(handle_unknown="ignore")),
]
)
preprocessor = ColumnTransformer(
[
("categorical", categorical_preprocessor, ["state", "gender"]),
("numerical", numeric_preprocessor, ["age", "weight"]),
]
)
pipe = Pipeline(
steps=[("preprocessor", preprocessor), ("classifier", RandomForestClassifier())]
)
param_grid = {
"classifier__n_estimators": [200, 500],
"classifier__max_features": ["auto", "sqrt", "log2"],
"classifier__max_depth": [4, 5, 6, 7, 8],
"classifier__criterion": ["gini", "entropy"],
}
grid_search = GridSearchCV(pipe, param_grid=param_grid, n_jobs=1)
grid_search # click on the diagram below to see the details of each step
|