1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
|
import tempfile
import pytest
import numpy as np
from pandas import DataFrame
import joblib
from sklearn_pandas import DataFrameMapper
from sklearn_pandas import NumericalTransformer
@pytest.fixture
def simple_dataset():
    """
    Provide a small five-row frame with three integer columns
    ('feat1', 'feat2', 'feat3') for the transformer tests.
    """
    columns = {
        'feat1': [1, 2, 1, 3, 1],
        'feat2': [1, 2, 2, 2, 3],
        'feat3': [1, 2, 3, 4, 5],
    }
    return DataFrame(columns)
def test_common_numerical_transformer(simple_dataset):
    """
    Check that mapping 'feat1' through NumericalTransformer('log') yields a
    single 'feat1' column equal to np.log applied to the input column.
    """
    source_frame = simple_dataset
    log_step = ('feat1', NumericalTransformer('log'))
    mapper = DataFrameMapper([log_step], df_out=True)

    result = mapper.fit_transform(source_frame)

    # Only the mapped column should survive in the output frame.
    assert list(result.columns) == ['feat1']

    expected = source_frame['feat1'].apply(np.log).values
    assert np.array_equal(expected, result['feat1'].values)
def test_numerical_transformer_serialization(simple_dataset):
    """
    Test that a fitted mapper survives a joblib dump/load round trip.

    The mapper is fitted, written to a temporary file, loaded back, and the
    reloaded copy must transform the dataset to the same array as the
    original.
    """
    transformer = DataFrameMapper([
        ('feat1', NumericalTransformer('log'))
    ])
    df = simple_dataset
    transformer.fit(df)

    # The context manager closes (and thereby deletes) the temporary file
    # even if dumping, loading or transforming raises.
    with tempfile.NamedTemporaryFile(delete=True) as f:
        joblib.dump(transformer, f.name)
        restored = joblib.load(f.name)

        # The comparison must be asserted; a bare np.array_equal(...) call
        # discards its result and the test could never fail.
        assert np.array_equal(
            transformer.transform(df),
            restored.transform(df),
        )
|