"""
Tests for ``gen_features`` from ``sklearn_pandas.features_generator``.
"""
from collections import Counter
import pytest
import numpy as np
from pandas import DataFrame
from numpy.testing import assert_array_equal
from sklearn_pandas import DataFrameMapper
from sklearn_pandas.features_generator import gen_features
class MockClass(object):
    """
    Plain attribute holder used as a stand-in transformer class.

    The constructor stores its two keyword arguments unchanged so tests can
    check which init arguments an instance was created with.
    """

    def __init__(self, value=1, name='class'):
        # Both arguments are kept verbatim; no validation is performed.
        self.name = name
        self.value = value
class MockTransformer(object):
    """
    Toy transformer that maps every input value to the most common value
    observed during ``fit``.
    """

    def __init__(self):
        # Populated by ``fit``; remains ``None`` until then.
        self.most_common_ = None

    def fit(self, X, y=None):
        """
        Remember the most frequent value of ``X``.

        ``X`` is any iterable of hashable values; ``y`` is accepted and
        ignored. Ties are resolved by ``Counter.most_common``.

        Returns ``self`` so calls can be chained.

        Raises ``ValueError`` when ``X`` contains no values.
        """
        top = Counter(X).most_common(1)
        if not top:
            # Previously this surfaced as an opaque unpacking ValueError;
            # the exception type is kept, only the message is explicit.
            raise ValueError(
                'MockTransformer cannot be fitted on empty input')
        self.most_common_ = top[0][0]
        return self

    def transform(self, X, y=None):
        """
        Return an array of ``len(X)`` copies of the value learned in ``fit``.

        ``y`` is accepted and ignored. If ``fit`` was never called the
        array is filled with ``None``.
        """
        return np.asarray([self.most_common_] * len(X))
@pytest.fixture
def simple_dataset():
    """
    Provides a five-row data frame with three integer feature columns.
    """
    data = {
        'feat1': [1, 2, 1, 3, 1],
        'feat2': [1, 2, 2, 2, 3],
        'feat3': [1, 2, 3, 4, 5],
    }
    return DataFrame(data)
def test_generate_features_with_default_parameters():
    """
    Tests generating features from classes with default init arguments.
    """
    columns = ['colA', 'colB', 'colC']
    feature_defs = gen_features(columns=columns, classes=[MockClass])
    assert len(feature_defs) == len(columns)

    # The third element of each definition holds the extra options, which
    # must be empty when none were requested.
    for feature in feature_defs:
        assert feature[2] == {}

    # Map each column name to its list of transformer instances.
    feature_dict = {feature[0]: feature[1] for feature in feature_defs}
    assert columns == sorted(feature_dict)

    # default init arguments for MockClass for clarification.
    expected = {'value': 1, 'name': 'class'}
    for transformers in feature_dict.values():
        # One class was requested, so exactly one instance is expected;
        # this also keeps the inner loop from passing vacuously.
        assert len(transformers) == 1
        for obj in transformers:
            assert_attributes(obj, **expected)
def test_generate_features_with_several_classes():
    """
    Tests generating features pipeline with different transformers parameters.
    """
    columns = ['colA', 'colB', 'colC']
    feature_defs = gen_features(
        columns=columns,
        classes=[
            {'class': MockClass},
            {'class': MockClass, 'name': 'mockA'},
            {'class': MockClass, 'name': 'mockB', 'value': None},
        ]
    )

    # Without this check an empty result would skip the loop below and the
    # test would pass without verifying anything.
    assert len(feature_defs) == len(columns)

    for _, transformers, _ in feature_defs:
        # One transformer instance per entry in ``classes``.
        assert len(transformers) == 3
        assert_attributes(transformers[0], name='class', value=1)
        assert_attributes(transformers[1], name='mockA', value=1)
        assert_attributes(transformers[2], name='mockB', value=None)
def test_generate_features_with_none_only_transformers():
    """
    Tests that passing only ``None`` as the class list yields "dummy"
    feature definitions, i.e. ones without any transformation.
    """
    columns = ['colA', 'colB', 'colC']

    feature_defs = gen_features(columns=columns, classes=[None])

    # Each column keeps its name, gets no transformer and no extra options.
    expected = [(column, None, {}) for column in columns]
    assert feature_defs == expected
def test_compatibility_with_data_frame_mapper(simple_dataset):
    """
    Tests that generated feature definitions can be fed to DataFrameMapper.
    """
    generated = gen_features(
        columns=['feat1', 'feat2'],
        classes=[MockTransformer])
    # 'feat3' is added by hand with no transformer attached.
    feature_defs = generated + [('feat3', None)]

    X = DataFrameMapper(feature_defs).fit_transform(simple_dataset)

    # The first two columns hold the most common value of 'feat1' and
    # 'feat2' respectively; the third is 'feat3' unchanged.
    expected = np.asarray([
        [1, 2, 1],
        [1, 2, 2],
        [1, 2, 3],
        [1, 2, 4],
        [1, 2, 5],
    ])
    assert_array_equal(X, expected)
def assert_attributes(obj, **attrs):
    """
    Assert that ``obj`` has every attribute named in ``attrs`` and that each
    one equals the given value.
    """
    for name, expected in attrs.items():
        actual = getattr(obj, name)
        assert actual == expected
|