File: test_features_generator.py

package info (click to toggle)
sklearn-pandas 2.2.0-5
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 440 kB
  • sloc: python: 1,177; sh: 12; makefile: 8
file content (121 lines) | stat: -rw-r--r-- 3,320 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
from collections import Counter

import pytest
import numpy as np
from pandas import DataFrame
from numpy.testing import assert_array_equal

from sklearn_pandas import DataFrameMapper
from sklearn_pandas.features_generator import gen_features


class MockClass(object):
    """
    Minimal stand-in class used to check how init arguments are forwarded.

    It only stores the two keyword arguments it is constructed with so the
    tests can read them back as attributes.
    """

    def __init__(self, value=1, name='class'):
        # Keep both arguments verbatim; nothing is validated or converted.
        self.value, self.name = value, name


class MockTransformer(object):
    """
    Toy transformer that replaces every input with the most frequent value
    seen during ``fit``.
    """

    def __init__(self):
        # Filled in by fit(); None until then.
        self.most_common_ = None

    def fit(self, X, y=None):
        """
        Remember the most frequent element of X and return self.
        """
        top_entries = Counter(X).most_common(1)
        # Exactly one (value, count) pair is expected here; the unpacking
        # raises ValueError when X yields no elements.
        (winner, _count), = top_entries
        self.most_common_ = winner
        return self

    def transform(self, X, y=None):
        """
        Return an array as long as X filled with the value learned in fit.
        """
        repeated = [self.most_common_ for _ in range(len(X))]
        return np.asarray(repeated)


@pytest.fixture
def simple_dataset():
    """
    Fixture providing a five-row frame with three integer columns.
    """
    column_values = {
        'feat1': [1, 2, 1, 3, 1],
        'feat2': [1, 2, 2, 2, 3],
        'feat3': [1, 2, 3, 4, 5],
    }
    return DataFrame(column_values)


def test_generate_features_with_default_parameters():
    """
    Tests generating features from classes with default init arguments.

    Checks that one definition is produced per column, that the third slot
    of every definition is an empty dict, and that each generated object
    carries the default attribute values of MockClass.
    """
    columns = ['colA', 'colB', 'colC']
    feature_defs = gen_features(columns=columns, classes=[MockClass])
    assert len(feature_defs) == len(columns)

    # The third slot of every definition is expected to be an empty dict.
    for definition in feature_defs:
        assert definition[2] == {}

    # Index the first two slots of each definition: column -> transformers.
    transformers_by_column = {
        definition[0]: definition[1] for definition in feature_defs
    }
    assert columns == sorted(transformers_by_column)

    # Default init arguments of MockClass, spelled out for clarity.
    defaults = {'value': 1, 'name': 'class'}
    for transformers in transformers_by_column.values():
        for instance in transformers:
            assert_attributes(instance, **defaults)


def test_generate_features_with_several_classes():
    """
    Tests generating features pipeline with different transformers parameters.

    Each class entry is given as a dict whose 'class' key names the class
    and whose remaining keys are the init arguments expected on the
    resulting object.
    """
    class_specs = [
        {'class': MockClass},
        {'class': MockClass, 'name': 'mockA'},
        {'class': MockClass, 'name': 'mockB', 'value': None},
    ]
    feature_defs = gen_features(
        columns=['colA', 'colB', 'colC'], classes=class_specs)

    # Expected attributes of the transformer at each position, in the same
    # order as the class entries above.
    expected_by_position = (
        {'name': 'class', 'value': 1},
        {'name': 'mockA', 'value': 1},
        {'name': 'mockB', 'value': None},
    )

    for _column, transformers, _params in feature_defs:
        # Index explicitly so a too-short transformer list fails loudly.
        for position, attrs in enumerate(expected_by_position):
            assert_attributes(transformers[position], **attrs)


def test_generate_features_with_none_only_transformers():
    """
    Tests generating "dummy" feature definition which doesn't apply any
    transformation.

    With ``classes=[None]`` every column is expected to map to a
    ``(column, None, {})`` definition.
    """
    columns = ['colA', 'colB', 'colC']
    feature_defs = gen_features(columns=columns, classes=[None])

    expected = [(column, None, {}) for column in columns]

    assert feature_defs == expected


def test_compatibility_with_data_frame_mapper(simple_dataset):
    """
    Tests compatibility of generated feature definition with DataFrameMapper.

    feat1 and feat2 go through MockTransformer, so their columns are
    expected to hold their most frequent values (1 and 2); feat3 is added
    with a None transformer and is expected to come through unchanged.
    """
    features_defs = gen_features(
        columns=['feat1', 'feat2'], classes=[MockTransformer])
    # Hand-written two-element definition alongside the generated ones.
    features_defs.append(('feat3', None))

    mapper = DataFrameMapper(features_defs)
    transformed = mapper.fit_transform(simple_dataset)

    # One row per input row: [most common feat1, most common feat2, feat3].
    expected = np.asarray([[1, 2, feat3] for feat3 in (1, 2, 3, 4, 5)])

    assert_array_equal(transformed, expected)


def assert_attributes(obj, **attrs):
    """
    Assert that obj has every attribute named in attrs with an equal value.

    Raises AssertionError on the first mismatch; a missing attribute
    surfaces as the AttributeError raised by getattr.
    """
    for attr_name in attrs:
        expected = attrs[attr_name]
        actual = getattr(obj, attr_name)
        assert actual == expected