File: test_transformers.py

package info (click to toggle)
sklearn-pandas 2.2.0-5
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 440 kB
  • sloc: python: 1,177; sh: 12; makefile: 8
file content (47 lines) | stat: -rw-r--r-- 1,216 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import tempfile
import pytest
import numpy as np
from pandas import DataFrame
import joblib

from sklearn_pandas import DataFrameMapper
from sklearn_pandas import NumericalTransformer


@pytest.fixture
def simple_dataset():
    """
    Provide a small five-row DataFrame with three integer feature columns.
    """
    columns = {
        'feat1': [1, 2, 1, 3, 1],
        'feat2': [1, 2, 2, 2, 3],
        'feat3': [1, 2, 3, 4, 5],
    }
    return DataFrame(columns)


def test_common_numerical_transformer(simple_dataset):
    """
    Check that the 'log' NumericalTransformer matches np.log on a column.
    """
    mapper = DataFrameMapper(
        [('feat1', NumericalTransformer('log'))],
        df_out=True,
    )
    result = mapper.fit_transform(simple_dataset)

    # Only the mapped column should appear in the output frame.
    assert list(result.columns) == ['feat1']

    expected = simple_dataset['feat1'].apply(np.log).values
    actual = result.feat1.values
    assert np.array_equal(expected, actual)


def test_numerical_transformer_serialization(simple_dataset):
    """
    Test that a fitted mapper survives a joblib dump/load round trip.

    The mapper is fitted, written to a temporary file, reloaded, and the
    reloaded copy must transform the dataset to the same values as the
    original mapper does.
    """
    transformer = DataFrameMapper([
        ('feat1', NumericalTransformer('log'))
    ])

    df = simple_dataset
    transformer.fit(df)

    # The context manager closes (and thereby removes) the temporary file
    # even when dumping or loading raises.
    with tempfile.NamedTemporaryFile(delete=True) as f:
        joblib.dump(transformer, f.name)
        restored = joblib.load(f.name)

    # The comparison result must be asserted; a bare np.array_equal(...)
    # call discards its return value and the test could never fail.
    assert np.array_equal(transformer.transform(df), restored.transform(df))