File: features_generator.py

package info (click to toggle)
sklearn-pandas 2.2.0-5
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 440 kB
  • sloc: python: 1,177; sh: 12; makefile: 8
file content (65 lines) | stat: -rw-r--r-- 2,183 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
def gen_features(columns, classes=None, prefix='', suffix=''):
    """Generates a feature definition list which can be passed
    into DataFrameMapper

    Params:

    columns     an iterable of column names to generate features for.

    classes     a list of classes for each feature, a list of dictionaries with
                transformer class and init parameters, or None.

                If list of classes is provided, then each of them is
                instantiated with default arguments. Example:

                    classes = [StandardScaler, LabelBinarizer]

                If list of dictionaries is provided, then each of them should
                have a 'class' key with transformer class. All other keys are
                passed into 'class' value constructor. Example:

                    classes = [
                        {'class': StandardScaler, 'with_mean': False},
                        {'class': LabelBinarizer}
                    ]

                None entries inside the list are skipped. If nothing is left
                after skipping them, the transformer slot of every feature
                is None.

                If None value selected, then each feature left as is.

    prefix      add prefix to transformed column names

    suffix      add suffix to transformed column names.

    Returns:

    A list with one tuple per column. When classes is None each tuple is
    (column, None). Otherwise each tuple is
    (column, transformers, arguments) where transformers is a list of
    freshly created transformer instances (or None when there are none)
    and arguments is a dict holding 'prefix' and/or 'suffix' when they
    are non-empty.

    Raises:

    KeyError if a dictionary definition has no 'class' key.

    """
    if classes is None:
        return [(column, None) for column in columns]

    # The set of definitions does not depend on the column, so filter out
    # the None placeholders once instead of on every loop iteration.
    definitions = [
        definition for definition in classes if definition is not None
    ]

    # Only non-empty prefix/suffix values are forwarded.
    naming = {}
    if prefix:
        naming['prefix'] = prefix
    if suffix:
        naming['suffix'] = suffix

    feature_defs = []
    for column in columns:
        # Transformers are instantiated per column so that no instance is
        # shared between features.
        feature_transformers = []
        for definition in definitions:
            if isinstance(definition, dict):
                # Work on a copy: popping 'class' must not alter the
                # caller's dictionary.
                params = definition.copy()
                klass = params.pop('class')
                feature_transformers.append(klass(**params))
            else:
                feature_transformers.append(definition())

        # An empty transformer list is reported as None; every column gets
        # its own copy of the naming arguments.
        feature_defs.append(
            (column, feature_transformers or None, dict(naming))
        )

    return feature_defs