File: _feature_agglomeration.py

package info (click to toggle)
scikit-learn 0.20.2%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 51,036 kB
  • sloc: python: 108,171; ansic: 8,722; cpp: 5,651; makefile: 192; sh: 40
file content (81 lines) | stat: -rw-r--r-- 2,665 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
"""
Feature agglomeration. Base classes and functions for performing feature
agglomeration.
"""
# Author: V. Michel, A. Gramfort
# License: BSD 3 clause

import numpy as np

from ..base import TransformerMixin
from ..utils import check_array
from ..utils.validation import check_is_fitted
from scipy.sparse import issparse

###############################################################################
# Mixin class for feature agglomeration.


class AgglomerationTransform(TransformerMixin):
    """
    A class for feature agglomeration via the transform interface
    """

    pooling_func = np.mean

    def transform(self, X):
        """
        Transform a new matrix using the built clustering

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features] or [n_features]
            A M by N array of M observations in N dimensions or a length
            M array of M one-dimensional observations.

        Returns
        -------
        Y : array, shape = [n_samples, n_clusters] or [n_clusters]
            The pooled values for each feature cluster.
        """
        check_is_fitted(self, "labels_")

        pooling_func = self.pooling_func
        X = check_array(X)
        if len(self.labels_) != X.shape[1]:
            raise ValueError("X has a different number of features than "
                             "during fitting.")
        if pooling_func == np.mean and not issparse(X):
            size = np.bincount(self.labels_)
            n_samples = X.shape[0]
            # a fast way to compute the mean of grouped features
            nX = np.array([np.bincount(self.labels_, X[i, :]) / size
                          for i in range(n_samples)])
        else:
            nX = []
            for l in np.unique(self.labels_):
                nX.append(pooling_func(X[:, self.labels_ == l], axis=1))
            nX = np.array(nX).T
        return nX

    def inverse_transform(self, Xred):
        """
        Inverse the transformation.
        Return a vector of size nb_features with the values of Xred assigned
        to each group of features

        Parameters
        ----------
        Xred : array-like, shape=[n_samples, n_clusters] or [n_clusters,]
            The values to be assigned to each cluster of samples

        Returns
        -------
        X : array, shape=[n_samples, n_features] or [n_features]
            A vector of size n_samples with the values of Xred assigned to
            each of the cluster of samples.
        """
        check_is_fitted(self, "labels_")

        unil, inverse = np.unique(self.labels_, return_inverse=True)
        return Xred[..., inverse]