1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184
|
"""Nearest Neighbors graph functions"""
# Author: Jake Vanderplas <vanderplas@astro.washington.edu>
#
# License: BSD 3 clause (C) INRIA, University of Amsterdam
from .base import KNeighborsMixin, RadiusNeighborsMixin
from .unsupervised import NearestNeighbors
def _check_params(X, metric, p, metric_params):
"""Check the validity of the input parameters"""
params = zip(['metric', 'p', 'metric_params'],
[metric, p, metric_params])
est_params = X.get_params()
for param_name, func_param in params:
if func_param != est_params[param_name]:
raise ValueError(
"Got %s for %s, while the estimator has %s for "
"the same parameter." % (
func_param, param_name, est_params[param_name]))
def _query_include_self(X, include_self):
"""Return the query based on include_self param"""
if include_self:
query = X._fit_X
else:
query = None
return query
def kneighbors_graph(X, n_neighbors, mode='connectivity', metric='minkowski',
p=2, metric_params=None, include_self=False, n_jobs=None):
"""Computes the (weighted) graph of k-Neighbors for points in X
Read more in the :ref:`User Guide <unsupervised_neighbors>`.
Parameters
----------
X : array-like or BallTree, shape = [n_samples, n_features]
Sample data, in the form of a numpy array or a precomputed
:class:`BallTree`.
n_neighbors : int
Number of neighbors for each sample.
mode : {'connectivity', 'distance'}, optional
Type of returned matrix: 'connectivity' will return the connectivity
matrix with ones and zeros, and 'distance' will return the distances
between neighbors according to the given metric.
metric : string, default 'minkowski'
The distance metric used to calculate the k-Neighbors for each sample
point. The DistanceMetric class gives a list of available metrics.
The default distance is 'euclidean' ('minkowski' metric with the p
param equal to 2.)
p : int, default 2
Power parameter for the Minkowski metric. When p = 1, this is
equivalent to using manhattan_distance (l1), and euclidean_distance
(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
metric_params : dict, optional
additional keyword arguments for the metric function.
include_self : bool, default=False.
Whether or not to mark each sample as the first nearest neighbor to
itself. If `None`, then True is used for mode='connectivity' and False
for mode='distance' as this will preserve backwards compatibility.
n_jobs : int or None, optional (default=None)
The number of parallel jobs to run for neighbors search.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
Returns
-------
A : sparse matrix in CSR format, shape = [n_samples, n_samples]
A[i, j] is assigned the weight of edge that connects i to j.
Examples
--------
>>> X = [[0], [3], [1]]
>>> from sklearn.neighbors import kneighbors_graph
>>> A = kneighbors_graph(X, 2, mode='connectivity', include_self=True)
>>> A.toarray()
array([[1., 0., 1.],
[0., 1., 1.],
[1., 0., 1.]])
See also
--------
radius_neighbors_graph
"""
if not isinstance(X, KNeighborsMixin):
X = NearestNeighbors(n_neighbors, metric=metric, p=p,
metric_params=metric_params, n_jobs=n_jobs).fit(X)
else:
_check_params(X, metric, p, metric_params)
query = _query_include_self(X, include_self)
return X.kneighbors_graph(X=query, n_neighbors=n_neighbors, mode=mode)
def radius_neighbors_graph(X, radius, mode='connectivity', metric='minkowski',
p=2, metric_params=None, include_self=False,
n_jobs=None):
"""Computes the (weighted) graph of Neighbors for points in X
Neighborhoods are restricted the points at a distance lower than
radius.
Read more in the :ref:`User Guide <unsupervised_neighbors>`.
Parameters
----------
X : array-like or BallTree, shape = [n_samples, n_features]
Sample data, in the form of a numpy array or a precomputed
:class:`BallTree`.
radius : float
Radius of neighborhoods.
mode : {'connectivity', 'distance'}, optional
Type of returned matrix: 'connectivity' will return the connectivity
matrix with ones and zeros, and 'distance' will return the distances
between neighbors according to the given metric.
metric : string, default 'minkowski'
The distance metric used to calculate the neighbors within a
given radius for each sample point. The DistanceMetric class
gives a list of available metrics. The default distance is
'euclidean' ('minkowski' metric with the param equal to 2.)
p : int, default 2
Power parameter for the Minkowski metric. When p = 1, this is
equivalent to using manhattan_distance (l1), and euclidean_distance
(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
metric_params : dict, optional
additional keyword arguments for the metric function.
include_self : bool, default=False
Whether or not to mark each sample as the first nearest neighbor to
itself. If `None`, then True is used for mode='connectivity' and False
for mode='distance' as this will preserve backwards compatibility.
n_jobs : int or None, optional (default=None)
The number of parallel jobs to run for neighbors search.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
Returns
-------
A : sparse matrix in CSR format, shape = [n_samples, n_samples]
A[i, j] is assigned the weight of edge that connects i to j.
Examples
--------
>>> X = [[0], [3], [1]]
>>> from sklearn.neighbors import radius_neighbors_graph
>>> A = radius_neighbors_graph(X, 1.5, mode='connectivity',
... include_self=True)
>>> A.toarray()
array([[1., 0., 1.],
[0., 1., 0.],
[1., 0., 1.]])
See also
--------
kneighbors_graph
"""
if not isinstance(X, RadiusNeighborsMixin):
X = NearestNeighbors(radius=radius, metric=metric, p=p,
metric_params=metric_params, n_jobs=n_jobs).fit(X)
else:
_check_params(X, metric, p, metric_params)
query = _query_include_self(X, include_self)
return X.radius_neighbors_graph(query, radius, mode)
|