1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166
|
{{py:
implementation_specific_values = [
# Each entry is a tuple of template parameters, in this order:
#
# name_suffix, INPUT_DTYPE_t, INPUT_DTYPE
#
# On the one hand, an empty string is used for `name_suffix`
# for the float64 case so that the original float64 implementation
# is still exposed under the same API, namely `DistanceMetric`.
#
# On the other hand, '32' is used for `name_suffix` for the float32
# case to remove ambiguity and use `DistanceMetric32`, which is not
# publicly exposed.
#
# The metric mapping is adapted accordingly to route to the correct
# implementations.
#
# We also use the 64-bit types defined in `sklearn.utils._typedefs`
# to maintain backward compatibility at the symbol level for extra
# safety.
#
('', 'DTYPE_t', 'DTYPE'),
('32', 'cnp.float32_t', 'np.float32')
]
}}
cimport numpy as cnp
from libc.math cimport sqrt, exp
from ..utils._typedefs cimport DTYPE_t, ITYPE_t, SPARSE_INDEX_TYPE_t
{{for name_suffix, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}}
######################################################################
# Inline distance functions
#
# We use these for the default (euclidean) case so that they can be
# inlined. This leads to faster computation for the most common case
cdef inline DTYPE_t euclidean_dist{{name_suffix}}(
    const {{INPUT_DTYPE_t}}* x1,
    const {{INPUT_DTYPE_t}}* x2,
    ITYPE_t size,
) nogil except -1:
    # Euclidean distance between the first `size` elements of `x1` and `x2`:
    # the square root of the sum of squared element-wise differences.
    cdef cnp.intp_t k
    cdef DTYPE_t delta
    cdef DTYPE_t sq_sum = 0

    for k in range(size):
        # The difference is computed on the input values and then cast to
        # DTYPE_t, so squaring and accumulation are carried out in DTYPE_t.
        delta = <DTYPE_t> (x1[k] - x2[k])
        sq_sum += delta * delta

    return sqrt(sq_sum)
cdef inline DTYPE_t euclidean_rdist{{name_suffix}}(
    const {{INPUT_DTYPE_t}}* x1,
    const {{INPUT_DTYPE_t}}* x2,
    ITYPE_t size,
) nogil except -1:
    # Sum of squared element-wise differences over the first `size` elements
    # of `x1` and `x2`. Unlike the plain distance, no square root is taken.
    cdef cnp.intp_t k
    cdef DTYPE_t delta
    cdef DTYPE_t sq_sum = 0

    for k in range(size):
        # The difference is computed on the input values and then cast to
        # DTYPE_t, so squaring and accumulation are carried out in DTYPE_t.
        delta = <DTYPE_t>(x1[k] - x2[k])
        sq_sum += delta * delta

    return sq_sum
cdef inline DTYPE_t euclidean_dist_to_rdist{{name_suffix}}(
    const {{INPUT_DTYPE_t}} dist
) nogil except -1:
    # Square the given distance; the product is formed on the input value
    # and then returned as DTYPE_t.
    cdef DTYPE_t squared = dist * dist
    return squared
cdef inline DTYPE_t euclidean_rdist_to_dist{{name_suffix}}(
    const {{INPUT_DTYPE_t}} dist
) nogil except -1:
    # Take the square root of the given value and return it as DTYPE_t.
    cdef DTYPE_t root = sqrt(dist)
    return root
######################################################################
# DistanceMetric{{name_suffix}} base class
# Declaration of the distance-metric extension type: its C-level attributes
# and the signatures of its cdef methods. Only declarations appear here; no
# method bodies are given in this block.
cdef class DistanceMetric{{name_suffix}}:
# The following attributes are required for a few of the subclasses.
# We must define them here so that Cython's limited polymorphism will work.
# Because we don't expect to instantiate a lot of these objects, the
# extra memory overhead of this setup should not be an issue.
cdef DTYPE_t p
cdef DTYPE_t[::1] vec
cdef DTYPE_t[:, ::1] mat
cdef ITYPE_t size
cdef object func
cdef object kwargs
# Distance between two dense buffers `x1` and `x2` of `size` elements.
# Callable without the GIL; -1 is the declared error return value.
cdef DTYPE_t dist(
self,
const {{INPUT_DTYPE_t}}* x1,
const {{INPUT_DTYPE_t}}* x2,
ITYPE_t size,
) nogil except -1
# Same signature as `dist`. Presumably the "reduced" distance, by analogy
# with the inline euclidean rdist helper, which omits the square root --
# confirm against the implementation.
cdef DTYPE_t rdist(
self,
const {{INPUT_DTYPE_t}}* x1,
const {{INPUT_DTYPE_t}}* x2,
ITYPE_t size,
) nogil except -1
# Sparse counterpart of `dist`: each operand is given as a data pointer plus
# an indices memoryview. NOTE(review): the start/end arguments presumably
# delimit one row's slice of the data/indices arrays (CSR layout), and `size`
# is presumably the number of features -- confirm against the implementation.
cdef DTYPE_t dist_csr(
self,
const {{INPUT_DTYPE_t}}* x1_data,
const SPARSE_INDEX_TYPE_t[:] x1_indices,
const {{INPUT_DTYPE_t}}* x2_data,
const SPARSE_INDEX_TYPE_t[:] x2_indices,
const SPARSE_INDEX_TYPE_t x1_start,
const SPARSE_INDEX_TYPE_t x1_end,
const SPARSE_INDEX_TYPE_t x2_start,
const SPARSE_INDEX_TYPE_t x2_end,
const ITYPE_t size,
) nogil except -1
# Sparse counterpart of `rdist`; takes the same arguments as `dist_csr`.
cdef DTYPE_t rdist_csr(
self,
const {{INPUT_DTYPE_t}}* x1_data,
const SPARSE_INDEX_TYPE_t[:] x1_indices,
const {{INPUT_DTYPE_t}}* x2_data,
const SPARSE_INDEX_TYPE_t[:] x2_indices,
const SPARSE_INDEX_TYPE_t x1_start,
const SPARSE_INDEX_TYPE_t x1_end,
const SPARSE_INDEX_TYPE_t x2_start,
const SPARSE_INDEX_TYPE_t x2_end,
const ITYPE_t size,
) nogil except -1
# Takes a C-contiguous 2D input `X` and a writable C-contiguous 2D buffer `D`
# (presumably filled with the pairwise distances within `X` -- confirm).
# Returns an int with -1 as error value; declared without `nogil`.
cdef int pdist(
self,
const {{INPUT_DTYPE_t}}[:, ::1] X,
DTYPE_t[:, ::1] D,
) except -1
# Like `pdist` but with two inputs `X` and `Y` (presumably the distances
# between rows of `X` and rows of `Y` are written to `D` -- confirm).
# Declared without `nogil`.
cdef int cdist(
self,
const {{INPUT_DTYPE_t}}[:, ::1] X,
const {{INPUT_DTYPE_t}}[:, ::1] Y,
DTYPE_t[:, ::1] D,
) except -1
# Sparse counterpart of `pdist`: the input is passed as a data pointer with
# indices and indptr memoryviews. Unlike `pdist`, it is declared `nogil`.
cdef int pdist_csr(
self,
const {{INPUT_DTYPE_t}}* x1_data,
const SPARSE_INDEX_TYPE_t[:] x1_indices,
const SPARSE_INDEX_TYPE_t[:] x1_indptr,
const ITYPE_t size,
DTYPE_t[:, ::1] D,
) nogil except -1
# Sparse counterpart of `cdist`: both inputs are passed as a data pointer
# with indices and indptr memoryviews. Unlike `cdist`, it is declared `nogil`.
cdef int cdist_csr(
self,
const {{INPUT_DTYPE_t}}* x1_data,
const SPARSE_INDEX_TYPE_t[:] x1_indices,
const SPARSE_INDEX_TYPE_t[:] x1_indptr,
const {{INPUT_DTYPE_t}}* x2_data,
const SPARSE_INDEX_TYPE_t[:] x2_indices,
const SPARSE_INDEX_TYPE_t[:] x2_indptr,
const ITYPE_t size,
DTYPE_t[:, ::1] D,
) nogil except -1
# Scalar conversions between the two distance forms, as the names indicate
# (presumably the per-metric analogue of the inline euclidean conversion
# helpers -- confirm against the implementation).
cdef DTYPE_t _rdist_to_dist(self, {{INPUT_DTYPE_t}} rdist) nogil except -1
cdef DTYPE_t _dist_to_rdist(self, {{INPUT_DTYPE_t}} dist) nogil except -1
{{endfor}}
|