File: _dist_metrics.pxd.tp

package info (click to toggle)
scikit-learn 1.2.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 23,280 kB
  • sloc: python: 184,491; cpp: 5,783; ansic: 854; makefile: 307; sh: 45; javascript: 1
file content (166 lines) | stat: -rw-r--r-- 5,105 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
{{py:

implementation_specific_values = [
    # Each tuple holds, in order:
    #
    #       name_suffix, INPUT_DTYPE_t, INPUT_DTYPE
    #
    # On the one hand, an empty string is used for `name_suffix`
    # for the float64 case so that the original float64 implementation
    # is still exposed under the same API, namely `DistanceMetric`.
    #
    # On the other hand, '32' is used for `name_suffix` for the float32
    # case to remove ambiguity and use `DistanceMetric32`, which is not
    # publicly exposed.
    #
    # The metric mapping is adapted accordingly to route to the correct
    # implementations.
    #
    # We also use 64bit types as defined in `sklearn.utils._typedefs`
    # to maintain backward compatibility at the symbol level for extra
    # safety.
    #
    ('', 'DTYPE_t', 'DTYPE'),
    ('32', 'cnp.float32_t', 'np.float32')
]

}}
cimport numpy as cnp
from libc.math cimport sqrt, exp

from ..utils._typedefs cimport DTYPE_t, ITYPE_t, SPARSE_INDEX_TYPE_t

{{for name_suffix, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}}

######################################################################
# Inline distance functions
#
#  We use these for the default (euclidean) case so that they can be
#  inlined.  This leads to faster computation for the most common case.
# Euclidean distance between the `size`-long buffers `x1` and `x2`:
# the square root of the sum of squared element-wise differences.
# Each difference is cast to DTYPE_t before it is squared and accumulated.
cdef inline DTYPE_t euclidean_dist{{name_suffix}}(
    const {{INPUT_DTYPE_t}}* x1,
    const {{INPUT_DTYPE_t}}* x2,
    ITYPE_t size,
) nogil except -1:
    cdef cnp.intp_t idx
    cdef DTYPE_t diff
    cdef DTYPE_t sq_sum = 0
    for idx in range(size):
        diff = <DTYPE_t> (x1[idx] - x2[idx])
        sq_sum += diff * diff
    return sqrt(sq_sum)


# Sum of squared element-wise differences between the `size`-long buffers
# `x1` and `x2`; no square root is taken.
# Each difference is cast to DTYPE_t before it is squared and accumulated.
cdef inline DTYPE_t euclidean_rdist{{name_suffix}}(
    const {{INPUT_DTYPE_t}}* x1,
    const {{INPUT_DTYPE_t}}* x2,
    ITYPE_t size,
) nogil except -1:
    cdef cnp.intp_t idx
    cdef DTYPE_t diff
    cdef DTYPE_t sq_sum = 0
    for idx in range(size):
        diff = <DTYPE_t>(x1[idx] - x2[idx])
        sq_sum += diff * diff
    return sq_sum


# Return `dist` squared, as a DTYPE_t.
cdef inline DTYPE_t euclidean_dist_to_rdist{{name_suffix}}(
    const {{INPUT_DTYPE_t}} dist
) nogil except -1:
    cdef DTYPE_t squared = dist * dist
    return squared


# Return the square root of `dist`, as a DTYPE_t.
cdef inline DTYPE_t euclidean_rdist_to_dist{{name_suffix}}(
    const {{INPUT_DTYPE_t}} dist
) nogil except -1:
    cdef DTYPE_t root = sqrt(dist)
    return root


######################################################################
# DistanceMetric{{name_suffix}} base class
cdef class DistanceMetric{{name_suffix}}:
    # C-level declaration of the extension type: attributes and method
    # signatures only. No method bodies appear in this declaration file.
    # Inputs are typed with {{INPUT_DTYPE_t}}; every returned distance and
    # every output buffer `D` is typed with DTYPE_t.
    #
    # All methods use `except -1`, i.e. a return value of -1 signals that an
    # exception was raised. Methods marked `nogil` may be called without
    # holding the GIL.
    #
    # The following attributes are required for a few of the subclasses.
    # we must define them here so that cython's limited polymorphism will work.
    # Because we don't expect to instantiate a lot of these objects, the
    # extra memory overhead of this setup should not be an issue.
    cdef DTYPE_t p
    cdef DTYPE_t[::1] vec
    cdef DTYPE_t[:, ::1] mat
    cdef ITYPE_t size
    cdef object func
    cdef object kwargs

    # Distance between two vectors passed as raw pointers `x1` and `x2`
    # together with their common length `size`.
    cdef DTYPE_t dist(
        self,
        const {{INPUT_DTYPE_t}}* x1,
        const {{INPUT_DTYPE_t}}* x2,
        ITYPE_t size,
    ) nogil except -1

    # Same signature as `dist`.
    # NOTE(review): presumably the "reduced" distance; for the Euclidean
    # inline helpers in this file that is the squared distance -- confirm
    # against the implementation.
    cdef DTYPE_t rdist(
        self,
        const {{INPUT_DTYPE_t}}* x1,
        const {{INPUT_DTYPE_t}}* x2,
        ITYPE_t size,
    ) nogil except -1

    # Counterpart of `dist` whose operands are given as a data pointer, an
    # index memoryview and a start/end pair each.
    # NOTE(review): presumably the operands are rows of CSR matrices and
    # start/end delimit the row inside data/indices -- confirm.
    cdef DTYPE_t dist_csr(
        self,
        const {{INPUT_DTYPE_t}}* x1_data,
        const SPARSE_INDEX_TYPE_t[:] x1_indices,
        const {{INPUT_DTYPE_t}}* x2_data,
        const SPARSE_INDEX_TYPE_t[:] x2_indices,
        const SPARSE_INDEX_TYPE_t x1_start,
        const SPARSE_INDEX_TYPE_t x1_end,
        const SPARSE_INDEX_TYPE_t x2_start,
        const SPARSE_INDEX_TYPE_t x2_end,
        const ITYPE_t size,
    ) nogil except -1

    # Same signature as `dist_csr`.
    # NOTE(review): presumably the sparse counterpart of `rdist` -- confirm.
    cdef DTYPE_t rdist_csr(
        self,
        const {{INPUT_DTYPE_t}}* x1_data,
        const SPARSE_INDEX_TYPE_t[:] x1_indices,
        const {{INPUT_DTYPE_t}}* x2_data,
        const SPARSE_INDEX_TYPE_t[:] x2_indices,
        const SPARSE_INDEX_TYPE_t x1_start,
        const SPARSE_INDEX_TYPE_t x1_end,
        const SPARSE_INDEX_TYPE_t x2_start,
        const SPARSE_INDEX_TYPE_t x2_end,
        const ITYPE_t size,
    ) nogil except -1

    # Takes one read-only C-contiguous 2-D array `X` and a writable
    # C-contiguous 2-D array `D`. Not declared `nogil`, so the GIL is
    # required.
    # NOTE(review): presumably fills `D` with pairwise distances between the
    # rows of `X` -- confirm.
    cdef int pdist(
        self,
        const {{INPUT_DTYPE_t}}[:, ::1] X,
        DTYPE_t[:, ::1] D,
    ) except -1

    # Takes two read-only C-contiguous 2-D arrays `X` and `Y` and a writable
    # C-contiguous 2-D array `D`. Not declared `nogil`, so the GIL is
    # required.
    # NOTE(review): presumably fills `D` with distances between rows of `X`
    # and rows of `Y` -- confirm.
    cdef int cdist(
        self,
        const {{INPUT_DTYPE_t}}[:, ::1] X,
        const {{INPUT_DTYPE_t}}[:, ::1] Y,
        DTYPE_t[:, ::1] D,
    ) except -1

    # Counterpart of `pdist` whose input is given as a data pointer plus
    # `indices` and `indptr` memoryviews; unlike `pdist` it is `nogil`.
    # NOTE(review): presumably a CSR matrix with `size` features -- confirm.
    cdef int pdist_csr(
        self,
        const {{INPUT_DTYPE_t}}* x1_data,
        const SPARSE_INDEX_TYPE_t[:] x1_indices,
        const SPARSE_INDEX_TYPE_t[:] x1_indptr,
        const ITYPE_t size,
        DTYPE_t[:, ::1] D,
    ) nogil except -1

    # Counterpart of `cdist` whose two inputs are each given as a data
    # pointer plus `indices` and `indptr` memoryviews; unlike `cdist` it is
    # `nogil`.
    # NOTE(review): presumably two CSR matrices sharing `size` features --
    # confirm.
    cdef int cdist_csr(
        self,
        const {{INPUT_DTYPE_t}}* x1_data,
        const SPARSE_INDEX_TYPE_t[:] x1_indices,
        const SPARSE_INDEX_TYPE_t[:] x1_indptr,
        const {{INPUT_DTYPE_t}}* x2_data,
        const SPARSE_INDEX_TYPE_t[:] x2_indices,
        const SPARSE_INDEX_TYPE_t[:] x2_indptr,
        const ITYPE_t size,
        DTYPE_t[:, ::1] D,
    ) nogil except -1

    # Scalar conversion helpers. Note that the argument is typed
    # {{INPUT_DTYPE_t}} while the result is DTYPE_t.
    # NOTE(review): presumably these map between the values returned by
    # `rdist` and `dist` -- confirm against the implementation.
    cdef DTYPE_t _rdist_to_dist(self, {{INPUT_DTYPE_t}} rdist) nogil except -1

    cdef DTYPE_t _dist_to_rdist(self, {{INPUT_DTYPE_t}} dist) nogil except -1

{{endfor}}