1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189
|
{{py:
# Template-level Python block: it defines the table that drives the `for`
# loop further down in this file. The loop body is expanded once per tuple.
implementation_specific_values = [
# Values are the following ones:
#
# name_suffix, upcast_to_float64, INPUT_DTYPE_t, INPUT_DTYPE
#
# We also use the float64 dtype and C-type names as defined in
# `sklearn.utils._typedefs` to maintain consistency.
#
# How each field is used by the declarations in this file:
#   - name_suffix is appended to every generated class name;
#   - upcast_to_float64 gates the declaration of the upcast buffers of the
#     dense-dense class (only the '32' entry sets it to True);
#   - INPUT_DTYPE_t is the element type of the dense X and Y memoryviews;
#   - INPUT_DTYPE is unpacked by the loop but is not referenced by any
#     declaration visible here.
('64', False, 'DTYPE_t', 'DTYPE'),
('32', True, 'cnp.float32_t', 'np.float32')
]
}}
cimport numpy as cnp
from libcpp.vector cimport vector
from ...utils._typedefs cimport DTYPE_t, ITYPE_t, SPARSE_INDEX_TYPE_t
# Signature of the `_64` sparse-sparse helper; only the declaration appears
# here, no body.
#
# Parameters, as declared:
#   X_data, X_indices, X_indptr:
#       read-only 1-D memoryviews describing X (presumably the data, indices
#       and indptr arrays of a CSR matrix -- TODO confirm against the caller).
#   X_start, X_end:
#       bounds of the chunk of X to process (presumably row indices with an
#       exclusive end -- TODO confirm against the implementation).
#   Y_data, Y_indices, Y_indptr, Y_start, Y_end:
#       same as above, for Y.
#   D:
#       raw pointer to a DTYPE_t buffer (presumably the output buffer that
#       receives the middle terms for the chunk pair -- TODO confirm; its
#       required size is not visible from this declaration).
#
# The function returns nothing and is declared `nogil`, so it can be called
# without holding the GIL.
cdef void _middle_term_sparse_sparse_64(
const DTYPE_t[:] X_data,
const SPARSE_INDEX_TYPE_t[:] X_indices,
const SPARSE_INDEX_TYPE_t[:] X_indptr,
ITYPE_t X_start,
ITYPE_t X_end,
const DTYPE_t[:] Y_data,
const SPARSE_INDEX_TYPE_t[:] Y_indices,
const SPARSE_INDEX_TYPE_t[:] Y_indptr,
ITYPE_t Y_start,
ITYPE_t Y_end,
DTYPE_t * D,
) nogil
{{for name_suffix, upcast_to_float64, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}}
# Base extension type, generated once per name suffix by the enclosing
# template loop. It declares the shared attributes and the `nogil` methods
# that the dense-dense and sparse-sparse subclasses declared later in this
# file inherit or re-declare.
cdef class MiddleTermComputer{{name_suffix}}:
cdef:
# Thread counts and sizes; all are stored as ITYPE_t.
# NOTE(review): how these are derived is decided by the implementation,
# which is not visible from this declaration file.
ITYPE_t effective_n_threads
ITYPE_t chunks_n_threads
ITYPE_t dist_middle_terms_chunks_size
ITYPE_t n_features
ITYPE_t chunk_size
# Buffers for the `-2 * X_c @ Y_c.T` term computed via GEMM
# (a vector of DTYPE_t vectors; presumably one inner vector per thread,
# given that the methods below take a `thread_num` -- TODO confirm).
vector[vector[DTYPE_t]] dist_middle_terms_chunks
# Methods prefixed `_parallel_on_X_` / `_parallel_on_Y_` come in two families,
# one per prefix. They all return nothing and are `nogil`.
# NOTE(review): the prefixes suggest one family per parallelisation strategy
# (over chunks of X or over chunks of Y); the call order is defined by the
# callers, which are not part of this file.
#
# Takes the bounds of a chunk of X and of a chunk of Y plus the calling
# thread number.
cdef void _parallel_on_X_pre_compute_and_reduce_distances_on_chunks(
self,
ITYPE_t X_start,
ITYPE_t X_end,
ITYPE_t Y_start,
ITYPE_t Y_end,
ITYPE_t thread_num,
) nogil
# Takes only the thread number.
cdef void _parallel_on_X_parallel_init(self, ITYPE_t thread_num) nogil
# Takes the thread number and the bounds of a chunk of X.
cdef void _parallel_on_X_init_chunk(
self,
ITYPE_t thread_num,
ITYPE_t X_start,
ITYPE_t X_end,
) nogil
# Takes no argument besides `self`.
cdef void _parallel_on_Y_init(self) nogil
# Takes the thread number and the bounds of a chunk of X.
cdef void _parallel_on_Y_parallel_init(
self,
ITYPE_t thread_num,
ITYPE_t X_start,
ITYPE_t X_end,
) nogil
# Same parameters as the `_parallel_on_X_` counterpart above.
cdef void _parallel_on_Y_pre_compute_and_reduce_distances_on_chunks(
self,
ITYPE_t X_start,
ITYPE_t X_end,
ITYPE_t Y_start,
ITYPE_t Y_end,
ITYPE_t thread_num
) nogil
# Returns a raw DTYPE_t pointer for the given pair of chunks and thread
# (presumably pointing into `dist_middle_terms_chunks` -- TODO confirm; the
# ownership and lifetime of the pointed-to memory are not stated here).
cdef DTYPE_t * _compute_dist_middle_terms(
self,
ITYPE_t X_start,
ITYPE_t X_end,
ITYPE_t Y_start,
ITYPE_t Y_end,
ITYPE_t thread_num,
) nogil
# Subclass of the base computer with the same name suffix, holding X and Y as
# dense 2-D arrays. It re-declares five of the base class methods with the
# same signatures.
cdef class DenseDenseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{name_suffix}}):
cdef:
# Read-only, C-contiguous 2-D memoryviews whose element type is the input
# type of the current template expansion.
const {{INPUT_DTYPE_t}}[:, ::1] X
const {{INPUT_DTYPE_t}}[:, ::1] Y
# The two buffers below are only declared in expansions where
# upcast_to_float64 is true, i.e. the '32' variant.
{{if upcast_to_float64}}
# Buffers for upcasting chunks of X and Y from 32bit to 64bit
vector[vector[DTYPE_t]] X_c_upcast
vector[vector[DTYPE_t]] Y_c_upcast
{{endif}}
# Re-declared from the base class: takes the bounds of a chunk of X and of a
# chunk of Y plus the thread number.
cdef void _parallel_on_X_pre_compute_and_reduce_distances_on_chunks(
self,
ITYPE_t X_start,
ITYPE_t X_end,
ITYPE_t Y_start,
ITYPE_t Y_end,
ITYPE_t thread_num,
) nogil
# Re-declared from the base class: takes the thread number and the bounds of
# a chunk of X.
cdef void _parallel_on_X_init_chunk(
self,
ITYPE_t thread_num,
ITYPE_t X_start,
ITYPE_t X_end,
) nogil
# Re-declared from the base class: takes the thread number and the bounds of
# a chunk of X.
cdef void _parallel_on_Y_parallel_init(
self,
ITYPE_t thread_num,
ITYPE_t X_start,
ITYPE_t X_end,
) nogil
# Re-declared from the base class: same parameters as the `_parallel_on_X_`
# counterpart above.
cdef void _parallel_on_Y_pre_compute_and_reduce_distances_on_chunks(
self,
ITYPE_t X_start,
ITYPE_t X_end,
ITYPE_t Y_start,
ITYPE_t Y_end,
ITYPE_t thread_num
) nogil
# Re-declared from the base class: returns a raw DTYPE_t pointer for the
# given pair of chunks and thread (what it points to is defined by the
# implementation, which is not visible here).
cdef DTYPE_t * _compute_dist_middle_terms(
self,
ITYPE_t X_start,
ITYPE_t X_end,
ITYPE_t Y_start,
ITYPE_t Y_end,
ITYPE_t thread_num,
) nogil
# Subclass of the base computer with the same name suffix, holding X and Y as
# triples of 1-D arrays. It re-declares three of the base class methods with
# the same signatures.
cdef class SparseSparseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{name_suffix}}):
cdef:
# Read-only 1-D memoryviews (presumably the data / indices / indptr arrays
# of CSR matrices -- TODO confirm against the implementation).
# Note that the data arrays are typed DTYPE_t in every template expansion,
# not with the per-expansion input type used by the dense-dense class.
const DTYPE_t[:] X_data
const SPARSE_INDEX_TYPE_t[:] X_indices
const SPARSE_INDEX_TYPE_t[:] X_indptr
const DTYPE_t[:] Y_data
const SPARSE_INDEX_TYPE_t[:] Y_indices
const SPARSE_INDEX_TYPE_t[:] Y_indptr
# Re-declared from the base class: takes the bounds of a chunk of X and of a
# chunk of Y plus the thread number.
cdef void _parallel_on_X_pre_compute_and_reduce_distances_on_chunks(
self,
ITYPE_t X_start,
ITYPE_t X_end,
ITYPE_t Y_start,
ITYPE_t Y_end,
ITYPE_t thread_num
) nogil
# Re-declared from the base class: same parameters as the `_parallel_on_X_`
# counterpart above.
cdef void _parallel_on_Y_pre_compute_and_reduce_distances_on_chunks(
self,
ITYPE_t X_start,
ITYPE_t X_end,
ITYPE_t Y_start,
ITYPE_t Y_end,
ITYPE_t thread_num
) nogil
# Re-declared from the base class: returns a raw DTYPE_t pointer for the
# given pair of chunks and thread (what it points to is defined by the
# implementation, which is not visible here).
cdef DTYPE_t * _compute_dist_middle_terms(
self,
ITYPE_t X_start,
ITYPE_t X_end,
ITYPE_t Y_start,
ITYPE_t Y_end,
ITYPE_t thread_num,
) nogil
{{endfor}}
|