1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169
|
# Authors: Gilles Louppe <g.louppe@gmail.com>
# Peter Prettenhofer <peter.prettenhofer@gmail.com>
# Arnaud Joly <arnaud.v.joly@gmail.com>
# Jacob Schreiber <jmschreiber91@gmail.com>
# Nelson Liu <nelson@nelsonliu.me>
#
# License: BSD 3 clause
# See _utils.pyx for details.
import numpy as np
cimport numpy as np
from _tree cimport Node
from sklearn.neighbors.quad_tree cimport Cell
# Short aliases for the NumPy C scalar types used by the declarations below.
ctypedef np.npy_float32 DTYPE_t # Type of X (32-bit float)
ctypedef np.npy_float64 DOUBLE_t # Type of y, sample_weight (64-bit float)
ctypedef np.npy_intp SIZE_t # Type for indices and counters (pointer-sized int)
ctypedef np.npy_int32 INT32_t # Signed 32 bit integer
ctypedef np.npy_uint32 UINT32_t # Unsigned 32 bit integer
cdef enum:
    # Upper bound of the project's own rand_r replacement (implemented in
    # _utils.pyx). RAND_MAX is deliberately not used: its value differs
    # between platforms and is particularly tiny on Windows/MSVC.
    RAND_R_MAX = 0x7FFFFFFF
# safe_realloc(&p, n) resizes the allocation of p to n * sizeof(*p) bytes, or
# raises a MemoryError. It never calls free; releasing the memory is
# __dealloc__'s job. For example,
#
#     cdef DTYPE_t *p = NULL
#     safe_realloc(&p, n)
#
# is equivalent to p = malloc(n * sizeof(*p)) with error checking.
ctypedef fused realloc_ptr:
    # Pointer types safe_realloc can be specialised for.
    # Add pointer types here as needed.
    (DTYPE_t*)
    (SIZE_t*)
    (unsigned char*)
    (WeightedPQueueRecord*)
    (DOUBLE_t*)
    (DOUBLE_t**)
    (Node*)
    (Cell*)
    (Node**)
    (StackRecord*)
    (PriorityHeapRecord*)
# Resize the allocation behind p[0] so it can hold `nelems` elements of the
# pointed-to type. `except *` makes every caller check for a pending Python
# exception after the call, because no return value can be reserved as an
# error marker. `nogil` is written after the exception clause: Cython 3
# deprecates the `nogil except ...` ordering, and this order is also accepted
# by Cython 0.29.
cdef realloc_ptr safe_realloc(realloc_ptr* p, size_t nelems) except * nogil

# Build a NumPy array from a SIZE_t buffer of `size` elements.
# NOTE(review): whether the data is copied or viewed is decided in
# _utils.pyx -- check there before relying on ownership.
cdef np.ndarray sizet_ptr_to_ndarray(SIZE_t* data, SIZE_t size)

# Random integer drawn from the range described by `low` and `high`, using
# the caller-owned generator state behind `random_state`.
# NOTE(review): inclusive/exclusive bounds are not visible here -- see
# _utils.pyx.
cdef SIZE_t rand_int(SIZE_t low, SIZE_t high,
                     UINT32_t* random_state) nogil

# Random double drawn from the range described by `low` and `high`, using
# the caller-owned generator state behind `random_state`.
cdef double rand_uniform(double low, double high,
                         UINT32_t* random_state) nogil

# Logarithm helper callable without the GIL.
# NOTE(review): the base is defined by the implementation in _utils.pyx.
cdef double log(double x) nogil
# =============================================================================
# Stack data structure
# =============================================================================
# One entry of the Stack used for depth-first tree growing.
# Field notes marked "presumably" are inferred from the names only; confirm
# them against the tree builder that fills the record in.
cdef struct StackRecord:
    SIZE_t start                 # presumably start of the node's sample range
    SIZE_t end                   # presumably end of the node's sample range
    SIZE_t depth
    SIZE_t parent
    bint is_left
    double impurity
    SIZE_t n_constant_features
cdef class Stack:
    # Stack of StackRecord entries; the implementation lives in _utils.pyx.

    cdef SIZE_t capacity          # presumably the allocated length of stack_
    cdef SIZE_t top               # presumably the number of records in use
    cdef StackRecord* stack_      # raw record buffer

    cdef bint is_empty(self) nogil

    # Takes one argument per StackRecord field, in field order.
    # Returns -1 with a Python exception set on failure (`except -1`).
    # `nogil` follows the exception clause because Cython 3 deprecates the
    # `nogil except -1` ordering; this order also parses on Cython 0.29.
    cdef int push(self, SIZE_t start, SIZE_t end, SIZE_t depth, SIZE_t parent,
                  bint is_left, double impurity,
                  SIZE_t n_constant_features) except -1 nogil

    # Writes a record into `res`; the int return is a status code whose
    # values are defined in _utils.pyx.
    cdef int pop(self, StackRecord* res) nogil
# =============================================================================
# PriorityHeap data structure
# =============================================================================
# One entry of the PriorityHeap: a node on the frontier during best-first
# tree growing.
# Field notes marked "presumably" are inferred from the names only; confirm
# them against the tree builder that fills the record in.
cdef struct PriorityHeapRecord:
    SIZE_t node_id
    SIZE_t start                 # presumably start of the node's sample range
    SIZE_t end                   # presumably end of the node's sample range
    SIZE_t pos                   # presumably the split position within it
    SIZE_t depth
    bint is_leaf
    double impurity
    double impurity_left
    double impurity_right
    double improvement
cdef class PriorityHeap:
    # Heap of PriorityHeapRecord entries; the implementation lives in
    # _utils.pyx.

    cdef SIZE_t capacity              # presumably the allocated length of heap_
    cdef SIZE_t heap_ptr              # presumably the number of records in use
    cdef PriorityHeapRecord* heap_    # raw record buffer

    cdef bint is_empty(self) nogil

    # Sift helpers operating on an explicit `heap` buffer at index `pos`.
    cdef void heapify_up(self, PriorityHeapRecord* heap, SIZE_t pos) nogil
    cdef void heapify_down(self, PriorityHeapRecord* heap, SIZE_t pos,
                           SIZE_t heap_length) nogil

    # Takes one argument per PriorityHeapRecord field. Note the argument
    # order differs from the struct's field order: `improvement` is passed
    # before the three impurity values but is the last field of the struct.
    # Returns -1 with a Python exception set on failure (`except -1`).
    # `nogil` follows the exception clause because Cython 3 deprecates the
    # `nogil except -1` ordering; this order also parses on Cython 0.29.
    cdef int push(self, SIZE_t node_id, SIZE_t start, SIZE_t end, SIZE_t pos,
                  SIZE_t depth, bint is_leaf, double improvement,
                  double impurity, double impurity_left,
                  double impurity_right) except -1 nogil

    # Writes a record into `res`; the int return is a status code whose
    # values are defined in _utils.pyx.
    cdef int pop(self, PriorityHeapRecord* res) nogil
# =============================================================================
# WeightedPQueue data structure
# =============================================================================
# One (value, weight) entry stored in the WeightedPQueue.
cdef struct WeightedPQueueRecord:
    DOUBLE_t data
    DOUBLE_t weight
cdef class WeightedPQueue:
    # Queue of WeightedPQueueRecord entries; the implementation lives in
    # _utils.pyx.

    cdef SIZE_t capacity                  # presumably the allocated length of array_
    cdef SIZE_t array_ptr                 # presumably the number of records in use
    cdef WeightedPQueueRecord* array_     # raw record buffer

    cdef bint is_empty(self) nogil

    # reset() and push() return -1 with a Python exception set on failure
    # (`except -1`). `nogil` follows the exception clause because Cython 3
    # deprecates the `nogil except -1` ordering; this order also parses on
    # Cython 0.29.
    cdef int reset(self) except -1 nogil
    cdef SIZE_t size(self) nogil
    cdef int push(self, DOUBLE_t data, DOUBLE_t weight) except -1 nogil

    # remove() takes the (data, weight) pair by value; pop() and peek() write
    # a pair through the given pointers. The int returns are status codes
    # whose values are defined in _utils.pyx.
    cdef int remove(self, DOUBLE_t data, DOUBLE_t weight) nogil
    cdef int pop(self, DOUBLE_t* data, DOUBLE_t* weight) nogil
    cdef int peek(self, DOUBLE_t* data, DOUBLE_t* weight) nogil

    # Index-based accessors for a stored record's weight and value.
    cdef DOUBLE_t get_weight_from_index(self, SIZE_t index) nogil
    cdef DOUBLE_t get_value_from_index(self, SIZE_t index) nogil
# =============================================================================
# WeightedMedianCalculator data structure
# =============================================================================
cdef class WeightedMedianCalculator:
    # Weighted-median helper that keeps its samples in a WeightedPQueue; the
    # implementation lives in _utils.pyx.

    cdef SIZE_t initial_capacity
    cdef WeightedPQueue samples
    cdef DOUBLE_t total_weight
    cdef SIZE_t k
    cdef DOUBLE_t sum_w_0_k  # represents sum(weights[0:k])
                             # = w[0] + w[1] + ... + w[k-1]

    cdef SIZE_t size(self) nogil

    # push() and reset() return -1 with a Python exception set on failure
    # (`except -1`). `nogil` follows the exception clause because Cython 3
    # deprecates the `nogil except -1` ordering; this order also parses on
    # Cython 0.29.
    cdef int push(self, DOUBLE_t data, DOUBLE_t weight) except -1 nogil
    cdef int reset(self) except -1 nogil

    # Bookkeeping hook paired with push(): receives the pushed pair and the
    # median as it was before the change.
    cdef int update_median_parameters_post_push(
        self, DOUBLE_t data, DOUBLE_t weight,
        DOUBLE_t original_median) nogil

    cdef int remove(self, DOUBLE_t data, DOUBLE_t weight) nogil
    cdef int pop(self, DOUBLE_t* data, DOUBLE_t* weight) nogil

    # Bookkeeping hook paired with removal: receives the removed pair and
    # the median as it was before the change.
    cdef int update_median_parameters_post_remove(
        self, DOUBLE_t data, DOUBLE_t weight,
        DOUBLE_t original_median) nogil

    cdef DOUBLE_t get_median(self) nogil
|