1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
|
# encoding: utf-8
# cython: cdivision=True
# cython: boundscheck=False
# cython: wraparound=False
#
# Author: Peter Prettenhofer
#
# License: BSD Style.
cimport cython
import numpy as np
cimport numpy as np
# Element type of the input data array ``X`` used throughout this module:
# ``DTYPE`` is the NumPy dtype object visible from Python code.
DTYPE = np.float32
# ``DTYPE_t`` is the matching C-level type used for typed buffers/pointers.
ctypedef np.float32_t DTYPE_t
cdef void _predict_regression_tree_inplace_fast(DTYPE_t *X,
                                                np.int32_t *children,
                                                np.int32_t *feature,
                                                np.float64_t *threshold,
                                                np.float64_t * value,
                                                double scale,
                                                Py_ssize_t k,
                                                Py_ssize_t K,
                                                Py_ssize_t n_samples,
                                                Py_ssize_t n_features,
                                                np.float64_t *out):
    """Add the scaled prediction of one regression tree to ``out[i, k]``.

    For every sample the tree is walked from the root (node 0) down to a
    leaf, and ``scale * value[leaf]`` is added to the sample's entry in
    column ``k`` of ``out``.

    The function works on raw data pointers of the tree arrays, which
    avoids buffer validation on each call. All arrays are indexed as flat,
    row-major memory: ``X[i * n_features + f]``, ``out[i * K + k]`` and
    ``children[node * 2 + {0, 1}]``. The caller is responsible for
    passing pointers whose memory layout matches this indexing.

    Parameters
    ----------
    X : DTYPE_t pointer
        The pointer to the data array of the input ``X``.
    children : np.int32_t pointer
        The pointer to the data array of the ``children`` array attribute
        of the :class:``sklearn.tree.Tree``. Two entries per node
        (left, right); an entry of ``-1`` marks a leaf.
    feature : np.int32_t pointer
        The pointer to the data array of the ``feature`` array attribute
        of the :class:``sklearn.tree.Tree``.
    threshold : np.float64_t pointer
        The pointer to the data array of the ``threshold`` array attribute
        of the :class:``sklearn.tree.Tree``.
    value : np.float64_t pointer
        The pointer to the data array of the ``value`` array attribute
        of the :class:``sklearn.tree.Tree``.
    scale : double
        A constant to scale the predictions.
    k : int
        The index of the tree output to be predicted. Must satisfy
        0 <= ``k`` < ``K``.
    K : int
        The number of regression tree outputs. For regression and
        binary classification ``K == 1``, for multi-class
        classification ``K == n_classes``.
    n_samples : int
        The number of samples in the input array ``X``;
        ``n_samples == X.shape[0]``.
    n_features : int
        The number of features; ``n_features == X.shape[1]``.
    out : np.float64_t pointer
        The pointer to the data array where the predictions are stored.
        ``out`` is assumed to be a two-dimensional array of
        shape ``(n_samples, K)``.
    """
    cdef Py_ssize_t row
    cdef np.int32_t node
    cdef np.int32_t left
    cdef np.int32_t right
    cdef int n_children = 2  # children.shape[1]

    for row in range(n_samples):
        # Every traversal starts at the root.
        node = 0
        while True:
            # A node is a leaf as soon as either child slot holds -1.
            left = children[node * n_children]
            if left == -1:
                break
            right = children[(node * n_children) + 1]
            if right == -1:
                break
            # Values not exceeding the threshold go to the left child.
            if X[(row * n_features) + feature[node]] <= threshold[node]:
                node = left
            else:
                node = right
        # Accumulate in place so that successive trees add up.
        out[(row * K) + k] += scale * value[node]
@cython.nonecheck(False)
def predict_stages(np.ndarray[object, ndim=2] estimators not None,
                   np.ndarray[DTYPE_t, ndim=2, mode='c'] X not None,
                   double scale,
                   np.ndarray[np.float64_t, ndim=2, mode='c'] out not None):
    """Add predictions of ``estimators`` to ``out``.

    Each estimator is scaled by ``scale`` before its prediction
    is added to ``out``.

    Parameters
    ----------
    estimators : ndarray of object, shape (n_estimators, K)
        The fitted trees; each element must expose the array attributes
        ``children``, ``feature``, ``threshold`` and ``value``.
    X : C-contiguous ndarray of DTYPE, shape (n_samples, n_features)
        The input samples.
    scale : double
        Constant factor applied to every tree prediction.
    out : C-contiguous ndarray of float64, shape (n_samples, K)
        Updated in place: the scaled prediction of ``estimators[i, k]``
        is added to column ``k`` for every stage ``i``.

    Raises
    ------
    TypeError
        If ``estimators``, ``X`` or ``out`` is ``None``.
    ValueError
        If ``X`` or ``out`` is not C-contiguous, or if the shape of
        ``out`` does not fit ``X`` and ``estimators``.
    """
    cdef Py_ssize_t i
    cdef Py_ssize_t k
    cdef Py_ssize_t n_estimators = estimators.shape[0]
    cdef Py_ssize_t n_samples = X.shape[0]
    cdef Py_ssize_t n_features = X.shape[1]
    cdef Py_ssize_t K = estimators.shape[1]
    cdef object tree
    cdef DTYPE_t *X_data
    cdef np.float64_t *out_data

    # The low-level routine indexes ``out`` as a flat row-major
    # (n_samples, K) array; any other shape would be written out of bounds
    # or into the wrong cells.
    if out.shape[0] < n_samples or out.shape[1] != K:
        raise ValueError("out has shape (%d, %d) but at least %d rows and "
                         "exactly %d columns are required"
                         % (out.shape[0], out.shape[1], n_samples, K))

    # ``mode='c'`` in the signature guarantees that these raw pointers
    # refer to row-major memory, which is what the low-level routine
    # assumes. They do not change between trees, so fetch them once.
    X_data = <DTYPE_t*>(X.data)
    out_data = <np.float64_t*>((<np.ndarray>out).data)

    for i in range(n_estimators):
        for k in range(K):
            tree = estimators[i, k]
            # avoid buffer validation by casting to ndarray
            # and get data pointer
            # need brackets because of casting operator priority
            _predict_regression_tree_inplace_fast(
                X_data,
                <np.int32_t*>((<np.ndarray>(tree.children)).data),
                <np.int32_t*>((<np.ndarray>(tree.feature)).data),
                <np.float64_t*>((<np.ndarray>(tree.threshold)).data),
                <np.float64_t*>((<np.ndarray>(tree.value)).data),
                scale, k, K, n_samples, n_features,
                out_data)
@cython.nonecheck(False)
def predict_stage(np.ndarray[object, ndim=2] estimators not None,
                  int stage,
                  np.ndarray[DTYPE_t, ndim=2, mode='c'] X not None,
                  double scale,
                  np.ndarray[np.float64_t, ndim=2, mode='c'] out not None):
    """Add predictions of ``estimators[stage]`` to ``out``.

    Each estimator in the stage is scaled by ``scale`` before
    its prediction is added to ``out``.

    Parameters
    ----------
    estimators : ndarray of object, shape (n_estimators, K)
        The fitted trees; each element must expose the array attributes
        ``children``, ``feature``, ``threshold`` and ``value``.
    stage : int
        Row of ``estimators`` to evaluate. Must satisfy
        0 <= ``stage`` < ``n_estimators``.
    X : C-contiguous ndarray of DTYPE, shape (n_samples, n_features)
        The input samples.
    scale : double
        Constant factor applied to every tree prediction.
    out : C-contiguous ndarray of float64, shape (n_samples, K)
        Updated in place: the scaled prediction of
        ``estimators[stage, k]`` is added to column ``k``.

    Raises
    ------
    TypeError
        If ``estimators``, ``X`` or ``out`` is ``None``.
    IndexError
        If ``stage`` is outside ``[0, n_estimators)``.
    ValueError
        If ``X`` or ``out`` is not C-contiguous, or if the shape of
        ``out`` does not fit ``X`` and ``estimators``.
    """
    cdef Py_ssize_t k
    cdef Py_ssize_t n_estimators = estimators.shape[0]
    cdef Py_ssize_t n_samples = X.shape[0]
    cdef Py_ssize_t n_features = X.shape[1]
    cdef Py_ssize_t K = estimators.shape[1]
    cdef object tree
    cdef DTYPE_t *X_data
    cdef np.float64_t *out_data

    # Bounds checking and negative-index wraparound are disabled for this
    # module, so an invalid ``stage`` would read arbitrary memory instead
    # of raising; validate it explicitly.
    if stage < 0 or stage >= n_estimators:
        raise IndexError("stage %d is out of bounds for %d stages"
                         % (stage, n_estimators))

    # The low-level routine indexes ``out`` as a flat row-major
    # (n_samples, K) array; any other shape would be written out of bounds
    # or into the wrong cells.
    if out.shape[0] < n_samples or out.shape[1] != K:
        raise ValueError("out has shape (%d, %d) but at least %d rows and "
                         "exactly %d columns are required"
                         % (out.shape[0], out.shape[1], n_samples, K))

    # ``mode='c'`` in the signature guarantees that these raw pointers
    # refer to row-major memory, which is what the low-level routine
    # assumes. They do not change between trees, so fetch them once.
    X_data = <DTYPE_t*>(X.data)
    out_data = <np.float64_t*>((<np.ndarray>out).data)

    for k in range(K):
        tree = estimators[stage, k]
        # need brackets because of casting operator priority
        _predict_regression_tree_inplace_fast(
            X_data,
            <np.int32_t*>((<np.ndarray>(tree.children)).data),
            <np.int32_t*>((<np.ndarray>(tree.feature)).data),
            <np.float64_t*>((<np.ndarray>(tree.threshold)).data),
            <np.float64_t*>((<np.ndarray>(tree.value)).data),
            scale, k, K, n_samples, n_features,
            out_data)
|