1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
|
import numpy as np
from scipy.stats import rankdata as _sp_rankdata
from .fixes import bincount
# Backport of the ``method`` argument of ``scipy.stats.rankdata``.
# TODO: remove once scipy 0.13 is the minimum supported version.
def _rankdata(a, method="average"):
"""Assign ranks to data, dealing with ties appropriately.
Ranks begin at 1. The method argument controls how ranks are assigned
to equal values.
Parameters
----------
a : array_like
The array of values to be ranked. The array is first flattened.
method : str, optional
The method used to assign ranks to tied elements.
The options are 'max'.
'max': The maximum of the ranks that would have been assigned
to all the tied values is assigned to each value.
Returns
-------
ranks : ndarray
An array of length equal to the size of a, containing rank scores.
Notes
-----
We only backport the 'max' method
"""
if method != "max":
raise NotImplementedError()
unique_all, inverse = np.unique(a, return_inverse=True)
count = bincount(inverse, minlength=unique_all.size)
cum_count = count.cumsum()
rank = cum_count[inverse]
return rank
# Probe the installed scipy: if ``rankdata`` does not accept a second
# (method) argument the call raises TypeError and the local backport is
# used instead; otherwise scipy's own implementation is exported.
try:
    _sp_rankdata([1.], 'max')
except TypeError:
    rankdata = _rankdata
else:
    rankdata = _sp_rankdata
def _weighted_percentile(array, sample_weight, percentile=50):
"""Compute the weighted ``percentile`` of ``array`` with ``sample_weight``. """
sorted_idx = np.argsort(array)
# Find index of median prediction for each sample
weight_cdf = sample_weight[sorted_idx].cumsum()
percentile_idx = np.searchsorted(
weight_cdf, (percentile / 100.) * weight_cdf[-1])
return array[sorted_idx[percentile_idx]]
|