1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
|
import numpy as np
from scipy.stats import rankdata as scipy_rankdata
from sklearn.utils.extmath import stable_cumsum
from sklearn.utils.deprecation import deprecated
# Remove in sklearn 0.21
@deprecated("sklearn.utils.stats.rankdata was deprecated in version 0.19 and "
            "will be removed in 0.21. Use scipy.stats.rankdata instead.")
def rankdata(*args, **kwargs):
    """Deprecated alias for ``scipy.stats.rankdata``.

    Every positional and keyword argument is forwarded unchanged to
    ``scipy.stats.rankdata`` and its result is returned as-is.
    """
    ranks = scipy_rankdata(*args, **kwargs)
    return ranks
def _weighted_percentile(array, sample_weight, percentile=50):
    """
    Compute the weighted ``percentile`` of ``array`` with ``sample_weight``.

    The values are sorted, their weights are accumulated in that order, and
    the first value whose cumulative weight reaches ``percentile`` percent of
    the total weight is returned. No interpolation is performed: the result
    is always an element of ``array``.

    Parameters
    ----------
    array : array-like
        Values to take the percentile of. Expected to be one-dimensional
        (the index arithmetic below assumes a flat array).
    sample_weight : array-like
        Weight of each entry of ``array``; indexed with the same positions,
        so it must have the same length.
    percentile : float, default 50
        Percentile to compute, expressed on a 0-100 scale.

    Returns
    -------
    The element of ``array`` located at the weighted percentile.
    """
    # Coerce to ndarrays so the positional fancy-indexing below also works for
    # plain sequences and is not label-based for pandas objects. This is a
    # no-op when ndarrays are passed in.
    array = np.asarray(array)
    sample_weight = np.asarray(sample_weight)

    sorted_idx = np.argsort(array)

    # Cumulative weight of the samples, taken in ascending order of value.
    weight_cdf = stable_cumsum(sample_weight[sorted_idx])

    # Position of the first sample whose cumulative weight reaches the
    # requested fraction of the total weight (the last entry of the CDF).
    target_weight = (percentile / 100.) * weight_cdf[-1]
    percentile_idx = np.searchsorted(weight_cdf, target_weight)

    # in rare cases, percentile_idx equals len(sorted_idx); clamp it so it
    # stays a valid position.
    percentile_idx = np.clip(percentile_idx, 0, len(sorted_idx) - 1)
    return array[sorted_idx[percentile_idx]]
|