File: stats.py

package info (click to toggle)
scikit-learn 0.20.2%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 51,036 kB
  • sloc: python: 108,171; ansic: 8,722; cpp: 5,651; makefile: 192; sh: 40
file content (27 lines) | stat: -rw-r--r-- 998 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import numpy as np
from scipy.stats import rankdata as scipy_rankdata

from sklearn.utils.extmath import stable_cumsum
from sklearn.utils.deprecation import deprecated


# Remove in sklearn 0.21
@deprecated("sklearn.utils.stats.rankdata was deprecated in version 0.19 and "
            "will be removed in 0.21. Use scipy.stats.rankdata instead.")
def rankdata(*args, **kwargs):
    """Deprecated alias that forwards to ``scipy.stats.rankdata``.

    All positional and keyword arguments are passed through unchanged and
    the result of ``scipy.stats.rankdata`` is returned as-is; see that
    function for the accepted parameters and the return value.

    NOTE(review): the ``deprecated`` decorator is defined elsewhere;
    presumably it emits a deprecation warning carrying the message above
    when this function is called -- confirm against its implementation.
    """
    return scipy_rankdata(*args, **kwargs)


def _weighted_percentile(array, sample_weight, percentile=50):
    """
    Compute the weighted ``percentile`` of ``array`` with ``sample_weight``.

    No interpolation is performed: the result is always an element of
    ``array``, namely the first one (in ascending order) whose cumulative
    weight reaches ``percentile`` percent of the total weight.

    Parameters
    ----------
    array : array-like
        Values to take the percentile of. Assumed to be 1-D and non-empty.
    sample_weight : array-like
        Weight attached to each entry of ``array``; assumed to have the
        same length as ``array``.
    percentile : float, default 50
        Requested percentile on a 0-100 scale (50 is the weighted median).

    Returns
    -------
    value
        The element of ``array`` located at the weighted percentile.
    """
    # Coerce up front so plain sequences (lists, tuples) can be indexed with
    # an integer index array below; this is a no-op for ndarray inputs.
    array = np.asarray(array)
    sample_weight = np.asarray(sample_weight)

    sorted_idx = np.argsort(array)

    # Cumulative weight of the samples taken in ascending order of value;
    # the last entry is therefore the total weight.
    weight_cdf = stable_cumsum(sample_weight[sorted_idx])

    # Position of the first sample whose cumulative weight reaches the
    # requested fraction of the total weight.
    percentile_idx = np.searchsorted(
        weight_cdf, (percentile / 100.) * weight_cdf[-1])
    # in rare cases, percentile_idx equals to len(sorted_idx)
    percentile_idx = np.clip(percentile_idx, 0, len(sorted_idx) - 1)
    return array[sorted_idx[percentile_idx]]