File: _discretize.pyx

package info (click to toggle)
orange3 3.40.0-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 15,912 kB
  • sloc: python: 162,745; ansic: 622; makefile: 322; sh: 93; cpp: 77
file content (95 lines) | stat: -rw-r--r-- 2,633 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#cython: embedsignature=True
#cython: language_level=3

import numpy
cimport numpy as np
import cython
from libc.math cimport log
from numpy cimport NPY_FLOAT64 as NPY_float64

@cython.boundscheck(False)
@cython.wraparound(False)
def split_eq_freq(np.ndarray[np.float64_t, ndim=2] dist not None, int n):
    """
    Compute cut points splitting a distribution into `n` intervals that
    hold roughly equal counts.

    `dist` is read as a two-row array: `dist[0, i]` is the i-th value and
    `dist[1, i]` is the count (weight) attached to it. Cut points are
    midpoints between values in neighbouring columns, so the columns are
    presumably ordered by value (this is not checked here).

    If `n` is at least the number of columns, a cut point is placed between
    every pair of adjacent values. If `n` is 1 or less, no cut is needed and
    an empty list is returned.

    Returns a list with at most `n - 1` cut points.
    """

    cdef int llen = dist.shape[1]

    if n >= llen: #n is greater than distributions
        return [(v1+v2)/2 for v1,v2 in zip(dist[0], dist[0][1:])]

    if n <= 1:
        # One interval (or fewer) requires no cut points. Returning here also
        # keeps n == 0 from reaching the N/toGo division below, which would
        # otherwise raise ZeroDivisionError.
        return []

    cdef np.float64_t N = dist[1].sum()   # count not yet assigned to a closed interval
    cdef int toGo = n                     # intervals still to be formed
    cdef np.float64_t inthis = 0          # count accumulated in the current interval
    cdef np.float64_t prevel = -1         # value preceding the current one
    cdef np.float64_t inone = N/toGo      # target count per remaining interval
    points = []

    cdef Py_ssize_t i
    cdef np.float64_t v
    cdef np.float64_t k
    cdef np.float64_t vn

    for i in range(llen):
        v = dist[0,i]
        k = dist[1,i]
        if toGo <= 1:
            # the last interval takes everything that is left
            break
        inthis += k
        # The first column can never be split off "before", so it always
        # just starts the first interval.
        if inthis < inone or i == 0: 
            prevel = v
        else: #current count exceeded
            if i < llen - 1 and inthis - inone < k / 2:
                #exceeded for less than half the current count:
                #split after current
                vn = dist[0,i+1]
                points.append((vn + v)/2)
                N -= inthis
                inthis = 0
                prevel = vn
            else:
                #split before the current value
                points.append((prevel + v)/2)
                N -= inthis - k
                inthis = k
                prevel = v
            toGo -= 1
            if toGo:
                # re-balance the target using what is left to distribute
                inone = N/toGo
    return points


@cython.wraparound(False)
@cython.boundscheck(False)
def entropy_normalized1(np.ndarray[np.float64_t, ndim=1] D):
    """
    Return the entropy, in bits, of the distribution stored in `D`.

    `D` is expected to be normalized already. Entries that are not strictly
    positive are skipped, and entries above 1.0 are treated as 1.0 (so they
    contribute nothing to the result).
    """
    cdef np.float64_t inv_ln2 = 1./log(2.)   # converts natural log to base 2
    cdef np.float64_t total = 0.
    cdef np.float64_t p
    cdef Py_ssize_t idx
    cdef Py_ssize_t size = D.shape[0]
    for idx in range(size):
        p = D[idx]
        if not (p > 0.):
            # zeros, negatives and NaNs add nothing to the entropy
            continue
        if p > 1.0:
            p = 1.0
        total -= p*log(p)*inv_ln2
    return total


@cython.wraparound(False)
@cython.boundscheck(False)
def entropy_normalized2(np.ndarray[np.float64_t, ndim=2] D):
    """
    Return a 1-d array holding the entropy, in bits, of every row of `D`.

    Each row of `D` is expected to be a distribution (i.e. to sum to 1.0).
    Entries that are not strictly positive are skipped, and entries above
    1.0 are treated as 1.0 (so they contribute nothing to the result).
    """
    cdef Py_ssize_t n_rows = D.shape[0]
    cdef Py_ssize_t n_cols = D.shape[1]
    cdef np.float64_t inv_ln2 = 1./log(2.)   # converts natural log to base 2
    cdef np.float64_t p
    cdef Py_ssize_t row, col
    cdef np.ndarray[np.float64_t, ndim=1] entropies = numpy.zeros(n_rows)
    for row in range(n_rows):
        for col in range(n_cols):
            p = D[row,col]
            if not (p > 0.):
                # zeros, negatives and NaNs add nothing to the entropy
                continue
            if p > 1.0:
                p = 1.0
            entropies[row] -= p*log(p)*inv_ln2
    return entropies