File: util.py

package info (click to toggle)
orange3 3.40.0-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 15,908 kB
  • sloc: python: 162,745; ansic: 622; makefile: 322; sh: 93; cpp: 77
file content (784 lines) | stat: -rw-r--r-- 26,180 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
"""
This module provides alternatives for the few additional functions found in
and once used from the bottlechest package (fork of bottleneck).

It also patches bottleneck to contain these functions.
"""
import warnings
from typing import Iterable

import bottleneck as bn
import numpy as np
import pandas
import scipy.stats.stats
from scipy import sparse as sp

from sklearn.utils.sparsefuncs import mean_variance_axis


def _eliminate_zeros(x):
    """Eliminate explicitly-stored zeros from a sparse matrix, in place.

    If `x` does not implement `eliminate_zeros`, a warning is issued and the
    matrix is left unchanged (it may still hold explicit zeros).
    """
    if hasattr(x, "eliminate_zeros"):
        x.eliminate_zeros()
    else:
        # Fixed: `x.__type__` is not an attribute of Python objects, so this
        # branch raised AttributeError instead of warning; use type(x).__name__.
        # Also corrected "may by" -> "may be" in the message.
        warnings.warn(
            f"`{type(x).__name__}` does not implement `eliminate_zeros`. Some values "
            f"in the sparse matrix may be explicit zeros."
        )


def _count_nans_per_row_sparse(X, weights, dtype=None):
    """ Count the number of nans (undefined) values per row. """
    if weights is None:
        # Unweighted: build an indicator matrix marking NaN entries and sum rows
        if isinstance(X, (sp.csr_matrix, sp.csc_matrix)):
            indicator = type(X)((np.isnan(X.data), X.indices, X.indptr), X.shape)
            return np.asarray(indicator.sum(axis=1), dtype=dtype).ravel()
        raise TypeError("unsupported type '{}'".format(type(X).__name__))

    # Weighted: locate NaN entries in COO form so that both 1d (per-row) and
    # 2d (per-element) weight arrays can be indexed directly
    coo = X.tocoo(copy=False)
    nan_mask = np.isnan(coo.data)
    rows = coo.row[nan_mask]
    cols = coo.col[nan_mask]

    picked = weights[rows] if weights.ndim == 1 else weights[rows, cols]

    weighted = sp.coo_matrix((picked, (rows, cols)), shape=coo.shape).tocsr()
    return np.asarray(weighted.sum(axis=1), dtype=dtype).ravel()


def sparse_count_implicit_zeros(x):
    """ Count the number of implicit zeros in a sparse matrix. """
    if not sp.issparse(x):
        raise TypeError('The matrix provided was not sparse.')
    # Entries absent from the stored data are implicit zeros
    total_entries = np.prod(x.shape)
    return total_entries - x.nnz


def sparse_has_implicit_zeros(x):
    """ Check if sparse matrix contains any implicit zeros. """
    if not sp.issparse(x):
        raise TypeError('The matrix provided was not sparse.')
    # Fewer stored entries than the full shape means some zeros are implicit
    return x.nnz != np.prod(x.shape)


def sparse_implicit_zero_weights(x, weights):
    """ Extract the weight values of all zeros in a sparse matrix. """
    if not sp.issparse(x):
        raise TypeError('The matrix provided was not sparse.')

    if weights.ndim == 1:
        # Match weights and x axis so `indices` will be set appropriately
        if x.shape[0] == weights.shape[0]:
            x = x.tocsc()
        elif x.shape[1] == weights.shape[0]:
            x = x.tocsr()
        # NOTE(review): `x.indices` holds minor-axis indices (rows for CSC,
        # columns for CSR) while `n_items` spans the whole flattened matrix;
        # these only line up when the other dimension has size 1, i.e. `x` is
        # effectively a single column/row (as when called from `bincount`) —
        # confirm before using on general 2d matrices.
        n_items = np.prod(x.shape)
        zero_indices = np.setdiff1d(np.arange(n_items), x.indices, assume_unique=True)
        return weights[zero_indices]
    else:
        # Can easily be implemented using a coo_matrix
        raise NotImplementedError(
            'Computing zero weights on ndimensinal weight matrix is not implemented'
        )


def bincount(x, weights=None, max_val=None, minlength=0):
    """Return counts of values in array X.

    Works kind of like np.bincount(), except that it also supports
    arrays with nans.

    Parameters
    ----------
    x : array_like, 1 dimension, nonnegative ints
        Input array.
    weights : array_like, optional
        Weights, array of the same shape as x.
    max_val : int, optional
        Indicates the maximum value we expect to find in X and sets the result
        array size accordingly. E.g. if we set `max_val=2` yet the largest
        value in X is 1, the result will contain a bin for the value 2, and
        will be set to 0. See examples for usage.
    minlength : int, optional
        A minimum number of bins for the output array. See numpy docs for info.

    Returns
    -------
    Tuple[np.ndarray, int]
        Returns the bincounts and the number of NaN values.

    Examples
    --------
    In case `max_val` is provided, the return shape includes bins for these
    values as well, even if they do not appear in the data. However, this will
    not truncate the bincount if values larger than `max_count` are found.
    >>> bincount([0, 0, 1, 1, 2], max_val=4)
    (array([2., 2., 1., 0., 0.]), 0.0)
    >>> bincount([0, 1, 2, 3, 4], max_val=2)
    (array([1., 1., 1., 1., 1.]), 0.0)

    """
    # Store the original matrix before any manipulation to check for sparse
    x_original = x
    if sp.issparse(x):
        if weights is not None:
            # Match weights and x axis so `indices` will be set appropriately
            if x.shape[0] == weights.shape[0]:
                x = x.tocsc()
            elif x.shape[1] == weights.shape[0]:
                x = x.tocsr()

            zero_weights = sparse_implicit_zero_weights(x, weights).sum()
            weights = weights[x.indices]
        else:
            zero_weights = sparse_count_implicit_zeros(x)

        # Only the explicitly stored values take part in np.bincount below
        x = x.data

    x = np.asanyarray(x, dtype=float)
    # Compute the NaN mask once and reuse it for both the check and filtering
    # (the original called bn.anynan and then recomputed np.isnan).
    nan_mask = np.isnan(x)
    if nan_mask.any():
        nonnan = ~nan_mask
        x = x[nonnan]
        if weights is not None:
            # Weighted count of NaNs; drop the corresponding weights
            nans = (~nonnan * weights).sum(axis=0)
            weights = weights[nonnan]
        else:
            nans = (~nonnan).sum(axis=0)
    else:
        nans = 0. if x.ndim == 1 else np.zeros(x.shape[1], dtype=float)

    if minlength == 0 and max_val is not None:
        minlength = max_val + 1

    bc = np.bincount(
        x.astype(np.int32, copy=False), weights=weights, minlength=minlength
    ).astype(float)
    # Since `csr_matrix.values` only contain non-zero values or explicit
    # zeros, we must count implicit zeros separately and add them to the
    # explicit ones found before
    if sp.issparse(x_original):
        # If x contains only NaNs, then bc will be an empty array
        if zero_weights and bc.size == 0:
            # Fixed: return an ndarray (as documented), not a plain list
            bc = np.array([zero_weights], dtype=float)
        elif zero_weights:
            bc[0] += zero_weights

    return bc, nans


def countnans(x, weights=None, axis=None, dtype=None, keepdims=False):
    """
    Count the undefined elements in an array along given axis.

    Parameters
    ----------
    x : array_like
    weights : array_like, optional
        Weights to weight the nans with, before or after counting (depending
        on the weights shape).
    axis : int, optional
    dtype : dtype, optional
        The data type of the returned array.

    Returns
    -------
    Union[np.ndarray, float]

    """
    if sp.issparse(x):
        assert axis in [None, 0, 1], 'Only axis 0 and 1 are currently supported'
        # Mirror dense 1d behaviour: a row vector (e.g. [[1 2 3]]) has no axis 1
        if x.shape[0] == 1 and axis == 1:
            raise ValueError('Axis %d is out of bounds' % axis)

        # Orient so that the requested axis becomes the row axis
        mat = x if axis == 1 else x.T
        w = weights
        if w is not None and axis != 1:
            w = w.T

        counts = _count_nans_per_row_sparse(mat.tocsr(), w, dtype=dtype)

        # Collapse to a scalar for axis=None, and for row vectors, which
        # scipy stores with ndim=2
        if axis is None or x.shape[0] == 1:
            counts = counts.sum(dtype=dtype)
        return counts

    x = np.asanyarray(x)
    nan_mask = np.isnan(x)
    elementwise = weights is not None and weights.shape == x.shape
    if elementwise:
        # Element-aligned weights are applied before counting
        nan_mask = nan_mask * weights

    counts = nan_mask.sum(axis=axis, dtype=dtype, keepdims=keepdims)
    if weights is not None and not elementwise:
        # Axis-aligned weights are applied to the finished counts
        counts = counts * weights
    return counts


def contingency(X, y, max_X=None, max_y=None, weights=None, mask=None):
    """
    Compute the contingency matrices for each column of X (excluding the masked)
    versus the vector y.

    If the array is 1-dimensional, a 2d contingency matrix is returned. If the
    array is 2d, the function returns a 3d array, with the first dimension
    corresponding to column index (variable in the input array).

    The column of contingency matrix correspond to values of variables, the
    row correspond to values in vector `y`.

    Rows in the input array can be weighted (argument `weights`). A subset of
    columns can be selected by additional argument `mask`.

    The function also returns a count of NaN values per each value of `y`.

    Parameters
    ----------
    X : array_like
        With values in columns.
    y : 1d array
        Vector of true values.
    max_X : int
        The maximal value in the array
    max_y : int
        The maximal value in `y`
    weights : array_like
        Row weights. When not None, contingencies contain weighted counts
    mask : sequence
        Discrete columns of X.

    Returns
    -------
    contingencies: (m × ny × nx) array
        m number of masked (used) columns (all if mask=None), i.e.
        for each column of X;
        ny number of uniques in y,
        nx number of uniques in column of X.
    nans_cols : array_like
        Number of nans in each column of X for each unique value of y.
    nans_rows : array_like
        Number of nans in y for each unique value in columns of X.
    nans : array_like
        Number of nans in column of X and y at the same time.
    """
    was_1d = False
    if X.ndim == 1:
        # Promote to a single-column 2d array so the loop below is uniform
        X = X[..., np.newaxis]
        was_1d = True

    contingencies, nans_cols, nans_rows, nans = [], [], [], []
    ny = np.unique(y).size if max_y is None else max_y + 1
    for i in range(X.shape[1]):
        if mask is not None and not mask[i]:
            # Masked-out column: emit zero/None placeholders of the right shape
            contingencies.append(np.zeros((ny, max_X + 1)))
            nans_cols.append(np.zeros(ny))
            nans_rows.append(None)
            nans.append(0)
            continue
        col = X[..., i]
        nx = np.unique(col[~np.isnan(col)]).size if max_X is None else max_X + 1
        if sp.issparse(col):
            col = np.ravel(col.todense())
        # Encode each (y, col) pair as the single code `y + ny * col`,
        # bincount the codes, then reshape to (nx, ny) and transpose so that
        # rows of the table correspond to values of `y`
        contingencies.append(
            bincount(y + ny * col,
                     minlength=ny * nx,
                     weights=weights)[0].reshape(nx, ny).T)

        nan_col_mask = np.isnan(col)
        nan_row_mask = np.isnan(y)
        nan_mask = nan_col_mask & nan_row_mask
        weights_ = np.ones(len(y)) if weights is None else weights

        # (Weighted) NaNs in the column, distributed over the values of `y`
        nans_cols.append(
            bincount(y[nan_col_mask], weights=weights_[nan_col_mask],
                     minlength=ny)[0])
        # (Weighted) NaNs in `y`, distributed over the values of the column
        nans_rows.append(
            bincount(col[nan_row_mask], weights=weights_[nan_row_mask],
                     minlength=nx)[0])
        # (Weighted) rows where both the column and `y` are NaN
        nans.append(np.sum(nan_mask * weights_))
    if was_1d:
        return contingencies[0], nans_cols[0], nans_rows[0], nans[0]
    return np.array(contingencies), np.array(nans_cols), nans_rows, nans


def stats(X, weights=None, compute_variance=False):
    """
    Compute min, max, #nans, mean and variance.

    Result is a tuple (min, max, mean, variance, #nans, #non-nans) or an
    array of shape (len(X), 6).

    The mean and the number of nans and non-nans are weighted.

    Computation of variance requires an additional pass and is not enabled
    by default. Zeros are filled in instead of variance.

    Parameters
    ----------
    X : array_like, 1 or 2 dimensions
        Input array.
    weights : array_like, optional
        Weights, array of the same length as `x`.
    compute_variance : bool, optional
        If set to True, the function also computes variance.

    Returns
    -------
    out : a 6-element tuple or an array of shape (len(x), 6)
        Computed (min, max, mean, variance or 0, #nans, #non-nans)

    Raises
    ------
    ValueError
        If the length of the weight vector does not match the length of the
        array
    """
    is_numeric = np.issubdtype(X.dtype, np.number)
    is_sparse = sp.issparse(X)

    if X.size and is_numeric:
        if is_sparse:
            nans = countnans(X, axis=0)
            # CSC gives efficient per-column access for the reductions below
            X = X.tocsc()
        else:
            nans = np.isnan(X).sum(axis=0)
        if compute_variance:
            means, vars = nan_mean_var(X, axis=0, weights=weights)
        else:
            # Variance column is filled with zeros when not requested
            means = nanmean(X, axis=0, weights=weights)
            vars = np.zeros(X.shape[1] if X.ndim == 2 else 1)
        return np.column_stack((
            nanmin(X, axis=0),
            nanmax(X, axis=0),
            means,
            vars,
            nans,
            X.shape[0] - nans))
    else:
        # Non-numeric (or empty) data: degenerate statistics; both missing
        # values (NaN/None) and empty strings count as nans
        if X.ndim == 1:
            X = X[:, None]
        nans = (pandas.isnull(X).sum(axis=0) + (X == "").sum(axis=0)) \
            if X.size else np.zeros(X.shape[1])
        return np.column_stack((
            np.tile(np.inf, X.shape[1]),
            np.tile(-np.inf, X.shape[1]),
            np.zeros(X.shape[1]),
            np.zeros(X.shape[1]),
            nans,
            X.shape[0] - nans))


def _nan_min_max(x, func, axis=0):
    """Apply a NaN-aware extremum `func` (np.nanmin / np.nanmax) to `x`.

    Dense input is delegated directly to `func`; for sparse input the
    reduction runs over stored values only, folding in the value 0 whenever
    the matrix contains implicit zeros.
    """
    if not sp.issparse(x):
        return func(x, axis=axis)
    if axis is None:
        # A matrix with no stored values reduces to NaN
        extreme = func(x.data, axis=axis) if x.nnz else float('nan')
        if sparse_has_implicit_zeros(x):
            # Implicit zeros take part in the extremum as the value 0
            extreme = func([0, extreme])
        return extreme
    if axis == 0:
        # Transpose so that the per-row loop below reduces along axis 0
        x = x.T
    else:
        assert axis == 1

    # TODO check & transform to correct format
    r = []
    for row in x:
        values = row.data
        extreme = func(values) if values.size else float('nan')
        if sparse_has_implicit_zeros(row):
            extreme = func([0, extreme])
        r.append(extreme)
    return np.array(r)


def nanmin(x, axis=None):
    """ Equivalent of np.nanmin that supports sparse or dense matrices. """
    return _nan_min_max(x, np.nanmin, axis)


def nanmax(x, axis=None):
    """ Equivalent of np.nanmax that supports sparse or dense matrices. """
    return _nan_min_max(x, np.nanmax, axis)


def mean(x):
    """ Equivalent of np.mean that supports sparse or dense matrices. """
    if sp.issparse(x):
        # Implicit zeros contribute nothing to the sum, so summing the
        # stored data and dividing by the full size gives the mean
        result = np.sum(x.data) / np.prod(x.shape)
    else:
        result = np.mean(x)
    if np.isnan(result):
        warnings.warn('mean() resulted in nan. If input can contain nan values,'
                      ' perhaps you meant nanmean?', stacklevel=2)
    return result


def _apply_func(x, dense_func, sparse_func, axis=None):
    """ General wrapper for a function depending on sparse or dense matrices. """
    if not sp.issparse(x):
        return dense_func(x, axis=axis)
    if axis is None:
        return sparse_func(x)
    if axis not in (0, 1):
        raise NotImplementedError
    # Orient so the requested axis becomes the row axis, then reduce per row
    rows = (x if axis == 1 else x.T).tocsr()
    return np.fromiter((sparse_func(row) for row in rows),
                       dtype=np.double, count=rows.shape[0])


def nansum(x, axis=None):
    """ Equivalent of np.nansum that supports sparse or dense matrices. """
    def nansum_sparse(x):
        # Implicit zeros contribute nothing to a sum, so only the stored
        # values need to be added up
        return np.nansum(x.data)

    return _apply_func(x, np.nansum, nansum_sparse, axis=axis)


def nanmean(x, axis=None, weights=None):
    """ Equivalent of np.nanmean that supports sparse or dense matrices. """
    if axis is None and weights is not None:
        raise NotImplementedError("weights are only supported if axis is defined")

    if not sp.issparse(x):
        if weights is None:
            means = bn.nanmean(x, axis=axis)
        else:
            # Reshape the 1d weight vector so it broadcasts along the
            # reduced axis
            if axis == 0:
                weights = weights.reshape(-1, 1)
            elif axis == 1:
                weights = weights.reshape(1, -1)
            else:
                raise NotImplementedError
            nanw = ~np.isnan(x) * weights  # do not divide by non-used weights
            means = bn.nansum(x * weights, axis=axis) / np.sum(nanw, axis=axis)
    elif axis is None:
        # Sparse, no axis: take the NaN-aware mean of the per-column means
        means, _ = mean_variance_axis(x, axis=0)
        means = np.nanmean(means)
    else:
        # mean_variance_axis is picky regarding the input type
        if weights is not None:
            weights = weights.astype(float)
        means, _ = mean_variance_axis(x, axis=axis, weights=weights)

    return means


def nan_mean_var(x, axis=None, weights=None):
    """
    Computes means and variance of dense and sparse matrices.
    Supports weights. Based on mean_variance_axis.
    """
    if axis is None:
        raise NotImplementedError("axis=None is not supported")

    if not sp.issparse(x):
        if weights is None:
            means = bn.nanmean(x, axis=axis)
            variances = bn.nanvar(x, axis=axis)
        else:
            # Reshape the 1d weight vector so it broadcasts along the
            # reduced axis
            if axis == 0:
                weights = weights.reshape(-1, 1)
            elif axis == 1:
                weights = weights.reshape(1, -1)
            else:
                raise NotImplementedError

            nanw = ~np.isnan(x) * weights  # do not divide by non-used weights
            wsum = np.sum(nanw, axis=axis)
            means = bn.nansum(x * weights, axis=axis) / wsum

            # Reshape the means so they broadcast back over `x` when
            # computing the squared deviations below
            if axis == 0:
                mr = means.reshape(1, -1)
            elif axis == 1:
                mr = means.reshape(-1, 1)

            variances = bn.nansum(((x - mr) ** 2) * weights, axis=axis) / wsum
    else:
        # mean_variance_axis is picky regarding the input type
        if weights is not None:
            weights = weights.astype(float)
        means, variances = mean_variance_axis(x, axis=axis, weights=weights)

    return means, variances


def nanvar(x, axis=None, ddof=0):
    """ Equivalent of np.nanvar that supports sparse or dense matrices. """
    def nanvar_sparse(x):
        # Number of defined (non-NaN) entries, implicit zeros included
        n_vals = np.prod(x.shape) - np.sum(np.isnan(x.data))
        # Implicit zeros: entries not stored in `data` at all
        n_zeros = np.prod(x.shape) - len(x.data)
        avg = np.nansum(x.data) / n_vals
        # Each implicit zero contributes (0 - avg)**2 == avg**2 to the
        # sum of squared deviations
        return (np.nansum((x.data - avg) ** 2) + avg ** 2 * n_zeros) / (n_vals - ddof)

    return _apply_func(x, bn.nanvar, nanvar_sparse, axis=axis)


def nanstd(x, axis=None, ddof=0):
    """ Equivalent of np.nanstd that supports sparse and dense matrices. """
    # Standard deviation is the square root of the NaN-aware variance
    return np.sqrt(nanvar(x, axis=axis, ddof=ddof))


def nanmedian(x, axis=None):
    """ Equivalent of np.nanmedian that supports sparse or dense matrices. """
    def nanmedian_sparse(x):
        # Mask of defined (non-NaN) stored values
        nz = np.logical_not(np.isnan(x.data))
        n_nan = sum(np.isnan(x.data))
        n_nonzero = sum(x.data[nz] != 0)
        # Zeros: everything not stored plus any explicit zeros
        n_zeros = np.prod(x.shape) - n_nonzero - n_nan
        if n_zeros > n_nonzero:
            # Typical case if use of sparse matrices make sense
            return 0
        else:
            # Possibly contains NaNs and
            # more nz values than zeros, so allocating memory should not be too problematic
            return np.nanmedian(x.toarray())

    return _apply_func(x, np.nanmedian, nanmedian_sparse, axis=axis)


def nanmode(x, axis=0):
    """ A temporary replacement for a scipy.stats.mode.

    This function returns mode NaN if all values are NaN (scipy<1.2.0 wrongly
    returns zero). Also, this function returns count NaN if all values are NaN
    (scipy=1.3.0 returns some number)."""
    # Positions where every value along `axis` is NaN
    all_nan = np.isnan(np.array(x)).sum(axis=axis, keepdims=True) == x.shape[axis]
    res = scipy.stats.mode(x, axis, keepdims=True)
    # type(res) is ModeResult. ModeResult is defined in scipy.stats.stats; this
    # namespace is deprecated, but ModeResult is not exported to scipy.stats
    # Hence we use type(res) to avoid a warning.
    fixed_mode = np.where(all_nan, np.nan, res.mode)
    fixed_count = np.where(all_nan, np.nan, res.count)
    return type(res)(fixed_mode, fixed_count)


def unique(x, return_counts=False):
    """ Equivalent of np.unique that supports sparse or dense matrices. """
    if not sp.issparse(x):
        return np.unique(x, return_counts=return_counts)

    n_implicit = sparse_count_implicit_zeros(x)
    has_explicit_zero = not np.all(x.data)
    res = np.unique(x.data, return_counts=return_counts)
    if not n_implicit:
        # No unstored entries: the stored data covers everything
        return res

    if not return_counts:
        if has_explicit_zero:
            return res
        # Zero exists only implicitly; splice it into the sorted values
        return np.insert(res, np.searchsorted(res, 0), 0)

    values, counts = res
    if has_explicit_zero:
        # Zero already present among the values; just bump its count
        counts[values == 0.] += n_implicit
        return values, counts
    pos = np.searchsorted(values, 0)
    return np.insert(values, pos, 0), np.insert(counts, pos, n_implicit)


def nanunique(*args, **kwargs):
    """ Return unique values while disregarding missing (np.nan) values.
    Supports sparse or dense matrices. """
    res = unique(*args, **kwargs)

    if not isinstance(res, tuple):
        return res[~np.isnan(res)]

    values, counts = res
    keep = ~np.isnan(values)
    return values[keep], counts[keep]


def digitize(x, bins, right=False):
    """Equivalent of np.digitize that supports sparse and dense matrices.

    If a sparse matrix is provided and the '0's belong to the '0'th bin, then
    a sparse matrix is returned.

    Because this can return both sparse and dense matrices, we must keep the
    return shape consistent. Since sparse matrices don't support 1d matrices,
    we reshape any returned 1d numpy array to a 2d matrix, with the first
    dimension shape being 1. This is equivalent to the behaviour of sparse
    matrices.

    Parameters
    ----------
    x : Union[np.ndarry, sp.csr_matrix, sp.csc_matrix]
    bins : np.ndarray
    right : Optional[bool]

    Returns
    -------
    Union[np.ndarray, sp.csr_matrix]

    """
    if not sp.issparse(x):
        # TODO Remove reshaping logic when support for numpy==1.9 is dropped
        original_shape = x.shape
        x = x.flatten()
        result = np.digitize(x, bins, right)
        result = result.reshape(original_shape)
        # In order to keep the return shape consistent, and sparse matrices
        # don't support 1d matrices, make sure to convert 1d to 2d matrices
        if result.ndim == 1:
            result = result.reshape(((1,) + result.shape))
        return result

    # Find the bin where zeros belong, depending on the `right` parameter
    zero_bin = np.searchsorted(bins, 0, side=['right', 'left'][right])

    # If implicit zeros map to bin 0 the result can stay sparse; otherwise
    # every implicit zero gets a non-zero bin index, so start from a dense
    # array pre-filled with that bin
    if zero_bin == 0:
        r = sp.lil_matrix(x.shape, dtype=np.int64)
    else:
        r = zero_bin * np.ones(x.shape, dtype=np.int64)

    for idx, row in enumerate(x.tocsr()):
        # TODO Remove this check when support for numpy==1.9 is dropped
        if row.nnz > 0:
            r[idx, row.indices] = np.digitize(row.data, bins, right)

    # Orange mainly deals with `csr_matrix`, but `lil_matrix` is more efficient
    # for incremental building
    if sp.issparse(r):
        r = r.tocsr()

    return r


def var(x, axis=None, ddof=0):
    """ Equivalent of np.var that supports sparse and dense matrices. """
    if sp.issparse(x):
        # Var = E[x^2] - E[x]^2, computed from sparse-friendly reductions
        second_moment = x.multiply(x).mean(axis)
        squared_mean = np.square(x.mean(axis))
        result = np.squeeze(np.asarray(second_moment - squared_mean))

        # Apply correction for degrees of freedom
        n = np.prod(x.shape) if axis is None else x.shape[axis]
        result *= n / (n - ddof)
        return result

    return np.var(x, axis, ddof=ddof)


def std(x, axis=None, ddof=0):
    """ Equivalent of np.std that supports sparse and dense matrices. """
    # Standard deviation is the square root of the variance
    return np.sqrt(var(x, axis=axis, ddof=ddof))


def nan_to_num(x, copy=True):
    """ Equivalent of np.nan_to_num that supports sparse and dense matrices. """
    if sp.issparse(x):
        if copy:
            x = x.copy()
        # Replace NaNs in the stored data in place, then drop any zeros
        # the replacement produced
        np.nan_to_num(x.data, copy=False)
        _eliminate_zeros(x)
        return x

    return np.nan_to_num(x, copy=copy)


def isnan(x, out=None):
    """ Equivalent of np.isnan that supports sparse and dense matrices. """
    if not sp.issparse(x):
        return np.isnan(x, out=out)

    if out is not None and out is not x:
        raise ValueError(
            "The `out` parameter can only be set `x` when using sparse matrices"
        )
    # Work on a copy unless the caller asked for in-place via out=x
    target = x.copy() if out is None else x

    np.isnan(target.data, out=target.data)
    _eliminate_zeros(target)
    return target


def any_nan(x, axis=None):
    """ Check if any of the values in a matrix is nan. """
    if not sp.issparse(x):
        return np.isnan(x).any(axis=axis)

    if axis is None:
        # Implicit zeros are never NaN, so only stored values matter
        return np.isnan(x.data).any()

    # Convert so the reduced axis is the compressed (minor) one
    if axis == 0:
        x = x.tocsc()
    elif axis == 1:
        x = x.tocsr()

    other = x.ndim - axis - 1
    result = np.empty(x.shape[other], dtype=bool)
    for i in range(x.shape[other]):
        segment = x.data[x.indptr[i]:x.indptr[i + 1]]
        result[i] = np.isnan(segment).any()

    return result


def all_nan(x, axis=None):
    """
    Check if all of the values in a matrix is nan. Works for sparse matrix too.
    """
    if not sp.issparse(x):
        return np.isnan(x).all(axis=axis)

    if axis is None:
        # when x.nnz < actual shape there are zero values which are not nan
        return np.prod(x.shape) == x.nnz and np.isnan(x.data).all()

    if axis == 0:
        x = x.tocsc()
    elif axis == 1:
        x = x.tocsr()

    ax = x.ndim - axis - 1
    axis_len = x.shape[axis]
    result = np.zeros(x.shape[ax], dtype=bool)
    for i in range(x.shape[ax]):
        vals = x.data[x.indptr[i]:x.indptr[i + 1]]
        result[i] = axis_len == len(vals) and np.isnan(vals).all()

    return result


def FDR(p_values: Iterable, dependent=False, m=None, ordered=False) -> Iterable:
    """ `False Discovery Rate <http://en.wikipedia.org/wiki/False_discovery_rate>`_
        correction on a list of p-values.

    :param p_values: list or np.ndarray of p-values.
    :param dependent: use correction for dependent hypotheses (default False).
    :param m: number of hypotheses tested (default ``len(p_values)``).
    :param ordered: prevent sorting of p-values if they are already sorted
        (default False).
    :return: list or np.ndarray, same as the input
    """
    # Nothing to correct for missing/empty input or a non-positive m
    if p_values is None or len(p_values) == 0 or \
            (m is not None and m <= 0):
        return None

    was_list = isinstance(p_values, list)
    pvals = np.array(p_values)
    if m is None:
        m = len(pvals)

    sort_order = None
    if not ordered:
        ordered = (np.diff(pvals) >= 0).all()
        if not ordered:
            sort_order = np.argsort(pvals)
            pvals = pvals[sort_order]

    if dependent:  # correct q for dependent tests
        m *= sum(1 / np.arange(1, m + 1))

    # Benjamini-Hochberg: scale by m/rank, then enforce monotonicity by
    # taking running minima from the largest p-value downwards
    ranks = np.arange(1, len(pvals) + 1)
    fdrs = np.array(np.minimum.accumulate((pvals * m / ranks)[::-1])[::-1])
    if sort_order is not None:
        # Undo the sorting so the output lines up with the input order
        fdrs[sort_order] = fdrs.copy()
    return list(fdrs) if was_list else fdrs