File: _misc.py

package info (click to toggle)
python-skbio 0.6.2-4
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 9,312 kB
  • sloc: python: 60,482; ansic: 672; makefile: 224
file content (252 lines) | stat: -rw-r--r-- 6,740 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
# ----------------------------------------------------------------------------
# Copyright (c) 2013--, scikit-bio development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE.txt, distributed with this software.
# ----------------------------------------------------------------------------

import hashlib
import inspect
from types import FunctionType

import numpy as np


def resolve_key(obj, key):
    """Resolve key given an object and key."""
    if callable(key):
        return key(obj)
    elif hasattr(obj, "metadata"):
        return obj.metadata[key]
    raise TypeError(
        "Could not resolve key %r. Key must be callable or %s must"
        " have `metadata` attribute." % (key, obj.__class__.__name__)
    )


def make_sentinel(name):
    return type(
        name,
        (),
        {"__repr__": lambda s: name, "__str__": lambda s: name, "__class__": None},
    )()


def find_sentinels(function, sentinel):
    params = inspect.signature(function).parameters
    return [name for name, param in params.items() if param.default is sentinel]


class MiniRegistry(dict):
    def __call__(self, name):
        """Act as a decorator to register functions with self."""

        def decorator(func):
            self[name] = func
            return func

        return decorator

    def copy(self):
        """Use for inheritance."""
        return self.__class__(super(MiniRegistry, self).copy())

    def formatted_listing(self):
        """Produce an RST list with descriptions."""
        if len(self) == 0:
            return "\tNone"
        else:
            return "\n".join(
                [
                    "\t%r\n\t  %s" % (name, self[name].__doc__.split("\n")[0])
                    for name in sorted(self)
                ]
            )

    def interpolate(self, obj, name):
        """Inject the formatted listing in the second blank line of `name`."""
        f = getattr(obj, name)
        f2 = FunctionType(
            f.__code__,
            f.__globals__,
            name=f.__name__,
            argdefs=f.__defaults__,
            closure=f.__closure__,
        )

        # Conveniently the original docstring is on f2, not the new ones if
        # inheritance is happening. I have no idea why.
        t = f2.__doc__.split("\n\n")
        t.insert(2, self.formatted_listing())
        f2.__doc__ = "\n\n".join(t)

        setattr(obj, name, f2)


def chunk_str(s, n, char):
    """Insert `char` character every `n` characters in string `s`.

    Canonically pronounced "chunkster".

    """
    # Modified from http://stackoverflow.com/a/312464/3776794
    if n < 1:
        raise ValueError(
            "Cannot split string into chunks with n=%d. n must be >= 1." % n
        )
    return char.join((s[i : i + n] for i in range(0, len(s), n)))


def cardinal_to_ordinal(n):
    """Return ordinal string version of cardinal int `n`.

    Parameters
    ----------
    n : int
        Cardinal to convert to ordinal. Must be >= 0.

    Returns
    -------
    str
        Ordinal version of cardinal `n`.

    Raises
    ------
    ValueError
        If `n` is less than 0.

    Notes
    -----
    This function can be useful when writing human-readable error messages.

    Examples
    --------
    >>> from skbio.util import cardinal_to_ordinal
    >>> cardinal_to_ordinal(0)
    '0th'
    >>> cardinal_to_ordinal(1)
    '1st'
    >>> cardinal_to_ordinal(2)
    '2nd'
    >>> cardinal_to_ordinal(3)
    '3rd'

    """
    # Taken and modified from http://stackoverflow.com/a/20007730/3776794
    # Originally from http://codegolf.stackexchange.com/a/4712 by Gareth
    if n < 0:
        raise ValueError("Cannot convert negative integer %d to ordinal " "string." % n)
    return "%d%s" % (n, "tsnrhtdd"[(n // 10 % 10 != 1) * (n % 10 < 4) * n % 10 :: 4])


def safe_md5(open_file, block_size=2**20):
    """Compute an md5 sum without loading the file into memory.

    Parameters
    ----------
    open_file : file object
        open file handle to the archive to compute the checksum. It
        must be open as a binary file
    block_size : int, optional
        size of the block taken per iteration

    Returns
    -------
    md5 : md5 object from the hashlib module
        object with the loaded file

    Notes
    -----
    This method is based on the answers given in:
    http://stackoverflow.com/a/1131255/379593

    Examples
    --------
    >>> from io import BytesIO
    >>> from skbio.util import safe_md5
    >>> fd = BytesIO(b"foo bar baz") # open file like object
    >>> x = safe_md5(fd)
    >>> x.hexdigest()
    'ab07acbb1e496801937adfa772424bf7'
    >>> fd.close()

    """
    md5 = hashlib.md5()
    data = True
    while data:
        data = open_file.read(block_size)
        if data:
            md5.update(data)
    return md5


def find_duplicates(iterable):
    """Find duplicate elements in an iterable.

    Parameters
    ----------
    iterable : iterable
        Iterable to be searched for duplicates (i.e., elements that are
        repeated).

    Returns
    -------
    set
        Repeated elements in `iterable`.

    """
    # modified from qiita.qiita_db.util.find_repeated
    # https://github.com/biocore/qiita
    # see licenses/qiita.txt
    seen, repeated = set(), set()
    for e in iterable:
        if e in seen:
            repeated.add(e)
        else:
            seen.add(e)
    return repeated


def get_rng(seed=None):
    """Get a random generator.

    Parameters
    ----------
    seed : int or np.random.Generator, optional
        A user-provided random seed or random generator instance.

    Returns
    -------
    np.random.Generator
        Random generator instance.

    Notes
    -----
    NumPy's new random generator [1]_ was introduced in version 1.17. It is not
    backward compatible with ``RandomState``, the legacy random generator [2]_.
    See NEP 19 [3]_ for an introduction to this change.

    References
    ----------
    .. [1] https://numpy.org/devdocs/reference/random/generator.html

    .. [2] https://numpy.org/doc/stable/reference/random/legacy.html

    .. [3] https://numpy.org/neps/nep-0019-rng-policy.html

    """
    try:
        if seed is None or isinstance(seed, int):
            return np.random.default_rng(seed)
        if isinstance(seed, np.random.Generator):
            return seed
        raise ValueError(
            "Invalid seed. It must be an integer or an "
            "instance of np.random.Generator."
        )
    except AttributeError:
        raise ValueError(
            "The installed NumPy version does not support "
            "random.Generator. Please use NumPy >= 1.17."
        )