1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189
|
# Copyright (c) DataLab Platform Developers, BSD 3-Clause license, see LICENSE file.
"""
I/O conversion functions
"""
# pylint: disable=invalid-name # Allows short reference names like x, y, ...
from __future__ import annotations
from typing import Any, Sequence
import numpy as np
import skimage
def dtypes_to_sorted_short_codes(
    dtypes: Sequence[Any], kind_filter: str | None = None
) -> list[str]:
    """Return sorted short dtype codes for numeric dtypes.

    Each input is converted to a numpy dtype and reduced to its short code:
    the numpy kind letter followed by the itemsize in bytes (e.g. "u1", "i2",
    "f8"), with the byte-order character dropped. Inputs whose short code is
    not one of the standard codes listed below (e.g. string dtypes) are
    ignored.

    Order of the returned codes:

    - Boolean: "b1".
    - Integers by ascending itemsize, unsigned before signed for each size:
      "u1", "i1", "u2", "i2", "u4", "i4", "u8", "i8".
    - Floats by ascending itemsize: "f2", "f4", "f8".
    - Complex by ascending itemsize: "c8", "c16".

    Args:
        dtypes: Sequence of objects acceptable by numpy.dtype (dtype, str, etc.)
        kind_filter: String of dtype kind letters to keep, e.g. "iu" for
         unsigned/signed integers. If empty or None, keep all numeric types

    Returns:
        List of unique short dtype codes in the order described above.

    Note:
        Any exception raised by ``numpy.dtype`` for an input it cannot
        interpret is propagated to the caller.
    """
    if not kind_filter:
        # None and "" both mean "no filtering", as documented.
        kind_filter = "iubfc"  # all numeric kinds

    # Strip the leading byte-order character ("<", ">", "=" or "|") so that
    # e.g. "<f8" and ">f8" both map to "f8". A set gives O(1) membership tests.
    available = {np.dtype(d).str[1:] for d in dtypes}

    # Standard dtype codes in desired output order
    standard_codes = (
        ("b1",)
        + ("u1", "i1", "u2", "i2", "u4", "i4", "u8", "i8")
        + ("f2", "f4", "f8")
        + ("c8", "c16")
    )
    return [
        code
        for code in standard_codes
        if code in available and code[0] in kind_filter
    ]
def _convert_bool_array(array: np.ndarray) -> np.ndarray:
    """Return ``array`` converted to unsigned 8-bit by scikit-image.

    The conversion is delegated entirely to ``skimage.util.img_as_ubyte``.
    NOTE(review): for boolean input this is expected to map False/True to
    0/255 -- confirm against the scikit-image dtype conversion rules.

    Args:
        array: Input numpy array of boolean type.

    Returns:
        The array produced by ``skimage.util.img_as_ubyte``.
    """
    to_ubyte = skimage.util.img_as_ubyte
    return to_ubyte(array)
def _convert_int_array(
    array: np.ndarray, supported_data_types: tuple[np.dtype, ...]
) -> np.ndarray:
    """Convert an integer array to a standard type.

    Select the first supported integer dtype (in the order returned by
    ``dtypes_to_sorted_short_codes``, i.e. smallest itemsize first) that can
    represent all values in the array. If no suitable integer dtype is found,
    convert the array to a supported float type instead.

    An empty array is treated as if its value range were [0, 0], so it is
    converted to the first supported integer dtype.

    Args:
        array: Input numpy array of integer type.
        supported_data_types: Tuple of supported numpy dtypes for destination object.

    Returns:
        Converted numpy array with the selected dtype. No copy is made when the
        array already has that dtype.

    Raises:
        ValueError: If no supported integer dtype can represent the data and
         no float dtype is supported either (raised by the float fallback).
    """
    ordered_codes = dtypes_to_sorted_short_codes(supported_data_types, kind_filter="iu")

    # Work with Python ints so the range checks below are exact integer
    # comparisons, independent of NumPy's scalar promotion rules (which matter
    # for values near the uint64/int64 limits).
    if array.size > 0:
        amin = int(np.min(array))
        amax = int(np.max(array))
    else:
        amin = amax = 0

    for code in ordered_codes:
        info = np.iinfo(code)
        if info.min <= amin and amax <= info.max:
            # Short codes carry no byte order; make native order explicit.
            new_type = np.dtype(code).newbyteorder("=")
            return array.astype(new_type, copy=False)

    # No supported integer dtype fits: fall back to a float type. The helper
    # already performs the conversion, so return its result directly.
    return _convert_float_array(array, supported_data_types)
def _convert_float_array(
    array: np.ndarray, supported_data_types: tuple[np.dtype, ...]
) -> np.ndarray:
    """Convert array to the smallest supported float/complex type that fits.

    The target kind is "f" for float arrays and "c" for complex arrays.
    Integer and boolean arrays (kinds "i", "u", "b") are converted to floats.

    Choose the smallest supported dtype of the target kind whose itemsize is
    greater than or equal to the array's itemsize. If no such type exists,
    fall back to the largest supported dtype for that kind.

    Args:
        array: Array to convert.
        supported_data_types: Sequence of allowed dtypes for the destination
         object type.

    Returns:
        Converted array with the selected dtype. No copy is made when the
        array already has that dtype.

    Raises:
        ValueError: If no supported dtype of the target kind exists.
    """
    kind = array.dtype.kind
    if kind in ("i", "u", "b"):
        kind = "f"  # convert integers (and booleans) to floats
    itemsize = array.dtype.itemsize

    # Codes come back already restricted to `kind` and sorted by ascending
    # itemsize, so no further filtering is needed.
    codes = dtypes_to_sorted_short_codes(supported_data_types, kind_filter=kind)
    if not codes:
        # No supported dtype for this kind.
        raise ValueError("Unsupported data type")

    # Smallest supported type with itemsize >= current itemsize; otherwise the
    # largest supported type for this kind (last in ascending order).
    selected_code = next(
        (code for code in codes if np.dtype(code).itemsize >= itemsize),
        codes[-1],
    )
    # Short codes carry no byte order; make native order explicit.
    new_type = np.dtype(selected_code).newbyteorder("=")
    return array.astype(new_type, copy=False)
def convert_array_to_valid_dtype(
    array: np.ndarray, valid_dtypes: tuple[np.dtype, ...]
) -> np.ndarray:
    """Convert array to the most appropriate valid dtype.

    Dispatches on the kind of the input array's dtype:

    - If the dtype is already one of ``valid_dtypes``, the array is returned
      unchanged (same object).
    - Float and complex arrays go through ``_convert_float_array``.
    - Boolean arrays are first converted with ``_convert_bool_array``; if the
      resulting dtype is not itself valid, the result is further converted
      with ``_convert_int_array`` so that the returned dtype is a valid one.
    - Signed/unsigned integer arrays go through ``_convert_int_array``.

    Args:
        array: array to convert
        valid_dtypes: tuple of valid dtypes

    Returns:
        Converted array with the most appropriate valid dtype.

    Raises:
        TypeError: if input is not a numpy ndarray
        ValueError: if array dtype cannot be converted to any valid type
    """
    if not isinstance(array, np.ndarray):
        raise TypeError("Input must be a numpy ndarray.")
    if array.dtype in valid_dtypes:
        return array

    kind: str = array.dtype.kind
    if kind in ("f", "c"):
        return _convert_float_array(array, valid_dtypes)
    if kind == "b":
        converted = _convert_bool_array(array)
        if converted.dtype in valid_dtypes:
            return converted
        # The bool conversion's output dtype is not accepted by the
        # destination: pick a supported integer (or, failing that, float)
        # dtype instead of returning an invalid one.
        return _convert_int_array(converted, valid_dtypes)
    if kind in ("i", "u"):
        return _convert_int_array(array, valid_dtypes)
    raise ValueError("Unsupported data type")
|