""" Utility functions for sparse matrix module
"""

from __future__ import division, print_function, absolute_import

__all__ = ['upcast', 'getdtype', 'isscalarlike', 'isintlike',
           'isshape', 'issequence', 'isdense', 'ismatrix']

import warnings
import numpy as np

from scipy.lib._version import NumpyVersion

# keep this list synchronized with sparsetools
supported_dtypes = ['bool', 'int8','uint8','short','ushort','intc','uintc',
        'longlong','ulonglong','single','double','longdouble',
        'csingle','cdouble','clongdouble']
supported_dtypes = [np.dtype(x).type for x in supported_dtypes]

_upcast_memo = {}


def upcast(*args):
    """Returns the nearest supported sparse dtype for the
    combination of one or more types.

    upcast(t0, t1, ..., tn) -> T  where T is a supported dtype

    Examples
    --------

    >>> upcast('int32')
    <type 'numpy.int32'>
    >>> upcast('bool')
    <type 'numpy.bool_'>
    >>> upcast('int32','float32')
    <type 'numpy.float64'>
    >>> upcast('bool',complex,float)
    <type 'numpy.complex128'>

    """

    t = _upcast_memo.get(args)
    if t is not None:
        return t

    if np.all([np.issubdtype(np.dtype(arg), np.bool_) for arg in args]):
        # numpy 1.5.x compat - it gives int8 for
        # np.find_common_type([np.bool_, np.bool_], [])
        common = np.bool_
    else:
        common = np.find_common_type(args, [])

    for t in supported_dtypes:
        if np.can_cast(common, t):
            _upcast_memo[args] = t
            return t

    raise TypeError('no supported conversion for types: %r' % (args,))


def upcast_char(*args):
    """Same as `upcast` but taking dtype.char as input (faster)."""
    t = _upcast_memo.get(args)
    if t is not None:
        return t
    t = upcast(*map(np.dtype, args))
    _upcast_memo[args] = t
    return t


def upcast_scalar(dtype, scalar):
    """Determine data type for binary operation between an array of
    type `dtype` and a scalar.
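
    Examples
    --------
    >>> upcast_scalar(np.dtype('int32'), 2.5) == np.dtype('float64')
    True
    >>> upcast_scalar(np.dtype('float32'), 2) == np.dtype('float32')
    True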
    """
    return (np.array([0], dtype=dtype) * scalar).dtype


def downcast_intp_index(arr):
    """
    Down-cast index array to np.intp dtype if it is of a larger dtype.

    Raise an error if the array contains a value that is too large for
    intp.
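
    Examples
    --------
    An index array that already fits in ``intp`` is passed through as-is:

    >>> idx = np.array([0, 1, 2], dtype=np.intp)
    >>> downcast_intp_index(idx) is idx
    True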
    """
    if arr.dtype.itemsize > np.dtype(np.intp).itemsize:
        if arr.size == 0:
            return arr.astype(np.intp)
        maxval = arr.max()
        minval = arr.min()
        if maxval > np.iinfo(np.intp).max or minval < np.iinfo(np.intp).min:
            raise ValueError("Cannot deal with arrays with indices larger "
                             "than the machine maximum address size "
                             "(e.g. 64-bit indices on 32-bit machine).")
        return arr.astype(np.intp)
    return arr


def to_native(A):
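    """Return `A` converted to native byte ordering (a copy is made only
    when the input is non-native).

    Examples
    --------
    >>> swapped = np.ones(3, dtype=np.dtype(np.int32).newbyteorder('S'))
    >>> swapped.dtype.isnative
    False
    >>> to_native(swapped).dtype.isnative
    True
    """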
    return np.asarray(A, dtype=A.dtype.newbyteorder('native'))


def getdtype(dtype, a=None, default=None):
    """Function used to simplify argument processing.  If 'dtype' is not
    specified (is None), returns a.dtype; otherwise returns a np.dtype
    object created from the specified dtype argument.  If 'dtype' and 'a'
    are both None, construct a data type out of the 'default' parameter.
    Furthermore, 'dtype' must be in 'allowed' set.
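
    Examples
    --------
    >>> getdtype(None, default=float) == np.dtype('float64')
    True
    >>> getdtype(np.int8) == np.dtype('int8')
    True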
    """
    # TODO is this really what we want?
    if dtype is None:
        try:
            newdtype = a.dtype
        except AttributeError:
            if default is not None:
                newdtype = np.dtype(default)
            else:
                raise TypeError("could not interpret data type")
    else:
        newdtype = np.dtype(dtype)
        if newdtype == np.object_:
            warnings.warn("object dtype is not supported by sparse matrices")

    return newdtype


def get_index_dtype(arrays=(), maxval=None, check_contents=False):
    """
    Based on the input (integer) arrays `arrays`, determine a suitable
    index data type that can hold the data in the arrays.

    Parameters
    ----------
    arrays : tuple of array_like
        Input arrays whose types/contents to check
    maxval : float, optional
        Maximum value needed
    check_contents : bool, optional
        Whether to check the values in the arrays and not just their types.
        Default: False (check only the types)

    Returns
    -------
    dtype : dtype
        Suitable index data type (int32 or int64)

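    Examples
    --------
    >>> get_index_dtype(maxval=10) == np.int32
    True
    >>> get_index_dtype(maxval=np.iinfo(np.int32).max + 1) == np.int64
    True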
    """

    int32max = np.iinfo(np.int32).max

    dtype = np.int32
    if maxval is not None:
        if maxval > int32max:
            dtype = np.int64

    if isinstance(arrays, np.ndarray):
        arrays = (arrays,)

    for arr in arrays:
        arr = np.asarray(arr)
        if arr.dtype > np.int32:
            if check_contents:
                if arr.size == 0:
                    # a bigger type not needed
                    continue
                elif np.issubdtype(arr.dtype, np.integer):
                    maxval = arr.max()
                    minval = arr.min()
                    if (minval >= np.iinfo(np.int32).min and
                            maxval <= int32max):
                        # a bigger type not needed
                        continue

            dtype = np.int64
            break

    return dtype


def isscalarlike(x):
    """Is x either a scalar, an array scalar, or a 0-dim array?"""
    return np.isscalar(x) or (isdense(x) and x.ndim == 0)


def isintlike(x):
    """Is x appropriate as an index into a sparse matrix? Returns True
    if it can be cast safely to a machine int.
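
    Examples
    --------
    >>> isintlike(3.0)
    True
    >>> isintlike(3.5)
    False
    >>> isintlike([3])
    False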
    """
    if issequence(x):
        return False
    try:
        return bool(int(x) == x)
    except (TypeError, ValueError):
        return False


def isshape(x):
    """Is x a valid 2-tuple of dimensions?
    """
    try:
        # Assume it's a tuple of matrix dimensions (M, N)
        (M, N) = x
    except Exception:
        return False
    else:
        if isintlike(M) and isintlike(N):
            if np.ndim(M) == 0 and np.ndim(N) == 0:
                return True
        return False


def issequence(t):
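    """Is t a sequence of scalars: a list or tuple whose first element
    is a scalar (or which is empty), or a 1-D ndarray?

    Examples
    --------
    >>> issequence([1, 2, 3])
    True
    >>> issequence(np.array([[1, 2], [3, 4]]))
    False
    """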
    return ((isinstance(t, (list, tuple)) and
             (len(t) == 0 or np.isscalar(t[0]))) or
            (isinstance(t, np.ndarray) and (t.ndim == 1)))


def ismatrix(t):
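    """Is t a matrix-like object: a list or tuple of rows, where each
    row is itself a sequence of scalars, or a 2-D ndarray?

    Examples
    --------
    >>> ismatrix([[1, 2], [3, 4]])
    True
    >>> ismatrix([1, 2, 3])
    False
    """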
    return ((isinstance(t, (list, tuple)) and len(t) > 0 and
             issequence(t[0])) or
            (isinstance(t, np.ndarray) and t.ndim == 2))


def isdense(x):
    """Is x a dense array (i.e., a numpy ndarray)?"""
    return isinstance(x, np.ndarray)


class IndexMixin(object):
    """
    This class simply exists to hold the methods necessary for fancy indexing.
    """
    def _slicetoarange(self, j, shape):
        """ Given a slice object, use numpy arange to change it to a 1D
        array.
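
        Examples
        --------
        >>> IndexMixin()._slicetoarange(slice(None, None, 2), 5)
        array([0, 2, 4])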
        """
        start, stop, step = j.indices(shape)
        return np.arange(start, stop, step)

    def _unpack_index(self, index):
        """ Parse index. Always return a tuple of the form (row, col).
        Where row/col is a integer, slice, or array of integers.
        """
        # First, check if indexing with single boolean matrix.
        from .base import spmatrix  # This feels dirty but...
        if (isinstance(index, (spmatrix, np.ndarray)) and
                index.ndim == 2 and index.dtype.kind == 'b'):
            return index.nonzero()

        # Parse any ellipses.
        index = self._check_ellipsis(index)

        # Next, parse the tuple or object
        if isinstance(index, tuple):
            if len(index) == 2:
                row, col = index
            elif len(index) == 1:
                row, col = index[0], slice(None)
            else:
                raise IndexError('invalid number of indices')
        else:
            row, col = index, slice(None)

        # Next, check for validity, or transform the index as needed.
        row, col = self._check_boolean(row, col)
        return row, col

    def _check_ellipsis(self, index):
        """Process indices with Ellipsis. Returns modified index."""
        if index is Ellipsis:
            return (slice(None), slice(None))
        elif isinstance(index, tuple):
            # Find first ellipsis
            for j, v in enumerate(index):
                if v is Ellipsis:
                    first_ellipsis = j
                    break
            else:
                first_ellipsis = None

            # Expand the first one
            if first_ellipsis is not None:
                # Shortcuts
                if len(index) == 1:
                    return (slice(None), slice(None))
                elif len(index) == 2:
                    if first_ellipsis == 0:
                        if index[1] is Ellipsis:
                            return (slice(None), slice(None))
                        else:
                            return (slice(None), index[1])
                    else:
                        return (index[0], slice(None))

                # General case
                tail = ()
                for v in index[first_ellipsis+1:]:
                    if v is not Ellipsis:
                        tail = tail + (v,)
                nd = first_ellipsis + len(tail)
                nslice = max(0, 2 - nd)
                return index[:first_ellipsis] + (slice(None),)*nslice + tail

        return index

    def _check_boolean(self, row, col):
        from .base import isspmatrix  # ew...
        # Supporting sparse boolean indexing with both row and col does
        # not work because spmatrix.ndim is always 2.
        if isspmatrix(row) or isspmatrix(col):
            raise IndexError("Indexing with sparse matrices is not supported"
                    " except boolean indexing where matrix and index are equal"
                    " shapes.")
        if isinstance(row, np.ndarray) and row.dtype.kind == 'b':
            row = self._boolean_index_to_array(row)
        if isinstance(col, np.ndarray) and col.dtype.kind == 'b':
            col = self._boolean_index_to_array(col)
        return row, col

    def _boolean_index_to_array(self, i):
        if i.ndim > 1:
            raise IndexError('invalid index shape')
        return i.nonzero()[0]

    def _index_to_arrays(self, i, j):
        i, j = self._check_boolean(i, j)

        i_slice = isinstance(i, slice)
        if i_slice:
            i = self._slicetoarange(i, self.shape[0])[:,None]
        else:
            i = np.atleast_1d(i)

        if isinstance(j, slice):
            j = self._slicetoarange(j, self.shape[1])[None,:]
            if i.ndim == 1:
                i = i[:,None]
            elif not i_slice:
                raise IndexError('index returns 3-dim structure')
        elif isscalarlike(j):
            # scalar j: broadcast against i and return column index arrays
            j = np.atleast_1d(j)
            if i.ndim == 1:
                i, j = np.broadcast_arrays(i, j)
                i = i[:, None]
                j = j[:, None]
                return i, j
        else:
            j = np.atleast_1d(j)
            if i_slice and j.ndim > 1:
                raise IndexError('index returns 3-dim structure')

        i, j = np.broadcast_arrays(i, j)

        if i.ndim == 1:
            # return row-vector (1, n) index arrays for 1-D indexing
            i = i[None,:]
            j = j[None,:]
        elif i.ndim > 2:
            raise IndexError("Index dimension must be <= 2")

        return i, j


def _compat_unique_impl(ar, return_index=False, return_inverse=False):
    """
    Copy of numpy.unique() from Numpy 1.7.1.

    Earlier versions have bugs in how return_index behaves.
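
    Examples
    --------
    >>> _compat_unique_impl(np.array([2, 1, 2]))
    array([1, 2])
    >>> _compat_unique_impl(np.array([2, 1, 2]), return_index=True)
    (array([1, 2]), array([1, 0]))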
    """
    try:
        ar = ar.flatten()
    except AttributeError:
        if not return_inverse and not return_index:
            items = sorted(set(ar))
            return np.asarray(items)
        else:
            ar = np.asanyarray(ar).flatten()

    if ar.size == 0:
        if return_inverse and return_index:
            return ar, np.empty(0, np.bool_), np.empty(0, np.bool_)
        elif return_inverse or return_index:
            return ar, np.empty(0, np.bool_)
        else:
            return ar

    if return_inverse or return_index:
        if return_index:
            perm = ar.argsort(kind='mergesort')
        else:
            perm = ar.argsort()
        aux = ar[perm]
        flag = np.concatenate(([True], aux[1:] != aux[:-1]))
        if return_inverse:
            iflag = np.cumsum(flag) - 1
            iperm = perm.argsort()
            if return_index:
                return aux[flag], perm[flag], iflag[iperm]
            else:
                return aux[flag], iflag[iperm]
        else:
            return aux[flag], perm[flag]

    else:
        ar.sort()
        flag = np.concatenate(([True], ar[1:] != ar[:-1]))
        return ar[flag]


if NumpyVersion(np.__version__) > '1.7.0-dev':
    _compat_unique = np.unique
else:
    _compat_unique = _compat_unique_impl


def _compat_bincount_impl(x, weights=None, minlength=None):
    """
    Bincount with minlength keyword added for Numpy 1.5.
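
    Examples
    --------
    >>> counts = _compat_bincount_impl(np.array([0, 1, 1]), minlength=5)
    >>> counts.shape
    (5,)
    >>> np.all(counts == [1, 2, 0, 0, 0])  # note: padding promotes to float64
    True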
    """
    if weights is None:
        x = np.bincount(x)
    else:
        x = np.bincount(x, weights=weights)
    if minlength is not None:
        if x.shape[0] < minlength:
            x = np.r_[x, np.zeros((minlength - x.shape[0],))]
    return x


if NumpyVersion(np.__version__) > '1.6.0-dev':
    _compat_bincount = np.bincount
else:
    _compat_bincount = _compat_bincount_impl
