1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467
|
# Author: Matthew Brett
"""
Recaster class for recasting numeric arrays
"""
from numpy import *
def _scalar_type_groups():
    ''' Return dict of numpy scalar type lists keyed by group name

    Keys used by the caller are 'int', 'uint', 'float' and 'complex'.

    ``numpy.sctypes`` is used when the installed numpy provides it.  That
    attribute was removed in NumPy 2.0, so otherwise the explicitly sized
    types plus the extended precision types are listed by hand.
    '''
    import numpy as np
    groups = getattr(np, 'sctypes', None)
    if groups is not None:
        return groups
    return {
        'int': [np.int8, np.int16, np.int32, np.int64],
        'uint': [np.uint8, np.uint16, np.uint32, np.uint64],
        'float': [np.float16, np.float32, np.float64, np.longdouble],
        'complex': [np.complex64, np.complex128, np.clongdouble],
    }


def sctype_attributes():
    ''' Return dictionary describing numpy scalar types

    Returns
      d_dict - dictionary keyed by numpy scalar type.  Each value is a
               dictionary with keys:
                 'kind' - dtype kind character ('c', 'f', 'i' or 'u')
                 'size' - item size in bytes
                 'min'  - smallest representable value
                 'max'  - largest representable value
    '''
    import numpy as np
    groups = _scalar_type_groups()
    d_dict = {}
    # Floating point and complex limits come from finfo
    for group in ('complex', 'float'):
        for T in groups[group]:
            info = np.finfo(T)
            dt = np.dtype(T)
            d_dict[T] = {
                'kind': dt.kind,
                'size': dt.itemsize,
                'max': info.max,
                'min': info.min}
    # Integer limits come from iinfo.  The maximum of an unsigned type is
    # 2**bits - 1 (e.g. 255 for uint8), not 2**bits.
    for group in ('int', 'uint'):
        for T in groups[group]:
            info = np.iinfo(T)
            dt = np.dtype(T)
            d_dict[T] = {
                'kind': dt.kind,
                'size': dt.itemsize,
                'min': int(info.min),
                'max': int(info.max)}
    return d_dict
class RecastError(ValueError):
    ''' Error raised when an array cannot be recast within tolerance '''
class Recaster(object):
    ''' Class to recast arrays to one of acceptable scalar types

    Initialization specifies acceptable types (ATs)

    Implements recast method - returns array that may be of different
    storage type to the input array, where the new type is one of the
    ATs. Recast method will return a larger type if no smaller type
    will contain the data without loss of precision greater than
    specified in options at object creation.
    '''

    # Table of kind / size / min / max for every known scalar type
    _sctype_attributes = sctype_attributes()
    # One kilobyte, used to express byte thresholds below
    _k = 2**10
    # Named option sets selectable by string in the constructor
    _option_defaults = {
        'only_if_none': {
            'fp_to_int': 'if_none',
            'fp_to_fp': 'if_none',
            'int_to_int': 'if_none',
            'int_to_fp': 'if_none',
            'downcast_only': False,
            'downcast_within_fp': False,
            'guarantee_fp_to_fp_precision': False,
            'prefer_input_at_threshold': 0,
            'prefer_int_type': 'i',
            },
        'smallest': {
            'fp_to_int': 'always',
            'fp_to_fp': 'always',
            'int_to_int': 'always',
            'int_to_fp': 'always',
            'downcast_only': False,
            'downcast_within_fp': True,
            'guarantee_fp_to_fp_precision': False,
            'prefer_input_at_threshold': 0,
            'prefer_int_type': 'i',
            },
        'fairly_small': {
            'fp_to_int': 'always',
            'fp_to_fp': 'if_none',
            'int_to_int': 'always',
            'int_to_fp': 'if_none',
            'downcast_only': False,
            'downcast_within_fp': False,
            'guarantee_fp_to_fp_precision': False,
            'prefer_input_at_threshold': 2 * _k,
            'prefer_int_type': 'i',
            },
        'preserve_precision': {
            'fp_to_int': 'never',
            'fp_to_fp': 'if_none',
            'int_to_int': 'if_none',
            'int_to_fp': 'never',
            'downcast_only': False,
            'downcast_within_fp': False,
            'guarantee_fp_to_fp_precision': True,
            'prefer_input_at_threshold': 0,
            'prefer_int_type': 'i',
            }
        }

    def __init__(self, sctype_list=None,
                 sctype_tols=None,
                 recast_options='only_if_none'):
        ''' Set types for which we are attempting to downcast

        Input
        sctype_list  - list of acceptable scalar types
                     If None defaults to all system types
        sctype_tols  - dictionary keyed by scalar type; each value is a
                     dictionary with keys 'rtol' and 'atol' giving the
                     tolerances for checking near equality in
                     downcasting. Entries replace the defaults for the
                     given types. Note that tolerance values for
                     integers are used for upcasting integers to floats
        recast_options - dictionary of options for recasting or string
                     specifying one of default options dictionaries.
                     None is treated as 'only_if_none'. A dictionary
                     is applied on top of the 'only_if_none' defaults.

        recast_option strings can be:
        only_if_none - only attempts recast if the type is not in
                       acceptable types
        smallest     - return array of smallest possible type within
                       tolerance
        fairly_small - compromise set of options between speed of
                       downcast and size of output
        preserve_precision - recasts arrays only to types that preserve
                       precision

        Elements in recast_options dictionary:
        fp_to_int     - "always" or "if_none" or "never"
                         When to attempt cast of floating point to int
        fp_to_fp      - "always" or "if_none" or "never"
                         When to attempt cast of floating point to
                         floating point
        int_to_int    - "always" or "if_none" or "never"
                         When to attempt cast of int to int
        int_to_fp     - "always" or "if_none" or "never"
                         When to attempt cast of int to floating point
        downcast_only - if True, only return datatype of same size or
                         less
        downcast_within_fp - if True, tries downcasting within fp
                         types, even if there is an fp type that
                         already matches
        guarantee_fp_to_fp_precision - if True, will only do fp to fp
                         array casting to type of same or higher
                         precision. Note that if fp_to_int recasting
                         is allowed this will allow precision loss of
                         fp values
        prefer_input_at_threshold - number of bytes. If input array
                         size in bytes is smaller than this number,
                         and its type is in the valid types list,
                         return the array without attempting recasting
        prefer_int_type - if 'i', when recasting to integer type,
                         prefer int when equal sized uint is also
                         available. Prefer uint otherwise.

        Raises ValueError if recast_options is a string that does not
        name one of the default option sets.
        '''
        if sctype_list is None:
            sctype_list = list(self._sctype_attributes.keys())
        self.sctype_list = sctype_list
        # Tolerances
        self.sctype_tols = self.default_sctype_tols()
        if sctype_tols is not None:
            self.sctype_tols.update(sctype_tols)
        # Casting options
        if recast_options is None:
            recast_options = 'only_if_none'
        try:
            string_types = basestring  # Python 2: str and unicode
        except NameError:
            string_types = str
        if isinstance(recast_options, string_types):
            try:
                defaults = self._option_defaults[recast_options]
            except KeyError:
                raise ValueError(
                    'Did not recognize option string %s' % recast_options)
            # Copy, so the instance never aliases the class-level table
            self.recast_options = dict(defaults)
        else:
            # Copy before updating; updating the class-level dictionary
            # in place would change the defaults of every other instance
            self.recast_options = dict(self._option_defaults['only_if_none'])
            self.recast_options.update(recast_options)
        # Cache sctype sizes,
        self.sized_sctypes = {}
        for k in ('c', 'f', 'i', 'u'):
            self.sized_sctypes[k] = self.sctypes_by_size(k)
        # Cache all integer sizes, largest first
        self.ints_sized_sctypes = (list(self.sized_sctypes['i']) +
                                   list(self.sized_sctypes['u']))
        self.ints_sized_sctypes.sort(key=lambda entry: entry[1],
                                     reverse=True)
        # Cache capable types list and sizes
        self._capable_sctypes = {}
        self._capable_sctype_sizes = {}
        self._c2f_capable_sctype_sizes = {}
        flts = self.sized_sctypes['f']
        for k in self._sctype_attributes:
            sct = self.get_capable_sctype(k)
            self._capable_sctypes[k] = sct
            if sct is None:
                # No acceptable type can hold k; inf means "no size limit"
                self._capable_sctype_sizes[k] = inf
                if dtype(k).kind == 'c':
                    self._c2f_capable_sctype_sizes[k] = inf
                continue
            dtp = dtype(sct)
            self._capable_sctype_sizes[k] = dtp.itemsize
            if dtp.kind == 'c':
                # Size of the smallest acceptable float that is at least
                # half the size of the capable complex type
                fsz = inf
                min_sz = ceil(dtp.itemsize / 2.0)
                for T, sz in flts:
                    if sz < min_sz:
                        break
                    fsz = sz
                self._c2f_capable_sctype_sizes[k] = fsz

    def default_sctype_tols(self):
        ''' Default allclose tolerance values for all dtypes

        Returns dictionary keyed by scalar type, with values being
        dictionaries with keys 'rtol' and 'atol'.  Floating point and
        complex types use the eps and tiny values of their own finfo;
        integer types use those of float64.
        '''
        t_dict = {}
        int_info = finfo(float64)
        for T, attrs in self._sctype_attributes.items():
            if attrs['kind'] in ('c', 'f'):
                F = finfo(dtype(T))
            else:
                F = int_info
            t_dict[T] = {
                'rtol': F.eps,
                'atol': F.tiny}
        return t_dict

    def sctypes_by_size(self, kind):
        ''' Returns storage size ordered list of entries of scalar type sctype

        Input
        kind   - one of  "c",  "f", "i" or "u"
                 (for complex, float, integer, unsigned integer)

        Returns list of [sctype, itemsize] entries for the acceptable
        types of the given kind, largest itemsize first.
        '''
        D = []
        for t in self.sctype_list:
            dt = dtype(t)
            if dt.kind == kind:
                D.append([t, dt.itemsize])
        D.sort(key=lambda entry: entry[1], reverse=True)
        return D

    def get_capable_sctype(self, sct):
        ''' Return smallest scalar type containing sct type without precision loss

        Input
        sct     - scalar type

        ID = input type. AT = acceptable type.  Return ID if ID is
        in ATs. Otherwise return smallest AT that is larger than or
        same size as ID.

        If the desired sctype is an integer, returns the smallest
        integer (int or uint) that can contain the range of the input
        integer type

        If there is no type that can contain sct without loss of
        precision, return None
        '''
        if sct in self.sctype_list:
            return sct
        D = self._sctype_attributes[sct]
        if D['kind'] in ('u', 'i'):
            # Unsigned and signed integers
            # Precision loss defined by max min outside datatype range
            return self.smallest_int_sctype(D['max'], D['min'])
        # Complex and float types
        # Precision loss defined by data size < sct
        out_t = None
        dti = D['size']
        # Candidates are ordered largest first, so the last one that is
        # still big enough is the smallest capable type
        for t, sz in self.sized_sctypes[D['kind']]:
            if sz < dti:
                break
            out_t = t
        return out_t

    def cast_to_fp(self, arr, kind,
                   max_size=inf,
                   continue_down=False):
        ''' Return fp arr maybe recast to specified kind, different sctype

        Inputs
        arr         - array to possibly recast
        kind        - kind of array to recast within
                      (one of "c", "f", "u", "i")
        max_size    - maximum size of sctype to return (in bytes)
        continue_down - if False, return array of largest sctype
                        within tolerance and <= max_size
                        if True, continue downcasting within kind
                        to find smallest possible within tolerance

        If arr cannot be recast within given tolerances, and size,
        return None
        '''
        tols = self.sctype_tols[arr.dtype.type]
        rtol, atol = tols['rtol'], tols['atol']
        ret_arr = None
        for T, sz in self.sized_sctypes[kind]:
            if sz > max_size:
                continue
            test_arr = arr.astype(T)
            if not allclose(test_arr, arr, rtol, atol):
                # Types get smaller from here on, so stop at first failure
                break
            ret_arr = test_arr
            if not continue_down:
                break
        return ret_arr

    def smallest_int_sctype(self, mx, mn, prefer='i'):
        ''' Return integer type with smallest storage containing mx and mn

        Inputs
        mx      - maximum value
        mn      - minimum value
        prefer  - if == 'i' prefer int for range also compatible
                  uint, else prefer uint in same situation

        Returns None if no integer can contain this range
        '''
        sct = None
        sz = inf
        for T, tsz in self.ints_sized_sctypes:
            t_dict = self._sctype_attributes[T]
            if t_dict['max'] < mx or t_dict['min'] > mn:
                continue
            if tsz < sz:
                sct = T
                sz = tsz
            elif tsz == sz and t_dict['kind'] == prefer:
                # Same size as current best; switch only to preferred kind
                sct = T
        return sct

    def cast_to_integer(self, arr, prefer='i'):
        ''' Casts arr to smallest integer containing range

        Returns None if range of arr cannot be contained in acceptable
        integer types

        prefer  - if == 'i' prefer int for range also compatible
                  uint, else prefer uint in same situation
        '''
        mx = amax(arr)
        mn = amin(arr)
        idt = self.smallest_int_sctype(mx, mn, prefer)
        if idt is None:
            return None
        return arr.astype(idt)

    def _recast_wanted(self, policy, ret_arr):
        ''' True if a cast governed by option value policy should be tried

        policy   - "always", "if_none" or "never"
        ret_arr  - current candidate output array, or None if none yet
        '''
        return policy == 'always' or (policy == 'if_none' and
                                      ret_arr is None)

    def recast(self, arr):
        ''' Recast array to type in type list

        If cannot recast to an array within tolerance,
        raise error

        Raises TypeError if the kind of arr is not one of complex,
        float, unsigned integer or integer, and RecastError if no
        acceptable array could be found.
        '''
        dtp = arr.dtype
        dtk = dtp.kind
        dti = dtp.itemsize
        dtt = dtp.type
        opts = self.recast_options
        curr_size = inf
        ret_arr = None
        valid_input_arr = dtt in self.sctype_list
        if valid_input_arr:
            if opts['prefer_input_at_threshold'] > arr.nbytes:
                return arr
            ret_arr = arr
        if opts['downcast_only'] or valid_input_arr:
            curr_size = dti
        # Check the kind before the tolerance lookup, so that unsupported
        # kinds give the intended TypeError instead of a KeyError
        if dtk not in ('c', 'f', 'u', 'i'):
            raise TypeError('Do not recognize array kind %s' % dtk)
        tols = self.sctype_tols[dtt]
        rtol, atol = tols['rtol'], tols['atol']
        if dtk in ('c', 'f'):
            if self._recast_wanted(opts['fp_to_int'], ret_arr):
                test_arr = self.cast_to_integer(arr,
                                                opts['prefer_int_type'])
                if test_arr is not None and \
                   test_arr.dtype.itemsize < curr_size:
                    if allclose(arr, test_arr, rtol, atol):
                        ret_arr = test_arr
                        curr_size = ret_arr.dtype.itemsize
            if self._recast_wanted(opts['fp_to_fp'], ret_arr):
                if dtk == 'c' and not opts['guarantee_fp_to_fp_precision']:
                    # Try casting to float
                    max_size = min([self._c2f_capable_sctype_sizes[dtt],
                                    curr_size - 1])
                    test_arr = self.cast_to_fp(arr,
                                               'f',
                                               max_size,
                                               opts['downcast_within_fp'])
                    if test_arr is not None:
                        ret_arr = test_arr
                        curr_size = ret_arr.dtype.itemsize
            # Re-evaluated on purpose: ret_arr may have been set just above
            if self._recast_wanted(opts['fp_to_fp'], ret_arr):
                # Cast float or complex to another of same type
                if opts['guarantee_fp_to_fp_precision']:
                    sct = self._capable_sctypes[dtt]
                    sz = self._capable_sctype_sizes[dtt]
                    if sz < curr_size and sct is not None:
                        ret_arr = arr.astype(sct)
                        curr_size = sz
                else:
                    max_size = min([self._capable_sctype_sizes[dtt],
                                    curr_size - 1])
                    test_arr = self.cast_to_fp(arr,
                                               dtk,
                                               max_size,
                                               opts['downcast_within_fp'])
                    if test_arr is not None:
                        ret_arr = test_arr
                        curr_size = ret_arr.dtype.itemsize
        else:
            # Unsigned or signed integer input
            if self._recast_wanted(opts['int_to_int'], ret_arr):
                test_arr = self.cast_to_integer(arr,
                                                opts['prefer_int_type'])
                if test_arr is not None and \
                   test_arr.dtype.itemsize < curr_size:
                    ret_arr = test_arr
                    curr_size = ret_arr.dtype.itemsize
            if self._recast_wanted(opts['int_to_fp'], ret_arr):
                test_arr = self.cast_to_fp(arr,
                                           'f',
                                           curr_size-1,
                                           opts['downcast_within_fp'])
                if test_arr is not None:
                    ret_arr = test_arr
        if ret_arr is not None:
            return ret_arr
        raise RecastError('Cannot recast array within tolerance')

    def recast_best_sctype(self, arr):
        ''' Recast array, return closest sctype to original

        Returns tuple of recast array and best sctype to contain
        original data before recasting
        '''
        sct = arr.dtype.type
        arr = self.recast(arr)
        if sct not in self.sctype_list:
            sct = self._capable_sctypes[sct]
            if sct is None:
                # Nothing can hold the original type; report what we got
                sct = arr.dtype.type
        return arr, sct
|