File: recaster.py

package info (click to toggle)
python-scipy 0.7.2%2Bdfsg1-1%2Bdeb6u1
  • links: PTS, VCS
  • area: main
  • in suites: squeeze-lts
  • size: 28,572 kB
  • ctags: 36,183
  • sloc: cpp: 216,880; fortran: 76,016; python: 71,833; ansic: 62,118; makefile: 243; sh: 17
file content (467 lines) | stat: -rw-r--r-- 17,823 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
# Author: Matthew Brett

"""
Recaster class for recasting numeric arrays
"""

from numpy import *

def sctype_attributes():
    ''' Return dictionary describing numpy scalar types

    Keys are the numpy scalar types listed in ``sctypes`` under
    'complex', 'float', 'int' and 'uint'.  Each value is a dictionary
    with entries:

    kind - dtype kind character of the type
    size - item size of the type in bytes
    min  - smallest value of the type (from finfo for complex / float
           types, computed from the item size for integer types)
    max  - largest value of the type (same sources as min)
    '''
    d_dict = {}
    for sc_type in ('complex', 'float'):
        for T in sctypes[sc_type]:
            F = finfo(T)
            dt = dtype(T)
            d_dict[T] = {
                'kind': dt.kind,
                'size': dt.itemsize,
                'max': F.max,
                'min': F.min}
    for T in sctypes['int']:
        dt = dtype(T)
        sz = dt.itemsize
        # One bit holds the sign: range is [-2**(bits-1), 2**(bits-1) - 1]
        end = 2 ** (sz * 8 - 1)
        d_dict[T] = {
            'kind': dt.kind,
            'size': sz,
            'min': -end,
            'max': end - 1}
    for T in sctypes['uint']:
        dt = dtype(T)
        sz = dt.itemsize
        # All bits hold the value: range is [0, 2**bits - 1].  The upper
        # limit must be 2**bits - 1 (e.g. 255 for uint8), not 2**bits.
        end = 2 ** (sz * 8)
        d_dict[T] = {
            'kind': dt.kind,
            'size': sz,
            'min': 0,
            'max': end - 1}
    return d_dict

class RecastError(ValueError):
    ''' Error raised when an array cannot be recast within tolerance '''

class Recaster(object):
    ''' Class to recast arrays to one of acceptable scalar types

    Initialization specifies acceptable types (ATs)

    Implements recast method - returns array that may be of different
    storage type to the input array, where the new type is one of the
    ATs. Recast method will return a larger type if no smaller type
    will contain the data without loss of precision greater than
    specified in options at object creation.
    '''

    # Per scalar type: kind, size in bytes, min and max values
    _sctype_attributes = sctype_attributes()
    _k = 2**10
    # Named option sets; instances always work on a copy of one of these
    _option_defaults = {
        'only_if_none': {
        'fp_to_int': 'if_none',
        'fp_to_fp': 'if_none',
        'int_to_int': 'if_none',
        'int_to_fp': 'if_none',
        'downcast_only': False,
        'downcast_within_fp': False,
        'guarantee_fp_to_fp_precision': False,
        'prefer_input_at_threshold': 0,
        'prefer_int_type': 'i',
        },
        'smallest': {
        'fp_to_int': 'always',
        'fp_to_fp': 'always',
        'int_to_int': 'always',
        'int_to_fp': 'always',
        'downcast_only': False,
        'downcast_within_fp': True,
        'guarantee_fp_to_fp_precision': False,
        'prefer_input_at_threshold': 0,
        'prefer_int_type': 'i',
        },
        'fairly_small': {
        'fp_to_int': 'always',
        'fp_to_fp': 'if_none',
        'int_to_int': 'always',
        'int_to_fp': 'if_none',
        'downcast_only': False,
        'downcast_within_fp': False,
        'guarantee_fp_to_fp_precision': False,
        'prefer_input_at_threshold': 2 * _k,
        'prefer_int_type': 'i',
        },
        'preserve_precision': {
        'fp_to_int': 'never',
        'fp_to_fp': 'if_none',
        'int_to_int': 'if_none',
        'int_to_fp': 'never',
        'downcast_only': False,
        'downcast_within_fp': False,
        'guarantee_fp_to_fp_precision': True,
        'prefer_input_at_threshold': 0,
        'prefer_int_type': 'i',
        }
        }

    def __init__(self, sctype_list=None,
                 sctype_tols=None,
                 recast_options='only_if_none'):
        ''' Set types for which we are attempting to downcast

        Input
        sctype_list  - list of acceptable scalar types
                     If None defaults to all system types
        sctype_tols  - dictionary key datatype, values rtol, tol
                     to specify tolerances for checking near equality in
                     downcasting. Note that tolerance values for integers
                     are used for upcasting integers to floats
        recast_options - dictionary of options for recasting or string
                     specifying one of default options dictionaries.
                     A dictionary is merged over the 'only_if_none'
                     defaults. None is treated as 'only_if_none'.

        recast_option strings can be:
        only_if_none - only attempts recast if the type is not in
                       acceptable types
        smallest     - return array of smallest possible type within tolerance
        fairly_small - compromise set of options between speed of downcast and
                       size of output
        preserve_precision - recasts arrays only to types that preserve precision

        Elements in recast_options dictionary:
        fp_to_int     - "always" or "if_none" or "never"
                         When to attempt cast of floating point to int
        fp_to_fp      - "always" or "if_none" or "never"
                         When to attempt cast of floating point to floating point
        int_to_int    - "always" or "if_none" or "never"
                         When to attempt cast of int to int
        int_to_fp     - "always" or "if_none" or "never"
                         When to attempt cast of int to floating point
        downcast_only - if True, only return datatype of same size or less
        downcast_within_fp - if True, tries downcasting within fp types, even
                             if there is an fp type that already matches
        guarantee_fp_to_fp_precision - if True, will only do fp to fp array
                        casting to type of same or higher precision. Note that
                        if fp_to_int recasting is allowed this will allow
                        precision loss of fp values
        prefer_input_at_threshold - number of bytes. If input array size
                        in bytes is less than this number, and the array
                        type is in valid types list, return the array
                        without attempting recasting
        prefer_int_type - if 'i', when recasting to integer type, prefer int
                        when equal sized uint is also available. Prefer
                        uint otherwise.

        Raises ValueError if recast_options is a string that does not
        name one of the default options dictionaries.
        '''
        if sctype_list is None:
            sctype_list = list(self._sctype_attributes)
        self.sctype_list = sctype_list
        # Tolerances
        self.sctype_tols = self.default_sctype_tols()
        if sctype_tols is not None:
            self.sctype_tols.update(sctype_tols)
        # Casting options
        if recast_options is None:
            recast_options = 'only_if_none'
        try:
            string_types = basestring
        except NameError:
            # Python 3 has no basestring
            string_types = str
        if isinstance(recast_options, string_types):
            try:
                defaults = self._option_defaults[recast_options]
            except KeyError:
                raise ValueError(
                    'Did not recognize option string %s' % recast_options)
            # Copy, so that changing the options of one instance cannot
            # alter the class-level defaults shared by all instances
            self.recast_options = dict(defaults)
        else:
            # Copy before merging; updating the shared default dictionary
            # in place would leak these options into every other instance
            self.recast_options = dict(self._option_defaults['only_if_none'])
            self.recast_options.update(recast_options)
        # Cache sctype sizes,
        self.sized_sctypes = {}
        for k in ('c', 'f', 'i', 'u'):
            self.sized_sctypes[k] = self.sctypes_by_size(k)
        # Cache all integer sizes, largest first
        self.ints_sized_sctypes = (self.sized_sctypes['i'] +
                                   self.sized_sctypes['u'])
        self.ints_sized_sctypes.sort(key=lambda entry: entry[1],
                                     reverse=True)
        # Cache capable types list and sizes
        self._capable_sctypes = {}
        self._capable_sctype_sizes = {}
        self._c2f_capable_sctype_sizes = {}
        flts = self.sized_sctypes['f']
        for k in self._sctype_attributes:
            sct = self.get_capable_sctype(k)
            self._capable_sctypes[k] = sct
            if sct is None:
                self._capable_sctype_sizes[k] = inf
                # Test the dtype kind character; the dtype type is a
                # class and never compares equal to 'c'
                if dtype(k).kind == 'c':
                    self._c2f_capable_sctype_sizes[k] = inf
                continue
            dtp = dtype(sct)
            self._capable_sctype_sizes[k] = dtp.itemsize
            if dtp.kind == 'c':
                # Size of smallest acceptable float type that is at least
                # half the size of the capable complex type; inf if none
                min_sz = ceil(dtp.itemsize / 2.0)
                fsz = inf
                for T, sz in flts:
                    if sz < min_sz:
                        break
                    fsz = sz
                self._c2f_capable_sctype_sizes[k] = fsz

    def default_sctype_tols(self):
        ''' Default allclose tolerance values for all dtypes

        Returns dictionary with scalar types as keys and dictionaries
        with entries 'rtol' and 'atol' as values.  Complex and float
        types use eps and tiny from their own finfo; integer types use
        the float64 values.
        '''
        t_dict = {}
        for sc_type in ('complex', 'float'):
            for T in sctypes[sc_type]:
                F = finfo(dtype(T))
                t_dict[T] = {
                    'rtol': F.eps,
                    'atol': F.tiny}
        F = finfo(float64)
        for sc_type in ('int', 'uint'):
            for T in sctypes[sc_type]:
                t_dict[T] = {
                    'rtol': F.eps,
                    'atol': F.tiny}
        return t_dict

    def sctypes_by_size(self, kind):
        ''' Returns storage size ordered list of entries of scalar type sctype

        Input
        kind   - one of  "c",  "f", "i" or "u"
                 (for complex, float, integer, unsigned integer)

        Returns list of [sctype, itemsize] entries for the acceptable
        types of this kind, largest itemsize first.
        '''
        sized = []
        for t in self.sctype_list:
            dt = dtype(t)
            if dt.kind == kind:
                sized.append([t, dt.itemsize])
        # Stable sort: types of equal size keep their sctype_list order
        sized.sort(key=lambda entry: entry[1], reverse=True)
        return sized

    def get_capable_sctype(self, sct):
        ''' Return smallest scalar type containing sct type without precision loss

        Input
        sct     - scalar type

        ID = input type. AT = acceptable type.  Return ID if ID is
        in ATs. Otherwise return smallest AT that is larger than or
        same size as ID.

        If the desired sctype is an integer, returns the smallest
        integer (int or uint) that can contain the range of the input
        integer type

        If there is no type that can contain sct without loss of
        precision, return None
        '''
        if sct in self.sctype_list:
            return sct
        D = self._sctype_attributes[sct]
        if D['kind'] in ('u', 'i'):
            # Unsigned and signed integers
            # Precision loss defined by max min outside datatype range
            return self.smallest_int_sctype(D['max'], D['min'])
        # Complex and float types
        # Precision loss defined by data size < sct
        out_t = None
        for T, sz in self.sized_sctypes[D['kind']]:
            # Entries are ordered largest first, so the last one that is
            # still big enough is the smallest capable type
            if sz < D['size']:
                break
            out_t = T
        return out_t

    def cast_to_fp(self, arr, kind,
                   max_size=inf,
                   continue_down=False):
        ''' Return fp arr maybe recast to specified kind, different sctype

        Inputs
        arr         - array to possibly recast
        kind        - kind of array to recast within
                      (one of "c", "f", "u", "i")
        max_size    - maximum size of sctype to return (in bytes)
        continue_down - if False, return array of largest sctype
                        within tolerance and <= max_size
                        if True, continue downcasting within kind
                        to find smallest possible within tolerance

        If arr cannot be recast within given tolerances, and size,
        return None
        '''
        tols = self.sctype_tols[arr.dtype.type]
        rtol, atol = tols['rtol'], tols['atol']
        ret_arr = None
        # Candidate types are ordered largest first
        for T, sz in self.sized_sctypes[kind]:
            if sz > max_size:
                continue
            test_arr = arr.astype(T)
            if not allclose(test_arr, arr, rtol, atol):
                # Out of tolerance here; do not try anything smaller
                break
            ret_arr = test_arr
            if not continue_down:
                break
        return ret_arr

    def smallest_int_sctype(self, mx, mn, prefer='i'):
        ''' Return integer type with smallest storage containing mx and mn

        Inputs
        mx      - maximum value
        mn      - minumum value
        prefer  - if == 'i' prefer int for range also compatible
                  uint, else prefer uint in same situation

        Returns None if no integer can contain this range
        '''
        sct = None
        sz = inf
        for T, tsz in self.ints_sized_sctypes:
            t_dict = self._sctype_attributes[T]
            if t_dict['max'] < mx or t_dict['min'] > mn:
                continue
            if tsz < sz:
                sct = T
                sz = tsz
            elif tsz == sz and t_dict['kind'] == prefer:
                # Same storage size as current choice; take preferred kind
                sct = T
        return sct

    def cast_to_integer(self, arr, prefer='i'):
        ''' Casts arr to smallest integer containing range

        Returns None if range of arr cannot be contained in acceptable
        integer types

        prefer  - if == 'i' prefer int for range also compatible
                  uint, else prefer uint in same situation

        '''
        idt = self.smallest_int_sctype(amax(arr), amin(arr), prefer)
        if idt is None:
            return None
        return arr.astype(idt)

    def _cast_wanted(self, option, ret_arr):
        ''' Return True if recast option `option` allows a cast attempt

        Inputs
        option   - name of recast option holding "always", "if_none"
                   or "never"
        ret_arr  - candidate output array found so far, or None
        '''
        when = self.recast_options[option]
        return when == 'always' or (when == 'if_none' and ret_arr is None)

    def recast(self, arr):
        ''' Recast array to type in type list

        Input
        arr     - array to recast

        Returns arr itself or a copy of arr cast to another type,
        as selected by the recast options.

        If cannot recast to  an array within tolerance,
        raise RecastError.  Raises TypeError if the array is not of
        complex, float, integer or unsigned integer kind.
        '''
        dtp = arr.dtype
        dtk = dtp.kind
        dti = dtp.itemsize
        dtt = dtp.type
        opts = self.recast_options
        curr_size = inf
        ret_arr = None
        valid_input_arr = dtt in self.sctype_list
        if valid_input_arr:
            if opts['prefer_input_at_threshold'] > arr.nbytes:
                return arr
            ret_arr = arr
        if opts['downcast_only'] or valid_input_arr:
            curr_size = dti
        # Check the kind before the tolerance lookup, so that an
        # unsupported array gives this error rather than a KeyError
        if dtk not in ('c', 'f', 'u', 'i'):
            raise TypeError('Do not recognize array kind %s' % dtk)
        tols = self.sctype_tols[dtt]
        rtol, atol = tols['rtol'], tols['atol']
        if dtk in ('c', 'f'):
            if self._cast_wanted('fp_to_int', ret_arr):
                test_arr = self.cast_to_integer(arr,
                                                opts['prefer_int_type'])
                if test_arr is not None and \
                   test_arr.dtype.itemsize < curr_size:
                    if allclose(arr, test_arr, rtol, atol):
                        ret_arr = test_arr
                        curr_size = ret_arr.dtype.itemsize
            if self._cast_wanted('fp_to_fp', ret_arr):
                if dtk == 'c' and not opts['guarantee_fp_to_fp_precision']:
                    # Try casting to float
                    max_size = min([self._c2f_capable_sctype_sizes[dtt],
                                    curr_size - 1])
                    test_arr = self.cast_to_fp(arr,
                                               'f',
                                               max_size,
                                               opts['downcast_within_fp'])
                    if test_arr is not None:
                        ret_arr = test_arr
                        curr_size = ret_arr.dtype.itemsize
                # Check again; the cast to float may have found an array
                if self._cast_wanted('fp_to_fp', ret_arr):
                    # Cast float or complex to another of same type
                    if opts['guarantee_fp_to_fp_precision']:
                        sct = self._capable_sctypes[dtt]
                        sz = self._capable_sctype_sizes[dtt]
                        if sz < curr_size and sct is not None:
                            ret_arr = arr.astype(sct)
                            curr_size = sz
                    else:
                        max_size = min([self._capable_sctype_sizes[dtt],
                                        curr_size - 1])
                        test_arr = self.cast_to_fp(arr,
                                                   dtk,
                                                   max_size,
                                                   opts['downcast_within_fp'])
                        if test_arr is not None:
                            ret_arr = test_arr
                            curr_size = ret_arr.dtype.itemsize
        else:
            # Signed or unsigned integer input
            if self._cast_wanted('int_to_int', ret_arr):
                test_arr = self.cast_to_integer(arr,
                                                opts['prefer_int_type'])
                if test_arr is not None and \
                       test_arr.dtype.itemsize < curr_size:
                    ret_arr = test_arr
                    curr_size = ret_arr.dtype.itemsize
            if self._cast_wanted('int_to_fp', ret_arr):
                test_arr = self.cast_to_fp(arr,
                                           'f',
                                           curr_size-1,
                                           opts['downcast_within_fp'])
                if test_arr is not None:
                    ret_arr = test_arr

        if ret_arr is not None:
            return ret_arr
        raise RecastError('Cannot recast array within tolerance')

    def recast_best_sctype(self, arr):
        ''' Recast array, return closest sctype to original

        Returns tuple of recast array and best sctype to contain
        original data before recasting.  If no acceptable type can
        contain the original type, the type of the recast array is
        returned instead.
        '''
        sct = arr.dtype.type
        arr = self.recast(arr)
        if sct not in self.sctype_list:
            sct = self._capable_sctypes[sct]
            if sct is None:
                sct = arr.dtype.type
        return arr, sct