File: npfile.py

package info (click to toggle)
python-scipy 0.6.0-12
  • links: PTS, VCS
  • area: main
  • in suites: lenny
  • size: 32,016 kB
  • ctags: 46,675
  • sloc: cpp: 124,854; ansic: 110,614; python: 108,664; fortran: 76,260; objc: 424; makefile: 384; sh: 10
file content (225 lines) | stat: -rw-r--r-- 7,998 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
# Authors: Matthew Brett, Travis Oliphant

"""
Class for reading and writing numpy arrays from / to binary files
"""

import sys

import numpy as N

__all__ = ['sys_endian_code', 'npfile']

sys_endian_code = (sys.byteorder == 'little') and '<' or '>'

class npfile(object):
    ''' Class for reading and writing numpy arrays to/from files
    
    Inputs:
      file_name -- The complete path name to the file to open
                   or an open file-like object
      permission -- Open the file with given permissions: ('r', 'w', 'a')
                    for reading, writing, or appending.  This is the same
                    as the mode argument in the builtin open command.
      format -- The byte-ordering of the file:
                (['native', 'n'], ['ieee-le', 'l'], ['ieee-be', 'B']) for
                native, little-endian, or big-endian respectively.

    Attributes:
      endian   -- default endian code for reading / writing
      order    -- default order for reading writing ('C' or 'F')
      file     -- file object containing read / written data

    Methods:
      seek, tell, close  -- as for file objects
      rewind             -- set read position to beginning of file
      read_raw           -- read string data from file (read method of file)
      write_raw          -- write string data to file (write method of file)
      read_array         -- read numpy array from binary file data
      write_array        -- write numpy array contents to binary file
      
    Example use:
    >>> from StringIO import StringIO
    >>> import numpy as N
    >>> from scipy.io import npfile
    >>> arr = N.arange(10).reshape(5,2)
    >>> # Make file-like object (could also be file name)
    >>> my_file = StringIO()
    >>> npf = npfile(my_file)
    >>> npf.write_array(arr)
    >>> npf.rewind()
    >>> npf.read_array((5,2), arr.dtype)
    >>> npf.close()
    >>> # Or read write in Fortran order, Big endian
    >>> # and read back in C, system endian
    >>> my_file = StringIO()
    >>> npf = npfile(my_file, order='F', endian='>')
    >>> npf.write_array(arr)
    >>> npf.rewind()
    >>> npf.read_array((5,2), arr.dtype)
    '''

    def __init__(self, file_name,
                 permission='rb',
                 endian = 'dtype',
                 order = 'C'):
        if 'b' not in permission: permission += 'b'
        if isinstance(file_name, basestring):
            self.file = file(file_name, permission)
        else:
            try:
                closed = file_name.closed
            except AttributeError:
                raise TypeError, 'Need filename or file object as input'
            if closed:
                raise TypeError, 'File object should be open'
            self.file = file_name
        self.endian = endian
        self.order = order

    def get_endian(self):
        return self._endian
    def set_endian(self, endian_code):
        self._endian = self.parse_endian(endian_code)
    endian = property(get_endian, set_endian, None, 'get/set endian code')
                                     
    def parse_endian(self, endian_code):
        ''' Returns valid endian code from wider input options'''
        if endian_code in ['native', 'n', 'N','default', '=']:
            return sys_endian_code
        elif endian_code in ['swapped', 's', 'S']:
            return sys_endian_code == '<' and '>' or '<'
        elif endian_code in ['ieee-le','l','L','little-endian',
                             'little','le','<']:
            return '<'
        elif endian_code in ['ieee-be','B','b','big-endian',
                             'big','be', '>']:
            return '>'
        elif endian_code == 'dtype':
            return 'dtype'
        else:
            raise ValueError, "Unrecognized endian code: " + endian_code
        return

    def __del__(self):
        try:
            self.file.close()
        except:
            pass

    def close(self):
        self.file.close()

    def seek(self, *args):
        self.file.seek(*args)

    def tell(self):
        return self.file.tell()

    def rewind(self,howmany=None):
        """Rewind a file to its beginning or by a specified amount.
        """
        if howmany is None:
            self.seek(0)
        else:
            self.seek(-howmany,1)

    def read_raw(self, size=-1):
        """Read raw bytes from file as string."""
        return self.file.read(size)

    def write_raw(self, str):
        """Write string to file as raw bytes."""
        return self.file.write(str)

    def remaining_bytes(self):
        cur_pos = self.tell()
        self.seek(0, 2)
        end_pos = self.tell()
        self.seek(cur_pos)
        return end_pos - cur_pos

    def _endian_order(self, endian, order):
        ''' Housekeeping function to return endian, order from input args '''
        if endian is None:
            endian = self.endian
        else:
            endian = self.parse_endian(endian)
        if order is None:
            order = self.order
        return endian, order

    def _endian_from_dtype(self, dt):
        dt_endian = dt.byteorder
        if dt_endian == '=':
            dt_endian = sys_endian_code
        return dt_endian
    
    def write_array(self, data, endian=None, order=None):
        ''' Write to open file object the flattened numpy array data

        Inputs
        data      - numpy array or object convertable to array
        endian    - endianness of written data
                    (can be None, 'dtype', '<', '>')
                    (if None, get from self.endian)
        order     - order of array to write (C, F)
                    (if None from self.order)
        '''
        endian, order = self._endian_order(endian, order)
        data = N.asarray(data)
        dt_endian = self._endian_from_dtype(data.dtype)
        if not endian == 'dtype':
            if dt_endian != endian:
                data = data.byteswap()
        self.file.write(data.tostring(order=order))
        
    def read_array(self, dt, shape=-1, endian=None, order=None):
        '''Read data from file and return it in a numpy array.
        
        Inputs
        ------
        dt        - dtype of array to be read
        shape     - shape of output array, or number of elements
                    (-1 as number of elements or element in shape
                    means unknown dimension as in reshape; size
                    of array calculated from remaining bytes in file)
        endian    - endianness of data in file
                    (can be None, 'dtype', '<', '>')
                    (if None, get from self.endian)
        order     - order of array in file (C, F)
                    (if None get from self.order)

        Outputs
        arr       - array from file with given dtype (dt)
        '''
        endian, order = self._endian_order(endian, order)
        dt = N.dtype(dt)
        try:
            shape = list(shape)
        except TypeError:
            shape = [shape]
        minus_ones = shape.count(-1)
        if minus_ones == 0:
            pass
        elif minus_ones == 1:
            known_dimensions_size = -N.product(shape,axis=0) * dt.itemsize
            unknown_dimension_size, illegal = divmod(self.remaining_bytes(),
                                                     known_dimensions_size)
            if illegal:
                raise ValueError("unknown dimension doesn't match filesize")
            shape[shape.index(-1)] = unknown_dimension_size
        else:
            raise ValueError(
                "illegal -1 count; can only specify one unknown dimension")
        sz = dt.itemsize * N.product(shape)
        dt_endian = self._endian_from_dtype(dt)
        buf = self.file.read(sz)
        arr = N.ndarray(shape=shape,
                         dtype=dt,
                         buffer=buf,
                         order=order)
        if (not endian == 'dtype') and (dt_endian != endian):
            return arr.byteswap()
        return arr.copy()