File: array_import.py

package info (click to toggle)
python-scipy 0.6.0-12
  • links: PTS, VCS
  • area: main
  • in suites: lenny
  • size: 32,016 kB
  • ctags: 46,675
  • sloc: cpp: 124,854; ansic: 110,614; python: 108,664; fortran: 76,260; objc: 424; makefile: 384; sh: 10
file content (487 lines) | stat: -rw-r--r-- 17,194 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
# Authors:  Travis Oliphant, Trent Oliphant
# with support from Lee Barford's group at Agilent, Inc.
#

"""This module allows for the loading of an array from an ASCII
Text File

"""

# Public API: the only names exported by a star-import of this module.
__all__ = ['read_array', 'write_array']

# Standard library imports.
import os
import re
import sys
import types

# Numpy imports.
import numpy
from numpy import array, take, concatenate, asarray, real, imag
# Sadly, this module is still written with typecodes in mind.
from numpy.oldnumeric import Float

# Local imports.
import numpyio

# Sentinel meaning "argument not supplied"; used below as the default value
# of the lines / columns / separator parameters.
default = None
# Size passed to file.read() each time ascii_stream refills its line buffer.
_READ_BUFFER_SIZE = 1024*1024
# Much smaller alternative sizes, left commented out (presumably for
# exercising the buffer-refill path while testing).
#_READ_BUFFER_SIZE = 1000
#_READ_BUFFER_SIZE = 160

# ASCII text stream with line selection and comment skipping.
#
# Adapted from
#    TextFile class Written by: Konrad Hinsen <hinsen@cnrs-orleans.fr>
# Note: the automatic (un)compression and URL access offered by that class
# are not implemented by the code in this module.
#
# Written by Travis Oliphant and Trent Oliphant
#   with support from Agilent, Inc.
#


def convert_separator(sep):
    """Return |sep| with regular-expression metacharacters escaped.

    Each item of |sep| that is found in the string of special characters
    ``. ^ $ * + ? { [ \\ | ( )`` is prefixed with a backslash; every other
    item is copied through unchanged.
    """
    special = '.^$*+?{[\\|()'
    pieces = []
    for ch in sep:
        if ch in special:
            pieces.append('\\' + ch)
        else:
            pieces.append(ch)
    return ''.join(pieces)

def build_numberlist(lines):
    """Expand a line/column specification into a flat list of integers.

    Inputs:

      lines -- the module-level ``default`` sentinel, or a sequence whose
               entries are integers and/or range tuples.  Every entry but
               the last must be an integer (copied as is) or a tuple of
               2 or 3 integers, which is expanded with range().  The last
               entry may be an integer, a full range tuple, or an
               open-ended tuple ``(start,)``, ``(start, None)`` or
               ``(start, None, step)``.

    Output -- a list of integers.  ``default`` gives ``[-1]``.  An
              open-ended final tuple contributes ``start`` followed by
              ``-1`` (or ``-step``).  The consumers in this module
              (ascii_stream.readnextline and extract_columns) interpret a
              negative final entry as a stride to the end.

    Raises ValueError when |lines| is not a sequence, is empty, or holds
    an entry that is not a plain integer or a valid range tuple.
    """
    if lines is default:
        return [-1]

    errstr = "Argument lines must be a sequence of integers and/or range tuples."
    try:
        head, last = lines[:-1], lines[-1]
    except (TypeError, IndexError):
        # Not sliceable/indexable at all, or an empty sequence.
        raise ValueError(errstr)

    linelist = []
    try:
        for num in head:  # handle all but the last element
            if type(num) is int:
                linelist.append(num)
            elif type(num) is tuple:
                if not 1 < len(num) < 4:
                    raise ValueError("Tuples must be valid range tuples.")
                linelist.extend(range(*num))
            else:
                raise ValueError(errstr)
    except TypeError:
        # range() was handed non-integers, or head is not iterable.
        raise ValueError(errstr)

    if type(last) is int:
        linelist.append(last)
    elif type(last) is tuple:
        size = len(last)
        if not 1 <= size <= 3:
            raise ValueError(errstr)
        # Every member must be a plain integer; only the "stop" slot
        # (position 1) may be None to mark an open-ended range.
        for pos in range(size):
            member = last[pos]
            if type(member) is not int and not (pos == 1 and member is None):
                raise ValueError(errstr)
        if size == 1:
            linelist.extend([last[0], -1])
        elif last[1] is None:
            if size == 2:
                linelist.extend([last[0], -1])
            else:
                linelist.extend([last[0], -last[2]])
        else:
            linelist.extend(range(*last))
    else:
        # Previously an unsupported final entry was silently dropped.
        raise ValueError(errstr)
    return linelist

def get_open_file(fileobject, mode='rb'):
    """Return an open file for |fileobject|.

    Inputs:

      fileobject -- a file name (a leading '~' or '~user' is expanded with
                    os.path.expanduser) or an already open file object.
      mode -- mode string handed to open() when |fileobject| is a name.

    Output -- a newly opened file when a name was given; otherwise
              |fileobject| itself, returned untouched.

    Raises IOError with errno 2 when a name is given, |mode| starts with
    'r' and the path does not exist.  Errors raised by open() itself
    propagate unchanged.
    """
    try:
        # Duck-typing check: if fileobject can be used in
        # os.path.expanduser it is a file name, otherwise it is assumed
        # to be a python file object.  Depending on the Python version
        # the failure surfaces as AttributeError or TypeError.
        path = os.path.expanduser(fileobject)
    except (AttributeError, TypeError):
        return fileobject

    if mode[:1] == 'r' and not os.path.exists(path):
        raise IOError(2, 'No such file or directory: ' + path)
    return open(path, mode)


class ascii_stream(object):
    """Text files with line selection and line iteration

    An ascii_stream reads its file in blocks of up to _READ_BUFFER_SIZE
    (via readlines()), splits each block on |linesep| and hands out the
    lines selected by |lines|.  Instances can be used as the sequence of
    a for-loop: iteration goes through __getitem__, which skips lines
    starting with |comment| and raises IndexError when the data runs out.

    Finally, ascii_stream objects accept file names that start with '~' or
    '~user' to indicate a home directory (for reading only).

    Constructor: ascii_stream(|fileobject|, |lines|, |comment|, |linesep|),
    where |fileobject| is either an open python file object or
    the name of the file, |lines| is a sequence of integers
    or tuples (indicating ranges) of lines to be read, |comment| is the
    comment line identifier (an empty string disables comment skipping)
    and |linesep| is the string that separates lines.
    """

    # Class-level fallbacks so that __del__ is safe on an instance whose
    # __init__ raised before the file was opened.
    file = None
    should_close_file = False

    def __init__(self, fileobject, lines=default, comment="#",
                 linesep='\n'):
        if not isinstance(comment, str):
            raise ValueError("Comment must be a string.")
        self.linelist = build_numberlist(lines)
        self.comment = comment
        self.lencomment = len(comment)
        self.file = get_open_file(fileobject, mode='r')
        # Only close on deletion what this object opened itself.
        self.should_close_file = not (self.file is fileobject)
        self._pos = self.file.tell()
        # Position in self.linelist of the next entry to consume.
        self._lineindex = 0
        # Absolute number of the line most recently requested.  When the
        # list ends in a negative stride this starts at that (negative)
        # value, so a list holding only a stride begins at line 0.
        if self.linelist[-1] < 0:
            self._linetoget = self.linelist[-1]
        else:
            self._linetoget = 0
        # _oldbuflines: number of lines that preceded the current buffer;
        # _totbuflines: number of lines read so far, current buffer included.
        self._oldbuflines = 0
        self._linesplitter = linesep
        self._buffer = self.readlines(_READ_BUFFER_SIZE)
        self._totbuflines = len(self._buffer)

    def readlines(self, sizehint):
        """Read up to |sizehint| from the file and return complete lines.

        Returns [] when nothing is left to read.  Raises ValueError when a
        full-size read contains no line separator at all.
        """
        chunk = self.file.read(sizehint)
        lines = chunk.split(self._linesplitter)
        if len(chunk) < sizehint:  # short read: end of file reached
            if chunk == '':
                return []
            return lines
        if len(lines) < 2:
            raise ValueError("Buffer size too small.")
        # The last piece may be an incomplete line: move the file position
        # back over it so that the next read starts at its beginning.
        self.file.seek(-len(lines[-1]), 1)
        return lines[:-1]

    def __del__(self):
        if self.should_close_file and hasattr(self.file, 'close'):
            self.file.close()

    def __getitem__(self, item):
        """Return the next selected non-comment line (|item| is ignored).

        Raises IndexError once no further line is available, which is what
        terminates a for-loop over the stream.
        """
        while 1:
            line = self.readnextline()
            if line is None:
                raise IndexError
            # With an empty comment string nothing counts as a comment.
            if not self.lencomment or line[:self.lencomment] != self.comment:
                return line

    def readnextline(self):
        """Return the next line selected by self.linelist, or None at EOF.

        Non-negative entries of the list are absolute line numbers and are
        consumed one per call; a negative entry is a stride that is applied
        repeatedly.  Running past the end of a list without a stride raises
        IndexError from the list lookup.
        """
        entry = self.linelist[self._lineindex]
        if entry >= 0:
            self._linetoget = entry
            self._lineindex += 1
        else:
            self._linetoget = self._linetoget - entry
        # Refill the buffer until it covers the requested line.
        while self._linetoget >= self._totbuflines:
            self._buffer = self.readlines(_READ_BUFFER_SIZE)
            self._oldbuflines = self._totbuflines
            self._totbuflines += len(self._buffer)
            if self._totbuflines == self._oldbuflines:
                return None
        return self._buffer[self._linetoget - self._oldbuflines]

    def close(self):
        self.file.close()

    def flush(self):
        self.file.flush()


def move_past_spaces(firstline):
    """Skip the first whitespace-delimited token of |firstline|.

    Leading whitespace is stripped first.  Returns a tuple
    ``(rest, ind)`` where ``ind`` is the length of the first token,
    measured in the stripped line, and ``rest`` is the stripped line from
    the whitespace character that ends the token onwards.  When the token
    runs to the end of the line (or the line is empty or blank) ``rest``
    is the empty string.
    """
    stripped = firstline.lstrip()
    end = len(stripped)
    ind = 0
    # The bound check keeps a line without trailing whitespace from
    # running off the end of the string.
    while ind < end and stripped[ind] not in [' ','\n','\t','\v','\f','\r']:
        ind += 1
    return stripped[ind:], ind


def extract_columns(arlist, collist, atype, missing):
    """Pick the fields named by |collist| out of |arlist| and convert them.

    |collist| is a list of column indices; a negative final entry is a
    stride that keeps selecting every ``-collist[-1]``-th field up to the
    end of |arlist|.  The selected fields are passed, together with
    |atype| and |missing|, to numpyio.convert_objectarray, whose result is
    returned.
    """
    last = collist[-1]
    if last >= 0:
        # Plain list of indices.
        selected = take(arlist, collist, 0)
    elif len(collist) == 1:
        # Only a stride: start from the first field.
        selected = arlist[::-last]
    else:
        # Explicit indices, then continue striding after the last of them.
        explicit = take(arlist, collist[:-1], 0)
        strided = arlist[(collist[-2] - last)::(-last)]
        selected = concatenate((explicit, strided))
    return numpyio.convert_objectarray(selected, atype, missing)


# process_line (defined below) is given a string representing one line,
#  a separator tuple, a list of columns to read for each element of the
#  atype list, and a missing value to insert when conversion fails.

# Regular expressions for detecting complex numbers and for dealing
#  with spaces between the real and imaginary parts

# Pattern for a complex number written as "<real> <sign> <imaginary>", with
# optional blanks or tabs around the sign.  process_line rewrites matches
# as groups 1, 3 and 5 (real part, sign, imaginary part), which drops the
# whitespace captured in groups 2 and 4.
_obj = re.compile(r"""
      ([0-9.eE]+)            # Real part
      ([\t ]*)               # Space between real and imaginary part
      ([+-])                 # +/- sign
      ([\t ]*)               # 0 or more spaces
      (([0-9.eE]+[iIjJ])
      |([iIjJ][0-9.eE]+))    # Imaginary part
      """, re.VERBOSE)

# Non-zero while process_line may still print its "complex data detected"
# warning; process_line clears it after warning once.  getcolumns sets it
# to 0 before probing a line and read_array sets it to 1 before reading.
_not_warned = 1
def process_line(line, separator, collist, atype, missing):
    """Split one line of text into fields and convert the requested columns.

    Inputs:

      line -- the text of one line.
      separator -- tuple of separators.  Each entry but the last is used
                   once, left to right, to cut one field off the front of
                   the line; the last entry splits whatever remains.  A
                   None entry means "any run of whitespace".
      collist -- for each output, the list of column numbers to extract
                 (see extract_columns).
      atype -- typecode for each output.
      missing -- value passed on to extract_columns for fields that
                 cannot be converted.

    Output -- a list with one converted result per entry of |atype|.

    Side effect: while the module-level flag _not_warned is set, prints a
    one-time warning when the line looks like it holds complex numbers but
    none of the typecodes in |atype| is complex, then clears the flag.
    """
    global _not_warned
    # Remove spaces between the real and imaginary parts of complex numbers.
    line = _obj.sub(r"\1\3\5", line)

    if _not_warned:
        warn = 0
        if _obj.search(line) is not None:
            warn = 1
            for code in atype:
                if code in numpy.typecodes['Complex']:
                    warn = 0
        if warn:
            numpy.disp("Warning: Complex data detected, but no requested typecode was complex.")
            _not_warned = 0

    strlist = []
    for mysep in separator[:-1]:
        if mysep is None:
            # move_past_spaces measures the token in the left-stripped
            # line, so the field must be sliced from that same string.
            stripped = line.lstrip()
            newline, ind = move_past_spaces(line)
            strlist.append(stripped[:ind])
            line = newline
        else:
            ind = line.find(mysep)
            if ind < 0:
                # Separator absent: the rest of the line is the field.
                strlist.append(line)
                line = ''
            else:
                strlist.append(line[:ind])
                line = line[ind+len(mysep):]
    strlist.extend(line.split(separator[-1]))
    arlist = array(strlist, 'O')
    vals = []
    for k in range(len(atype)):
        vals.append(extract_columns(arlist, collist[k], atype[k], missing))
    return vals

def getcolumns(stream, columns, separator):
    """Work out how many columns each output array will receive.

    The first usable line in the stream's current buffer (not blank and
    not a comment) is parsed with process_line using a Float typecode for
    every output, and the length of each result is recorded.

    Inputs:

      stream -- an ascii_stream; its comment, lencomment, linelist and
                _buffer attributes are read.
      columns -- list of column specifications, one per output, each in
                 the form accepted by build_numberlist.
      separator -- tuple of separators as expected by process_line.

    Output -- (colsize, collist): the number of columns per output and the
              expanded column list per output.

    Raises ValueError when the buffer holds no usable line at or after the
    first requested line.

    Side effect: sets the module-level _not_warned flag to 0 so the probe
    does not emit the complex-data warning.
    """
    global _not_warned
    comment = stream.comment
    lenc = stream.lencomment
    buffered = stream._buffer
    K = len(buffered)
    # A negative first entry of linelist is a stride, not a line number;
    # in that case reading starts at the top of the buffer.
    start = stream.linelist[0]
    if start < 0:
        start = 0
    firstline = None
    for idx in range(start, K):
        candidate = buffered[idx]
        # With an empty comment string nothing counts as a comment.
        is_comment = lenc > 0 and candidate[:lenc] == comment
        if candidate.strip() != '' and not is_comment:
            firstline = candidate
            break
    if firstline is None:
        raise ValueError("First line to read not within %d lines of top." % K)
    N = len(columns)
    collist = [build_numberlist(columns[k]) for k in range(N)]
    _not_warned = 0
    val = process_line(firstline, separator, collist, [Float]*N, 0)
    colsize = [len(val[k]) for k in range(N)]
    return colsize, collist

def convert_to_equal_lists(cols, atype):
    """Return |cols| and |atype| as two lists of equal length.

    An argument that is not a list is wrapped in a one-element list.  The
    shorter list is then padded by repeating its last entry until both
    have the same length.  New lists are returned; lists passed in by the
    caller are not modified.
    """
    if isinstance(cols, list):
        cols = list(cols)
    else:
        cols = [cols]
    if isinstance(atype, list):
        atype = list(atype)
    else:
        atype = [atype]
    N = len(cols) - len(atype)
    if N > 0:
        atype.extend([atype[-1]]*N)
    elif N < 0:
        cols.extend([cols[-1]]*(-N))
    return cols, atype


def read_array(fileobject, separator=default, columns=default, comment="#",
               lines=default, atype=Float, linesep='\n',
               rowsize=10000, missing=0):
    """Return an array or arrays from ascii_formatted data in |fileobject|.

    Inputs:

      fileobject -- An open file object or a string for a valid filename.
                    The string can be prepended by "~/" or "~<name>/" to
                    read a file from the home directory.
      separator -- a string or a tuple of strings to indicate the column
                   separators.  If the length of the string tuple is less
                   than the total number of columns, then the last separator
                   is assumed to be the separator for the rest of the columns.
      columns -- a tuple of integers and range-tuples which describe the
                 columns to read from the file.  A negative entry in the
                 last column specifies the negative skip value to the end.
                 Example:  columns=(1, 4, (5, 9), (11, 15, 3), 17, -2)
                         will read [1,4,5,6,7,8,11,14,17,19,21,23,...]
                 If multiple arrays are to be returned, then this argument
                 should be an ordered list of such tuples.  There should be
                 one entry in the list for each arraytype in the atype list.
      lines   -- a tuple with the same structure as columns which indicates
                 the lines to read.
      comment -- the comment character (line will be ignored even if it is
                 specified by the lines tuple)
      linesep -- separator between rows.
      missing -- value to insert in array when conversion to number fails.
      atype -- the typecode of the output array.  If multiple outputs are
               desired, then this should be a list of typecodes.  The columns
               to fill the array represented by the given typecode is
               determined from the columns argument.  If the length of atype
               does not match the length of the columns list, then, the
               smallest one is expanded to match the largest by repeatedly
               copying the last entry.
      rowsize -- the allocation row size (array grows by this amount as
               data is read in).

    Output -- the 1 or 2d array, or a tuple of output arrays of different
              types, sorted in order of the first column to be placed
              in the output array.

    Raises ValueError if one of the typecodes in |atype| is not a numpy
    typecode.

    A file opened here from a filename is closed before returning (also
    when an error occurs); a file object passed in is left open.

    """

    global _not_warned
    # Make separator into a tuple of separators.
    if separator is default or isinstance(separator, str):
        sep = (separator,)
    else:
        sep = tuple(separator)
    # Create ascii_object from |fileobject| argument.
    ascii_object = ascii_stream(fileobject, lines=lines, comment=comment, linesep=linesep)
    try:
        columns, atype = convert_to_equal_lists(columns, atype)
        numout = len(atype)
        # Get the number of columns to read and expand the columns argument
        colsize, collist = getcolumns(ascii_object, columns, sep)
        # Initialize the output arrays
        outrange = range(numout)
        outarr = []
        typecodes = "".join(numpy.typecodes.values())
        for k in outrange:
            if not atype[k] in typecodes:
                raise ValueError("One of the array types is invalid, k=%d" % k)
            outarr.append(numpy.zeros((rowsize, colsize[k]), atype[k]))
        row = 0
        block_row = 0
        # Re-arm the complex-data warning that getcolumns switched off.
        _not_warned = 1
        for line in ascii_object:
            if line.strip() == '':
                continue
            vals = process_line(line, sep, collist, atype, missing)
            for k in outrange:
                outarr[k][row] = vals[k]
            row += 1
            block_row += 1
            if block_row >= rowsize:
                # Current allocation is full: grow by another rowsize rows.
                for k in outrange:
                    outarr[k].resize((outarr[k].shape[0] + rowsize, colsize[k]))
                block_row = 0
    finally:
        # Do not rely on garbage collection to release a file we opened.
        if ascii_object.should_close_file:
            ascii_object.close()
    for k in outrange:
        # Trim the unused rows of the final allocation block.
        if outarr[k].shape[0] != row:
            outarr[k].resize((row, colsize[k]))
        a = outarr[k]
        # A single row or a single column is returned as a 1-d array.
        if a.shape[0] == 1 or a.shape[1] == 1:
            outarr[k] = numpy.ravel(a)
    if len(outarr) == 1:
        return outarr[0]
    else:
        return tuple(outarr)


# takes a 2-d array and returns a string
def str_array(arr, precision=5,col_sep=' ',row_sep="\n",ss=0):
    """Format the 2-d array |arr| as text.

    Inputs:

      arr -- anything asarray() turns into a 2-d array.
      precision -- number of digits after the decimal point in the
                   exponential format used for floating point values.
      col_sep -- string placed between the elements of a row.
      row_sep -- string placed between rows.
      ss -- if non-zero, values whose magnitude is below 10**-precision
            are replaced by zero before formatting.

    Output -- one string.  Integer, object and character arrays, and any
              value equal to zero, are written with str(); complex values
              are written as "<real>+j<imag>" or "<real>-j<imag>"; all
              other values use the "%.<precision>e" format.
    """
    arr = asarray(arr)
    N, M = arr.shape
    thistype = arr.dtype.char
    kind = arr.dtype.kind
    # All signed and unsigned integer types are written without a float
    # format, as are object ('O') and character ('c') arrays.
    nofloat = (kind in 'iu') or (thistype in 'Oc')
    cmplx = kind == 'c'
    fmtstr = "%%.%de" % precision
    cmpnum = pow(10.0, -precision)
    thestr = []
    for n in range(N):
        theline = []
        for m in range(M):
            val = arr[n, m]
            if ss and abs(val) < cmpnum:
                val = 0*val
            if nofloat or val == 0:
                thisval = str(val)
            elif cmplx:
                rval = real(val)
                ival = imag(val)
                thisval = fmtstr % rval
                if ival >= 0:
                    thisval = '%s+j%s' % (thisval, fmtstr % ival)
                else:
                    thisval = '%s-j%s' % (thisval, fmtstr % abs(ival))
            else:
                thisval = fmtstr % val
            theline.append(thisval)
        thestr.append(col_sep.join(theline))
    return row_sep.join(thestr)


def write_array(fileobject, arr, separator=" ", linesep='\n',
                precision=5, suppress_small=0, keep_open=0):
    """Write a rank-2 or less array to file represented by fileobject.

    Inputs:

      fileobject -- An open file object or a string to a valid filename.
      arr -- The array to write.
      separator -- separator to write between elements of the array.
      linesep -- separator to write between rows of array
      precision -- number of digits after the decimal place to write.
      suppress_small -- non-zero to round small numbers down to 0.0
      keep_open = non-zero to return the open file, otherwise, the file is closed.
    Outputs:

      file -- The open file (if keep_open is non-zero)

    Raises ValueError if |arr| has more than two dimensions or if
    |separator| contains a character that can occur inside a number.
    Both checks, and the formatting of the array, happen before the
    target file is opened, so a rejected call does not create or
    truncate the file.
    """
    rank = numpy.ndim(arr)
    if rank > 2:
        raise ValueError("Can-only write up to 2-D arrays.")

    for ch in separator:
        if ch in '0123456789-+FfeEgGjJIi.':
            raise ValueError("Bad string for separator")

    # str_array needs a 2-d array: a scalar becomes 1x1 and a 1-d array
    # becomes a single column.
    if rank == 0:
        arr = numpy.reshape(arr, (1,1))
    elif rank == 1:
        arr = numpy.reshape(arr, (numpy.shape(arr)[0],1))
    else:
        arr = asarray(arr)

    astr = str_array(arr, precision=precision,
                     col_sep=separator, row_sep=linesep,
                     ss = suppress_small)

    outfile = get_open_file(fileobject, mode='w')
    opened_here = outfile is not fileobject
    written = False
    try:
        outfile.write(astr)
        outfile.write('\n')
        written = True
    finally:
        # Do not leak a file opened by this call when the write fails.
        if not written and opened_here:
            outfile.close()

    if keep_open:
        return outfile
    if outfile is sys.stdout or outfile is sys.stderr:
        return
    outfile.close()
    return