1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
|
import itertools
import warnings
import os
import shlex
import sys
import HTSeq
from HTSeq._HTSeq import *
from HTSeq._version import __version__
class FileOrSequence:
    """Iterate over the lines of a named file or of any iterable of lines.

    The constructor takes one argument, which may either be a file name
    (a string, or any other object accepted by ``os.fspath``, possibly with
    path), or a connection, by which we mean a text file opened for reading,
    or any other object that can provide an iterator over strings
    (lines of the file).

    The advantage of passing a file name instead of an already opened file
    is that if an iterator is requested several times, the file will be
    re-opened each time. If the file is already open, its lines can be read
    only once, and then, the iterator stays exhausted.

    Furthermore, if a file name is passed that ends in ".gz" or ".gzip"
    (case insensitive), it is transparently gunzipped.

    The object can also be used as a context manager; in that case the
    file stays open for the duration of the ``with`` block and iterating
    does not re-open it.
    """

    def __init__(self, filename_or_sequence):
        """Store the source and detect whether it is a file system path."""
        self.fos = filename_or_sequence
        # True only while we hold a file that we opened ourselves.
        self.should_close = False
        # 1-based number of the line currently being processed, or None
        # when no iteration is in progress.
        self.line_no = None
        # The open line source, or None when nothing is open.
        self.lines = None
        try:
            os.fspath(self.fos)
        except TypeError:
            # Assumed to be a file handle or another iterable of lines.
            self.fos_is_path = False
        else:
            self.fos_is_path = True

    def _path_str(self):
        """Return the file name as ``str`` (decoding bytes / path objects)."""
        return os.fsdecode(self.fos)

    def __enter__(self):
        """Open the underlying file (if a path was given) and return self."""
        if self.fos_is_path:
            self.should_close = True
            # Decode first so that bytes paths can be suffix-checked too.
            if self._path_str().lower().endswith((".gz", ".gzip")):
                # Imported here so the class does not depend on the module
                # level (or a star import) providing ``gzip``.
                import gzip
                lines = gzip.open(self.fos, 'rt')
            else:
                lines = open(self.fos)
        else:
            lines = self.fos
        self.lines = lines
        return self

    def __exit__(self, type, value, traceback):
        """Close the file if this object opened it; safe to call twice."""
        # ``lines`` is None if __enter__ failed or __exit__ already ran.
        if self.should_close and self.lines is not None:
            self.lines.close()
        self.lines = None

    def __iter__(self):
        """Yield the lines one by one, keeping ``line_no`` up to date.

        If nothing is open yet, the source is opened for the duration of
        the iteration and closed again afterwards (also when the consumer
        stops early or an exception is raised).
        """
        # Only clean up what this iteration opened itself; inside a
        # ``with`` block the context manager owns the file.
        call_exit = self.lines is None
        self.line_no = 1
        try:
            # Opening inside the ``try`` makes sure that a failed open
            # still resets ``line_no`` below.
            if call_exit:
                self.__enter__()
            for line in self.lines:
                yield line
                self.line_no += 1
        finally:
            if call_exit:
                self.__exit__(None, None, None)
            self.line_no = None

    def __repr__(self):
        if self.fos_is_path:
            return "<%s object, connected to file name '%s'>" % (
                self.__class__.__name__, self._path_str())
        return "<%s object, connected to %s >" % (
            self.__class__.__name__, repr(self.fos))

    def get_line_number_string(self):
        """Describe the current position, for use in error messages."""
        if self.line_no is None:
            if self.fos_is_path:
                return "file %s closed" % self._path_str()
            return "file closed"
        if self.fos_is_path:
            return "line %d of file %s" % (self.line_no, self._path_str())
        return "line %d" % self.line_no
|