1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
|
# Copyright 1999 by Jeffrey Chang. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Code for more fancy file handles.
Classes:
UndoHandle File object decorator with support for undo-like operations.
StringHandle Wraps a file object around a string.
SGMLHandle File object that automatically strips SGML tags from data.
SGMLStripper Object that strips SGML.
"""
import os
import string
import StringIO
import sgmllib
class UndoHandle:
    """A Python handle that adds functionality for saving lines.

    Wraps another file-like object and keeps a stack of "saved" lines
    that are handed back before any further data is pulled from the
    wrapped handle.  Lines are saved in a LIFO fashion: the most recently
    saved line is the next one returned.  Attributes not defined here are
    looked up on the wrapped handle.

    Added methods:
    saveline   Save a line to be returned next time.
    peekline   Peek at the next line without consuming it.
    """

    def __init__(self, handle):
        """Wrap handle, a file-like object to read from."""
        self._handle = handle
        # Pending lines; index 0 is the next line to be returned.
        self._saved = []

    def readlines(self, *args, **keywds):
        """Return all saved lines followed by the wrapped handle's lines.

        The arguments are passed to the wrapped handle's readlines(); the
        saved lines are always returned in full and then forgotten.
        """
        lines = self._saved + self._handle.readlines(*args, **keywds)
        self._saved = []
        return lines

    def readline(self, *args, **keywds):
        """Return the next line, preferring a saved line if there is one.

        A saved line is returned whole; the arguments are only used when
        the line has to come from the wrapped handle.
        """
        if self._saved:
            return self._saved.pop(0)
        return self._handle.readline(*args, **keywds)

    def read(self, size=-1):
        """Read up to size characters, consuming saved lines first.

        Any negative size (or None) means "read everything": all saved
        lines followed by the remainder of the wrapped handle.
        """
        if size is None or size < 0:
            # Treat every "no limit" spelling alike; previously only the
            # exact value -1 drained the saved lines, so e.g. read(-2)
            # skipped them and returned data out of order.
            saved = "".join(self._saved)
            self._saved[:] = []
            size = -1
        else:
            pieces = []
            while size > 0 and self._saved:
                head = self._saved[0]
                if len(head) <= size:
                    # The whole saved line fits in what is still wanted.
                    size -= len(head)
                    pieces.append(self._saved.pop(0))
                else:
                    # Only part of the line is wanted; keep the tail saved.
                    pieces.append(head[:size])
                    self._saved[0] = head[size:]
                    size = 0
            saved = "".join(pieces)
        return saved + self._handle.read(size)

    def saveline(self, line):
        """Push line back so it is the next line returned; ignore empty lines."""
        if line:
            self._saved.insert(0, line)

    def peekline(self):
        """Return the next line without consuming it."""
        if self._saved:
            return self._saved[0]
        line = self._handle.readline()
        # saveline() drops an empty line, so end-of-file is not queued.
        self.saveline(line)
        return line

    def tell(self):
        """Return the wrapped handle's position minus the length of saved lines."""
        return self._handle.tell() - sum(map(len, self._saved))

    def seek(self, *args):
        """Discard saved lines and seek the wrapped handle."""
        self._saved = []
        self._handle.seek(*args)

    def __getattr__(self, attr):
        """Delegate any other attribute lookup to the wrapped handle."""
        return getattr(self._handle, attr)
# StringHandle wraps a file object around a string; it is an alias for
# StringIO.StringIO.
# I could make this faster by using cStringIO.
# However, cStringIO (in v1.52) does not implement the
# readlines method.
StringHandle = StringIO.StringIO
class SGMLHandle:
    """A Python handle that automatically strips SGML tags from data.

    Every read call is forwarded to the wrapped handle and the result is
    passed through an SGMLStripper before being returned.  Attributes not
    defined here are looked up on the wrapped handle.
    """

    def __init__(self, handle):
        """SGMLHandle(handle)

        handle is a file handle to SGML-formatted data.
        """
        self._handle = handle
        self._stripper = SGMLStripper()

    def read(self, *args, **keywds):
        """Read from the wrapped handle and return the data without SGML tags."""
        data = self._handle.read(*args, **keywds)
        return self._stripper.strip(data)

    def readline(self, *args, **keywds):
        """Read one line from the wrapped handle and return it without SGML tags."""
        line = self._handle.readline(*args, **keywds)
        return self._stripper.strip(line)

    def readlines(self, *args, **keywds):
        """Read lines from the wrapped handle and return them without SGML tags."""
        lines = self._handle.readlines(*args, **keywds)
        # Strip each line itself; the old code passed the builtin ``str``
        # type to the stripper instead of the line.
        return [self._stripper.strip(line) for line in lines]

    def __getattr__(self, attr):
        """Delegate any other attribute lookup to the wrapped handle."""
        return getattr(self._handle, attr)
class SGMLStripper:
class MyParser(sgmllib.SGMLParser):
def __init__(self):
sgmllib.SGMLParser.__init__(self)
self.data = ''
def handle_data(self, data):
self.data = self.data + data
def __init__(self):
self._parser = SGMLStripper.MyParser()
def strip(self, str):
"""S.strip(str) -> string
Strip the SGML tags from str.
"""
if not str: # empty string, don't do anything.
return ''
# I need to make sure that I don't return an empty string if
# the buffer is not empty. This can happen if there's a newline
# character embedded within a tag. Thus, I'll first check to
# see if the last character is a newline. If it is, and it's stripped
# away, I'll add it back.
is_newline = str[-1] in ['\n', '\r']
self._parser.data = '' # clear the parser's data (don't reset)
self._parser.feed(str)
if self._parser.data:
str = self._parser.data
elif is_newline:
str = '\n'
else:
str = ''
return str
|