File: File.py

package info (click to toggle)
python-biopython 1.42-2
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 17,584 kB
  • ctags: 12,272
  • sloc: python: 80,461; xml: 13,834; ansic: 7,902; cpp: 1,855; sql: 1,144; makefile: 203
file content (161 lines) | stat: -rw-r--r-- 4,645 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# Copyright 1999 by Jeffrey Chang.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""Code for more fancy file handles.


Classes:
UndoHandle     File object decorator with support for undo-like operations.
StringHandle   Wraps a file object around a string.
SGMLHandle     File object that automatically strips SGML tags from data.

SGMLStripper   Object that strips SGML.

"""
import os
import string
import StringIO
import sgmllib

class UndoHandle:
    """A Python handle that adds functionality for saving lines.

    Saves lines in a LIFO fashion.

    Added methods:
    saveline    Save a line to be returned next time.
    peekline    Peek at the next line without consuming it.

    Any attribute not defined here is looked up on the wrapped handle.

    """
    def __init__(self, handle):
        """UndoHandle(handle)

        handle is the file handle to wrap.

        """
        self._handle = handle
        # Saved lines; index 0 is the next one to be handed out.
        self._saved = []

    def readlines(self, *args, **keywds):
        """Return the saved lines followed by the wrapped handle's lines.

        The arguments are passed on to the wrapped handle's readlines.

        """
        lines = self._saved + self._handle.readlines(*args, **keywds)
        self._saved = []
        return lines

    def readline(self, *args, **keywds):
        """Return the next line, taking a saved line first if there is one.

        The arguments are passed on to the wrapped handle's readline; they
        are not applied to a saved line, which is returned whole.

        """
        if self._saved:
            return self._saved.pop(0)
        return self._handle.readline(*args, **keywds)

    def read(self, size=-1):
        """Read up to size characters, consuming saved lines first.

        A size of None or any negative number reads everything that is
        left: all saved lines followed by the rest of the wrapped handle.

        """
        if size is None or size < 0:
            saved = "".join(self._saved)
            self._saved[:] = []
            return saved + self._handle.read()
        pieces = []
        while size > 0 and self._saved:
            line = self._saved[0]
            if len(line) <= size:
                # The whole saved line fits in the request.
                size = size - len(line)
                pieces.append(self._saved.pop(0))
            else:
                # Only part of the line is wanted; keep the remainder saved.
                pieces.append(line[:size])
                self._saved[0] = line[size:]
                size = 0
        # Whatever the saved lines could not supply comes from the handle.
        return "".join(pieces) + self._handle.read(size)

    def saveline(self, line):
        """Save a line to be returned next time.  Empty lines are ignored."""
        if line:
            self._saved.insert(0, line)

    def peekline(self):
        """Return the next line without consuming it.

        If nothing is saved, a line is read from the wrapped handle and
        saved so that the next read returns it again.

        """
        if self._saved:
            line = self._saved[0]
        else:
            line = self._handle.readline()
            self.saveline(line)
        return line

    def tell(self):
        """Return the wrapped handle's position minus the saved lines' length.

        NOTE(review): this is only a real position if the saved lines were
        read from the wrapped handle -- confirm against callers.

        """
        return self._handle.tell() - sum(map(len, self._saved))

    def seek(self, *args):
        """Discard the saved lines and seek the wrapped handle."""
        self._saved = []
        self._handle.seek(*args)

    def __getattr__(self, attr):
        """Delegate any other attribute to the wrapped handle."""
        return getattr(self._handle, attr)

# StringHandle wraps a file object around a string; it is simply an alias
# for StringIO.StringIO.
# I could make this faster by using cStringIO.
# However, cStringIO (in v1.52 -- presumably Python 1.5.2) does not
# implement the readlines method.
StringHandle = StringIO.StringIO



class SGMLHandle:
    """A Python handle that automatically strips SGML tags from data.

    read, readline and readlines fetch data from the wrapped handle and
    pass it through an SGMLStripper before returning it.  Any attribute
    not defined here is looked up on the wrapped handle.

    """
    def __init__(self, handle):
        """SGMLHandle(handle)

        handle is a file handle to SGML-formatted data.

        """
        self._handle = handle
        self._stripper = SGMLStripper()

    def read(self, *args, **keywds):
        """Read from the wrapped handle and return the data without tags.

        The arguments are passed on to the wrapped handle's read.

        """
        data = self._handle.read(*args, **keywds)
        return self._stripper.strip(data)

    def readline(self, *args, **keywds):
        """Read one line from the wrapped handle and return it without tags.

        The arguments are passed on to the wrapped handle's readline.

        """
        line = self._handle.readline(*args, **keywds)
        return self._stripper.strip(line)

    def readlines(self, *args, **keywds):
        """Read lines from the wrapped handle and return them without tags.

        The arguments are passed on to the wrapped handle's readlines.
        Each line is stripped separately, so the result has one entry per
        line read.

        """
        lines = self._handle.readlines(*args, **keywds)
        strip = self._stripper.strip
        # Strip the line itself, not the builtin ``str`` type.
        return [strip(line) for line in lines]

    def __getattr__(self, attr):
        """Delegate any other attribute to the wrapped handle."""
        return getattr(self._handle, attr)


class SGMLStripper:
    """Object that strips SGML tags from strings.

    One parser instance is created per stripper and reused for every
    call to strip.

    """
    class MyParser(sgmllib.SGMLParser):
        """SGML parser that accumulates the data it is handed in self.data."""
        def __init__(self):
            sgmllib.SGMLParser.__init__(self)
            self.data = ''

        def handle_data(self, data):
            """Append a piece of data to what has been collected so far."""
            self.data += data

    def __init__(self):
        self._parser = SGMLStripper.MyParser()

    def strip(self, str):
        """S.strip(str) -> string

        Strip the SGML tags from str.

        An empty str gives an empty string.  If nothing is left after
        stripping but str ended in a newline or carriage return, a single
        newline is returned instead of an empty string.

        """
        if not str:
            # Nothing to parse.
            return ''

        # A newline can sit inside a tag, in which case the parser
        # collects no data at all for a non-empty buffer.  Note whether
        # the input ended a line so the line ending can be put back.
        ends_line = str[-1] in ('\n', '\r')

        parser = self._parser
        # Only the collected data is cleared; the parser is not reset.
        parser.data = ''
        parser.feed(str)

        stripped = parser.data
        if stripped:
            return stripped
        if ends_line:
            return '\n'
        return ''