File: filesys.py

package info (click to toggle)
python-dendropy 4.2.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 68,392 kB
  • ctags: 3,947
  • sloc: python: 41,840; xml: 1,400; makefile: 15
file content (338 lines) | stat: -rw-r--r-- 12,526 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
#! /usr/bin/env python

##############################################################################
##  DendroPy Phylogenetic Computing Library.
##
##  Copyright 2010-2015 Jeet Sukumaran and Mark T. Holder.
##  All rights reserved.
##
##  See "LICENSE.rst" for terms and conditions of usage.
##
##  If you use this work or any portion thereof in published work,
##  please cite it as:
##
##     Sukumaran, J. and M. T. Holder. 2010. DendroPy: a Python library
##     for phylogenetic computing. Bioinformatics 26: 1569-1571.
##
##############################################################################

"""
Various utilities in support of filesystem interaction.
"""

import fnmatch
import time
import os
import sys
import re
from threading import Event, Thread, Lock

from dendropy.utility import messaging
_LOG = messaging.get_logger(__name__)

###############################################################################
## Universal Opening

def pre_py34_open(file,
        mode='r',
        buffering=-1,
        encoding=None,
        errors=None,
        newline=None,
        closefd=True,
        opener=None):
    if encoding is not None:
        raise NotImplementedError
    if errors is not None:
        raise NotImplementedError
    if newline is None:
        if mode.startswith("r"):
            mode = mode + "U"
    else:
        raise NotImplementedError
    if closefd is not True:
        raise NotImplementedError
    if opener is not None:
        raise NotImplementedError
    return open(
            file,
            mode=mode,
            buffering=buffering)

###############################################################################
## LineReadingThread

DEFAULT_SLEEP_INTERVAL=0.1
class LineReadingThread(Thread):
    """A thread that will read the input stream - designed to work with a file
    thas is being written. Note that if the file does not end with a newline
    and the keep_going() method does not return False, then the thread will not
    terminate

    self.keep_going()
    is called with each line. (sub classes should override).

    LineReadingThread.__init__ must be called by subclasses.
    """
    def __init__(self,
                lineCallback=None,
                stream=None,
                filename="",
                stop_event=None,
                sleep_interval=DEFAULT_SLEEP_INTERVAL,
                store_lines=False,
                is_file=True,
                subproc=None,
                *args,
                **kwargs):
        """
        __init__ processes the following arguments:

            - ``lineCallback`` is the callable that takes a string that is each line, and returns False to stop reading.  This is a way of using the class without sub-classing and overriding keep_going
            - ``stream`` is in input file-like object
            - ``filename`` can be sent instead of ``stream``, it should be the path to the file to read.
            - ``stop_event`` is an Event, that will kill the thread if it is triggered.
            - ``sleep_interval`` is the interval to sleep while waiting for a new tree to appear.

        All other arguments are passed to the Thread.__init__()
        """
        self.stream = stream
        self.filename = filename
        self.lineCallback = lineCallback
        self.unfinished_line = None
        self.stop_event = stop_event
        self.sleep_interval = sleep_interval
        self.store_lines = store_lines
        if store_lines:
            self.line_list_lock = Lock()
        self.is_file = is_file
        self.lines = []
        self.subproc = subproc
        self.stop_on_subproc_exit = kwargs.get('stop_on_subproc_exit', False)
        Thread.__init__(self,group=None, target=None, name=None,
                        args=tuple(*args), kwargs=dict(**kwargs))

    def wait_for_file_to_appear(self, filename):
        """Blocks until the file ``filename`` appears or stop_event is triggered.

        Returns True if ``filename`` exists.

        Checks for the stop_event *before* checking for the file existence.
        (paup_wrap and raxml_wrap threads depend on this behavior).
        """
        while True:
            if (self.stop_event is not None) and self.stop_event.isSet():
                return False
            if os.path.exists(filename):
                return True
            #_LOG.debug("Waiting for %s" %filename)
            time.sleep(self.sleep_interval)

    def open_file_when_exists(self, filename):
        """Blocks until the file ``filename`` appears and then returns a file
        object opened in rU mode.

        Returns None if the stop event is triggered.
        """
        if self.wait_for_file_to_appear(filename):
            return open(filename, "rU")
        return None


    def run(self):
        if self.stream is None:
            if not self.filename:
                _LOG.debug('"stream" and "filename" both None when LineReadingThread.run called')
                return
            self.stream = self.open_file_when_exists(self.filename)
            if self.stream is None:
                return
        self._read_stream()

    def keep_going(self, line):
        _LOG.debug("In keep_going: " + line)
        if self.store_lines:
            self.line_list_lock.acquire()
            try:
                self.lines.append(line)
                _LOG.debug("self.lines = %s" % str(self.lines))
            finally:
                self.line_list_lock.release()
        if self.lineCallback is None:
            r = True
            if self.subproc:
                _LOG.debug("subproc is not None")
                if self.subproc.returncode is None:
                    _LOG.debug("subproc.returncode is None")
                else:
                    _LOG.debug("subproc.returncode is %d" % self.subproc.returncode)
                    if self.store_lines:
                        _LOG.debug("about to call readlines")
                        line = line + "\n".join(self.stream.readlines())
                    r = False
            else:
                _LOG.debug("subproc is None")
            return r
        return self.lineCallback(line)

    def _read_stream(self):
        self.unfinished_line = ""
        while True:
            if (self.stop_event is not None) and self.stop_event.isSet():
                # when we terminate because of an event setting,
                # we pass any unfinished_line line that we have to
                if not self.unfinished_line is None:
                    self.keep_going(self.unfinished_line)
                break
            _LOG.debug("about to readline")
            line = self.stream.readline()
            if not line:
                _LOG.debug("line is empty")
                if self.stop_on_subproc_exit:
                    self.subproc.poll()
                    if self.subproc is not None:
                        _LOG.debug("subproc is not None")
                        if self.subproc.returncode is not None:
                            _LOG.debug("subproc.returncode is %d" % self.subproc.returncode)
                            _LOG.debug("%s" % repr(self.stream))
                            l = "".join(self.stream.readlines())
                            if l:
                                self.keep_going(l)
                            break
                        else:
                            _LOG.debug("subproc.returncode is None")
                    else:
                        _LOG.debug("subproc is None")
            else:
                _LOG.debug('line is "%s"' % line)
            if not line.endswith("\n"):
                if self.unfinished_line:
                    self.unfinished_line = self.unfinished_line + line
                else:
                    self.unfinished_line = line
                time.sleep(self.sleep_interval)
            else:
                if self.unfinished_line:
                    line = self.unfinished_line + line
                self.unfinished_line = ""
                if not self.keep_going(line):
                    break
        _LOG.debug("LineReadingThread exiting")

###############################################################################
## File Finding

def glob_match(pathname, pattern, respect_case=False, complement=False):
    if respect_case:
        if fnmatch.fnmatchcase(pathname, pattern):
            if complement:
                return False
            else:
                return True
        else:
            if complement:
                return True
            else:
                return False
    else:
        pathname = pathname.lower()
        pattern = pattern.lower()
        if fnmatch.fnmatch(pathname, pattern):
            if complement:
                return False
            else:
                return True
        else:
            if complement:
                return True
            else:
                return False

def find_files(top,
               recursive=True,
               filename_filter=None,
               dirname_filter=None,
               excludes=None,
               complement=False,
               respect_case=False,
               expand_vars=True,
               include_hidden=True):
    if expand_vars:
        top = os.path.abspath(os.path.expandvars(os.path.expanduser(top)))
    if excludes == None:
        excludes = []
    filepaths = []
    if os.path.exists(top):
        for fpath in os.listdir(top):
            abspath = os.path.abspath(os.path.join(top, fpath))
            if os.path.isfile(abspath):
                if (include_hidden or not fpath.startswith('.')) \
                    and (not filename_filter or glob_match(fpath, filename_filter, respect_case, complement)):
                    to_exclude = False
                    for e in excludes:
                        if glob_match(fpath, e, respect_case):
                            to_exclude = True
                    if not to_exclude:
                        filepaths.append(abspath)
            elif os.path.isdir(abspath):
                if recursive:
                    if (include_hidden or not fpath.startswith('.')) \
                        and (not dirname_filter or (glob_match(fpath, dirname_filter, respect_case, complement))):
                        filepaths.extend(find_files(abspath,
                                                     recursive=recursive,
                                                     filename_filter=filename_filter,
                                                     dirname_filter=dirname_filter,
                                                     excludes=excludes,
                                                     complement=complement,
                                                     respect_case=respect_case,
                                                     expand_vars=False))
    filepaths.sort()
    return filepaths


# from http://snippets.dzone.com/posts/show/6313
def find_executable(executable, path=None):
    """Try to find 'executable' in the directories listed in 'path' (a
    string listing directories separated by 'os.pathsep'; defaults to
    os.environ['PATH']).  Returns the complete filename or None if not
    found
    """
    if path is None:
        path = os.environ['PATH']
    paths = path.split(os.pathsep)
    extlist = ['']
    if os.name == 'os2':
        (base, ext) = os.path.splitext(executable)
        # executable files on OS/2 can have an arbitrary extension, but
        # .exe is automatically appended if no dot is present in the name
        if not ext:
            executable = executable + ".exe"
    elif sys.platform == 'win32':
        pathext = os.environ['PATHEXT'].lower().split(os.pathsep)
        (base, ext) = os.path.splitext(executable)
        if ext.lower() not in pathext:
            extlist = pathext
    for ext in extlist:
        execname = executable + ext
        if os.path.isfile(execname):
            return execname
        else:
            for p in paths:
                f = os.path.join(p, execname)
                if os.path.isfile(f):
                    return f
    else:
        return None

###############################################################################
## Dealing with streams that may not have been opened with the universal
## newline option

def get_lines(stream):
    """
    Parse stream into lines, dealing with all line break conventions.
    """
    s = stream.read()
    return re.split(r'\r\n|\n|\r', s)