File: Log.py

package info (click to toggle)
vusb-analyzer 1.1-3
  • links: PTS
  • area: contrib
  • in suites: wheezy
  • size: 356 kB
  • sloc: python: 2,669; makefile: 2
file content (780 lines) | stat: -rw-r--r-- 28,227 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
#
# VUsbTools.Log
# Micah Dowty <micah@vmware.com>
#
# Implements parsers for USB log files. Currently
# this includes slurping usbAnalyzer data out of the
# VMX log, and parsing the XML logs exported by
# Ellisys Visual USB.
#
# Copyright (C) 2005-2009 VMware, Inc. Licensed under the MIT
# License, please see the README.txt. All rights reserved.
#

from __future__ import division
import sys, time, re, math, os, string, atexit
import xml.sax, Queue, threading, difflib
import gtk, gobject
import traceback, gzip, struct
from VUsbTools import Types


class UsbIOParser(Types.psyobj):
    """Parses USBIO log lines and generates Transaction objects appropriately.
       Finished transactions are pushed into the supplied queue.
       """
    lineOriented = True

    def __init__(self, completed):
        self.current = Types.Transaction()
        self.completed = completed

    def parse(self, line, timestamp=None, frame=None, lineNumber=None):
        tokens = line.split()
        finished = None

        if tokens[0] in ('Up', 'Down'):
            self.flush()
            self.current.dir = tokens[0]
            self.current.timestamp = timestamp
            self.current.frame = frame
            self.current.lineNumber = lineNumber
            self.parseKeyValuePairs(tokens[1:])

        # new Log_HexDump() format:
        # USBIO:  000: 80 06 ......
        elif len(tokens) >= 2 and len(tokens[0]) == 4 and len(tokens[1]) == 2:
            data = line.split(':')
            data = data[1].lstrip()
            self.current.appendHexData(data[:48])

        # old Log_HexDump() format:
        # USBIO: 80 06 ......
        elif len(tokens[0]) == 2:
            self.current.appendHexData(line[:48])

        else:
            self.flush()
            self.current.appendDecoded(line.strip())

    def parseKeyValuePairs(self, tokens):
        for token in tokens:
            kv = token.split('=', 1)
            if len(kv) > 1:
                if kv[0] in ('endpt'):
                    base = 16
                else:
                    base = 10
                setattr(self.current, kv[0], int(kv[1], base))

    def flush(self):
        """Force any in-progress transactions to be completed. This should be
           called when you know the USB analyzer is finished outputting
           data, such as when a non-USBIO line appears in the log.
           """
        if self.current.dir:
            self.completed.put(self.current)
            self.current = Types.Transaction()

class TimestampLogParser:
    """Parse a simple format which logs timestamps in nanosecond resolution.
       Lines are of the form:
          <timestamp> <name> args...

       The event name may be 'begin-foo' or 'end-foo' to indicate an event
       which executes over a span of time, or simply 'foo' to mark a single
       point.
       """
    lineOriented = True

    def __init__(self, completed):
        self.epoch = None
        self.nameEndpoints = {}
        self.nextEp = 1
        self.lineNumber = 0
        self.completed = completed

    def flush(self):
        pass

    def parse(self, line):
        self.lineNumber += 1
        tokens = line.split()
        try:

            # Extract the time, convert to seconds
            nanotime = int(tokens[0])
            if not self.epoch:
                self.epoch = nanotime
            timestamp = (nanotime - self.epoch) / 1000000000.0

            # Detect the start- or end- prefix
            name = tokens[1]
            if name.startswith("begin-"):
                name = name.split('-', 1)[1]
                dirs = ('Down',)
            elif name.startswith("end-"):
                name = name.split('-', 1)[1]
                dirs = ('Up',)
            else:
                dirs = ('Down', 'Up')

            # Generate an 'endpoint' for the event name
            try:
                endpoint = self.nameEndpoints[name]
            except KeyError:
                endpoint = self.nextEp
                self.nameEndpoints[name] = endpoint
                self.nextEp = endpoint + 1

            for dir in dirs:
                trans = Types.Transaction()
                trans.dir = dir
                trans.timestamp = timestamp
                trans.lineNumber = self.lineNumber
                trans.endpt = endpoint
                trans.dev = 0
                trans.status = 0
                trans.datalen = 0x1000
                trans.appendDecoded(" ".join(tokens[1:]))
                self.completed.put(trans)
        except:
            print "Error on line %d:" % self.lineNumber
            traceback.print_exc()


class VmxLogParser(UsbIOParser):
    """Read the VMX log, looking for new USBIO lines and parsing them.
    """
    frame = None
    epoch = None
    lineNumber = 0

    def parse(self, line):
        self.lineNumber += 1

        # Local to the UHCI core
        l = line.split("UHCI:")
        if len(l) == 2:
            m = re.search("- frame ([0-9]+) -", l[1])
            if m:
                self.frame = int(m.group(1))
                # Don't let SOF markers start the clock
                if self.epoch is not None:
                    self.completed.put(Types.SOFMarker(self.parseRelativeTime(line),
                                                       self.frame, self.lineNumber))
                return

        # Local to the EHCI core
        l = line.split("EHCI:")
        if len(l) == 2:
            m = re.search("Execute frame ([0-9]+)[\. ]", l[1])
            if m:
                self.frame = int(m.group(1))
                # Don't let SOF markers start the clock
                if self.epoch is not None:
                    self.completed.put(Types.SOFMarker(self.parseRelativeTime(line),
                                                       self.frame, self.lineNumber))
                return

        # Generic analyzer URBs
        l = line.split("USBIO:")
        if len(l) == 2:
            UsbIOParser.parse(self, l[1][:-1], self.parseRelativeTime(line),
                              self.frame, self.lineNumber)
        else:
            self.flush()

    def parseRelativeTime(self, line):
        """Start the clock when we see our first USB log line"""
        t = self.parseTime(line)
        if self.epoch is None:
            self.epoch = t
        return t - self.epoch

    _timeCache = (None, None)

    def parseTime(self, line):
        """Return a unix-style timestamp for the given line.
           XXX: This assumes the current year, so logs that straddle
                years will have a giant discontinuity in timestamps.
           """
        # Cache the results of strptime. It only changes every
        # second, and this was taking more than 50% of our parsing time!
        stamp = line[:15]
        savedStamp, parsed = self._timeCache
        if savedStamp != stamp:
            parsed = time.strptime(stamp, "%b %d %H:%M:%S")
            self._timeCache = stamp, parsed

        now = time.localtime()
        try:
            usec = int(line[16:19])
        except ValueError:
            usec = 0
        return usec / 1000.0 + time.mktime((
            now.tm_year, parsed.tm_mon, parsed.tm_mday,
            parsed.tm_hour, parsed.tm_min, parsed.tm_sec,
            parsed.tm_wday, parsed.tm_yday, parsed.tm_isdst))


def parseInt(attrs, name, default=None):
    """The Ellisys logs include commas in their integers"""
    try:
        return int(attrs[name].replace(",", ""))
    except (KeyError, ValueError):
        return default

def parseFloat(attrs, name, default=None):
    """The Ellisys logs include commas and spaces in their floating point numbers"""
    try:
        return float(attrs[name].replace(",", "").replace(" ", ""))
    except (KeyError, ValueError):
        return default


class EllisysXmlHandler(xml.sax.handler.ContentHandler):
    """Handles SAX events from an XML log exported by Ellisys
       Visual USB. The completed USB transactions are pushed into
       the provided completion queue.
       """
    frameNumber = None
    device = None
    endpoint = None
    current = None
    characterHandler = None

    def __init__(self, completed):
        self.pipes = {}
        self.pending = {}
        self.completed = completed
        self._frameAttrs = {}

    def startElement(self, name, attrs):
        # This will always call self.startElement_%s where %s is the
        # element name, but the profiler showed us spending quite a lot
        # of time just figuring out who to call, even if this was cached
        # in a dictionary. The tests below are ordered to keep very
        # frequent elements running fast.

        if name == "StartOfFrame":
            # Just stow the SOF attributes, decode them if we end up
            # actually needing them later.
            self._frameAttrs = attrs

        elif name == "data":
            self.characterHandler = self.current.appendHexData

        elif name == "Packet":
            self.startElement_Packet(attrs)

        elif name == "Transaction":
            self.startElement_Transaction(attrs)

        elif name == "Reset":
            self.startElement_Reset(attrs)

    def endElement(self, name):
        self.characterHandler = None
        if name == 'Document':
            for pipe in self.pipes.keys():
                self.completeUrb(pipe, 'End of Log')

    def startElement_Transaction(self, attrs):
        self.device = parseInt(attrs, 'device', 0)
        self.endpoint = parseInt(attrs, 'endpoint')

    def startElement_Reset(self, attrs):
        # Error out any transactions that are active during a reset
        for pipe in self.pipes.keys():
            self.completeUrb(pipe, 'Bus Reset')

    def beginUrb(self, pipe):
        """Simulate a new URB being created on the supplied pipe. This
           begins a Down transaction and makes it pending and current.
           """
        t = Types.Transaction()
        t.dir = 'Down'
        t.dev, t.endpt = pipe
        t.timestamp = self.timestamp
        t.frame = parseInt(self._frameAttrs, 'frameNumber')

        t.status = 0

        self.pipes[pipe] = t
        self.pending[pipe] = t
        self.current = t

    def flipUrb(self, pipe):
        """Begin the Up phase on a particular pipe. This
           completes the Down transaction, and makes an Up
           current (but not pending)
           """
        del self.pending[pipe]
        down = self.pipes[pipe]
        self.completed.put(down)

        up = Types.Transaction()
        up.dir = 'Up'
        up.dev, up.endpt = pipe

        # Up and Down transactions share setup data, if applicable
        if down.hasSetupData():
            up.data = down.data[:8]

        self.pipes[pipe] = up
        self.current = up

    def completeUrb(self, pipe, id):
        """Complete the Up phase on a pipe"""
        if pipe in self.pending:
            self.flipUrb(pipe)
        assert pipe in self.pipes

        t = self.pipes[pipe]
        del self.pipes[pipe]
        self.current = None

        t.timestamp = self.timestamp
        t.frame = parseInt(self._frameAttrs, 'frameNumber')

        if id in ('ACK', 'NYET'):
            t.status = 0
        else:
            t.status = id
        self.completed.put(t)

    def startElement_Packet(self, attrs):
        id = attrs['id']

        # Fast exit for common packets we don't care about
        if id in ('SOF', 'DATA0', 'DATA1'):
            return

        self.timestamp = parseFloat(attrs, 'time')
        if self.endpoint is None:
            return

        if self.endpoint == 0:
            # EP0 is a special case for us, since its transactions
            # consiste of several phases. We always begin with SETUP.
            # If the request has an input stage, we'll see an OUT after
            # that as a handshake. If not, the handshake is an empty
            # IN stage.

            pipe = self.device, 0

            if id == 'SETUP':
                self.beginUrb(pipe)
                self.ep0FinalStage = False

            elif id == 'IN':
                if pipe in self.pending:
                    self.flipUrb(pipe)
                else:
                    self.current = self.pipes[pipe]
                if self.current.data and (ord(self.current.data[0]) & 0x80) == 0:
                    # This is an output request, IN is our last stage
                    self.ep0FinalStage = True

            elif id == 'OUT':
                self.current = self.pipes[pipe]
                if self.current.data and (ord(self.current.data[0]) & 0x80):
                    # This is an input request, OUT is our last stage
                    self.ep0FinalStage = True

            elif id == 'PING':
                # An acknowledged PING packet should never end a control transfer
                self.ep0FinalStage = False

            elif pipe in self.pipes and (
                id == 'STALL' or (id == 'ACK' and self.ep0FinalStage)):
                self.completeUrb(pipe, id)

        else:
            # It's really annoying that the Ellisys logs strip the
            # direction bit from the endpoint number. We have to recover
            # this ourselves.
            if id == 'IN':
                self.endpoint = self.endpoint | 0x80
            pipe = self.device, self.endpoint

            if id in ('OUT', 'IN', 'PING'):
                # These packets indicate that we'd like to be transmitting
                # data to a particular endpoint- so the operating system must
                # now have an active URB.
                if pipe in self.pipes:
                    # Finish a previous packet that wasn't acknowledged.
                    # This will be frequent if isochronous transfers are involved!
                    self.completeUrb(pipe, 'No Handshake')

                self.beginUrb(pipe)

            if pipe in self.pending and id in ('NAK', 'NYET', 'STALL', 'IN'):
                self.flipUrb(pipe)

            if pipe in self.pipes:
                if id == 'ACK':
                    # This accounts for combining individual low-level USB packets
                    # into the larger packets that should be associated with a URB.
                    # We only end a URB when a short packet is transferred.
                    #
                    # FIXME: Determine the real max packet size, rather than
                    #        using this hardcoded nonsense.
                    if len(self.current.data) & 0x3F:
                        self.completeUrb(pipe, id)

                elif id in ('NYET', 'STALL'):
                    # Always complete on an error condition
                    self.completeUrb(pipe, id)

    def characters(self, content):
        # This extra level of indirection seems to be necessary, I guess Expat is
        # binding our functions once at initialization.
        if self.characterHandler:
            self.characterHandler(content)

Types.psycoBind(EllisysXmlHandler)


class EllisysXmlParser:
    """Parses XML files exported from Ellisys Visual USB. This
       is just a glue object that sets up an XML parser and
       sends SAX events to the EllisysXmlHandler.
       """
    lineOriented = False

    def __init__(self, completed):
        self.completed = completed
        self.xmlParser = xml.sax.make_parser()
        self.xmlParser.setContentHandler(EllisysXmlHandler(completed))

    def parse(self, line):
        self.xmlParser.feed(line)


class UsbmonLogParser:
    """Parses usbmon log lines and generates Transaction objects appropriately.
       Finished transactions are pushed into the supplied queue.

       This parser was originally contributed by Christoph Zimmermann.
       """
    lineOriented = True
    lineNumber = 0

    def __init__(self, completed):
        self.epoch = None
        self.trans = Types.Transaction()
        self.trans.frame = 0
        self.setupData = None
        self.completed = completed

    def parse(self, line, timestamp=None, frame=None):
        self.lineNumber += 1
        tokens = line.split()
        try:
            # Do a small stupid sanity check if this is a correct usbmon log line
            try:
                if len(tokens) < 4:
                    return
                if not(int(tokens[0],16) and int(tokens[1]) and
                       (tokens[2] in ('S', 'C', 'E'))):
                    return
            except:
                print "Error on line %d:" % self.lineNumber
                return

            # Copied log file format description of the usbmon kernel
            # facility You can find the original manual including how
            # to use usbmon in your kernel sources: <linux
            # sources>/Documentation/usb/usbmon.txt
            #
            # Copied text starts here:
            # Any text format data consists of a stream of events,
            # such as URB submission, URB callback, submission
            # error. Every event is a text line, which consists of
            # whitespace separated words. The number or position of
            # words may depend on the event type, but there is a set
            # of words, common for all types.

            # Here is the list of words, from left to right:

            # - URB Tag. This is used to identify URBs, and is
            #   normally an in-kernel address of the URB structure in
            #   hexadecimal, but can be a sequence number or any other
            #   unique string, within reason.

            self.trans.lineNumber = self.lineNumber

            # - Timestamp in microseconds, a decimal number. The
            #   timestamp's resolution depends on available clock, and
            #   so it can be much worse than a microsecond (if the
            #   implementation uses jiffies, for example).

            # Extract the time, convert to seconds
            microtime = int(tokens[1])
            if not self.epoch:
                self.epoch = microtime
            timestamp = (microtime - self.epoch) / 1000000.0

            self.trans.timestamp = timestamp

            # - Event Type. This type refers to the format of the
            #   event, not URB type.  Available types are: S -
            #   submission, C - callback, E - submission error.

            if tokens[2] == 'S':
                self.trans.dir = 'Down'
            else:
                self.trans.dir = 'Up'

            # - "Address" word (formerly a "pipe"). It consists of
            #   four fields, separated by colons: URB type and
            #   direction, Bus number, Device address, Endpoint
            #   number.  Type and direction are encoded with two bytes
            #   in the following manner:
            #
            #     Ci Co   Control input and output
            #     Zi Zo   Isochronous input and output
            #     Ii Io   Interrupt input and output
            #     Bi Bo   Bulk input and output
            #
            #   Bus number, Device address, and Endpoint are decimal
            #   numbers, but they may have leading zeros, for the sake
            #   of human readers.
            #
            #   Note that older kernels seem to omit the bus number field.
            #   We can parse either format.

            pipe = tokens[3].split(':')

            self.trans.dev = int(pipe[-2])
            self.trans.endpt = int(pipe[-1])

            if pipe[0][1] == 'i' and self.trans.endpt != 0:
                # Input endpoint
                self.trans.endpt |= 0x80

            if len(pipe) >= 4:
                self.trans.dev += int(pipe[-3]) * 1000

            # - URB Status word. This is either a letter, or several
            #   numbers separated by colons: URB status, interval,
            #   start frame, and error count. Unlike the "address"
            #   word, all fields save the status are
            #   optional. Interval is printed only for interrupt and
            #   isochronous URBs. Start frame is printed only for
            #   isochronous URBs. Error count is printed only for
            #   isochronous callback events.
            #
            #   The status field is a decimal number, sometimes
            #   negative, which represents a "status" field of the
            #   URB. This field makes no sense for submissions, but is
            #   present anyway to help scripts with parsing. When an
            #   error occurs, the field contains the error code.
            #
            #   In case of a submission of a Control packet, this
            #   field contains a Setup Tag instead of an group of
            #   numbers. It is easy to tell whether the Setup Tag is
            #   present because it is never a number. Thus if scripts
            #   find a set of numbers in this word, they proceed to
            #   read Data Length (except for isochronous URBs).  If
            #   they find something else, like a letter, they read the
            #   setup packet before reading the Data Length or
            #   isochronous descriptors.
            #
            # - Setup packet, if present, consists of 5 words: one of
            #   each for bmRequestType, bRequest, wValue, wIndex,
            #   wLength, as specified by the USB Specification 2.0.
            #   These words are safe to decode if Setup Tag was
            #   's'. Otherwise, the setup packet was present, but not
            #   captured, and the fields contain filler.
            #
            # - Number of isochronous frame descriptors and
            #   descriptors themselves.  If an Isochronous transfer
            #   event has a set of descriptors, a total number of them
            #   in an URB is printed first, then a word per descriptor,
            #   up to a total of 5. The word consists of 3
            #   colon-separated decimal numbers for status, offset, and
            #   length respectively. For submissions, initial length is
            #   reported. For callbacks, actual length is reported.

            if tokens[4] in ('s'):
                # This is a setup packet
                # Example data stage: 23 01 0010 0002 0040

                self.trans.status = 0

                data = ''.join(tokens[5:10])
                self.trans.appendHexData(data)

                # save the setup data to prepend it to the setup packet data stage
                self.setupData = data

            else:
                status_word = tokens[4].split(':')
                self.trans.status = int(status_word[0])

                # - Data Length. For submissions, this is the requested
                #   length. For callbacks, this is the actual length.

                if len(tokens) >= 7 :
                    self.trans.datalen = int(tokens[5])

                    # - Data tag. The usbmon may not always capture data, even
                    #   if length is nonzero.  The data words are present only
                    #   if this tag is '='.

                    # - Data words follow, in big endian hexadecimal
                    #   format. Notice that they are not machine words, but
                    #   really just a byte stream split into words to make it
                    #   easier to read. Thus, the last word may contain from
                    #   one to four bytes.  The length of collected data is
                    #   limited and can be less than the data length report in
                    #   Data Length word.

                    if tokens[6] in ('='):
                        if self.setupData:  # check if this is a setup package data stage
                            # prepend setup data for the decoders
                            self.trans.appendHexData(self.setupData)
                            self.setupData = None

                        self.trans.appendHexData(''.join(tokens[7:]))

            self.completed.put(self.trans)
            self.trans = Types.Transaction()

        # End of copied usbmon description text

        # End of log file parsing

        except:
            print "Error on line %d:" % self.lineNumber
            traceback.print_exc()


class Follower(threading.Thread):
    """A thread that continuously scans a file, parsing each line"""
    pollInterval = 0.1
    running = True
    progress = 0.0
    progressInterval = 0.2
    progressExpiration = 0

    def __init__(self, filename, parser, progressQueue=None, tailMode=False):
        self.filename = filename
        self.parser = parser
        self.progressQueue = progressQueue

        if os.path.splitext(filename)[1] == ".gz":
            # On a gzip file, we need to read the uncompressed filesize from the footer
            f = open(filename, "rb")
            f.seek(-4, 2)
            self.fileSize = struct.unpack("<l", f.read(4))[0]
            f.seek(0)
            self.file = gzip.GzipFile(fileobj=f)
        else:
            self.file = open(filename)
            self.fileSize = os.fstat(self.file.fileno()).st_size

        if tailMode:
            # Start at the end
            self.file.seek(0, 2)

        threading.Thread.__init__(self)
        atexit.register(self.stop)
        self.progressCallbacks = []

    def run(self):
        try:
            while self.running:
                if self.parser.lineOriented:
                    line = self.file.readline()
                else:
                    line = self.file.read(16384)
                if line:
                    self.parser.parse(line)

                    # Compute our progress only every progressInterval seconds
                    now = time.clock()
                    if now >= self.progressExpiration:
                        self.setProgress(min(1.0, self.file.tell() / self.fileSize))
                        self.progressExpiration = now + self.progressInterval
                else:
                    self.setProgress(1.0)
                    time.sleep(self.pollInterval)
        except KeyboardInterrupt:
            gtk.main_quit()

    def setProgress(self, progress):
        self.progress = progress
        if self.progressQueue:
            self.progressQueue.put(("Loading %s" % os.path.basename(self.filename),
                                    self.progress))

    def stop(self):
        # Keep the queue empty so it doesn't deadlock on put()
        if not self.running:
            return
        self.running = False
        try:
            while 1:
                self.parser.completed.get(False)
        except Queue.Empty:
            pass
        self.join()


class QueueSink:
    """Polls a Queue for new items, via the Glib main loop.
       When they're available, calls a callback with them.
       """
    interval = 200
    timeSlice = 0.25
    maxsize = 512
    batch = range(10)

    def __init__(self, callback):
        self.queue = Queue.Queue(self.maxsize)
        self.callback = callback
        self.poll()

    def poll(self):
        try:
            deadline = time.clock() + self.timeSlice
            while time.clock() < deadline:
                # This avoids calling time.clock() once per queue item.
                for _ in self.batch:
                    try:
                        i = self.queue.get(False)
                    except Queue.Empty:
                        # We have nothing to do, set a longer interval
                        gobject.timeout_add(self.interval, self.poll)
                        return False
                    else:
                        self.callback(i)

        except KeyboardInterrupt:
            gtk.main_quit()

        # Come back after GTK's event queue is idle
        gobject.idle_add(self.poll)
        return False


def chooseParser(filename):
    """Return an appropriate log parser class for the provided filename.
       This implementation does not try to inspect the file's content,
       it just looks at the filename's extension.
       """
    base, ext = os.path.splitext(filename)

    if ext == ".gz":
        return chooseParser(base)
    if ext == ".xml":
        return EllisysXmlParser
    if ext == ".tslog":
        return TimestampLogParser
    if ext == ".mon":
        return UsbmonLogParser
    return VmxLogParser