File: Parser.py

package info (click to toggle)
python-biopython 1.42-2
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 17,584 kB
  • ctags: 12,272
  • sloc: python: 80,461; xml: 13,834; ansic: 7,902; cpp: 1,855; sql: 1,144; makefile: 203
file content (630 lines) | stat: -rw-r--r-- 24,697 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
# Copyright 2004 by Jason A. Hackney.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

from string import join
from Bio.Alphabet import IUPAC
from Bio import File
from Bio.ParserSupport import *
from Bio import Seq
from Bio.MEME import Motif
import re

class MEMERecord:
    """A class for holding the results of a MEME run.
    
    A MEMERecord is an object that holds the results from running
    MEME. It implements no methods of its own.
        
    """
    def __init__ (self):
        """__init__ (self)"""
        self.motifs = []
        self.version = ""
        self.datafile = ""
        self.command = ""
        self.alphabet = None
        self.sequence_names = []
        
    def get_motif_by_name (self, name):
        for m in self.motifs:
            if m.name == name:
                return m

class MEMEParser (AbstractParser):
    """A parser for the text output of the MEME program.
    Parses the output into an object of the MEMERecord class.
    
    Methods:
    parse (handle): parses the contents of the file handle passed to it.
    
    Example:
    
    f = open("meme.output.txt")
    parser = MEMEParser()
    meme_record = parser.parse(f)
    for motif in meme_record.motifs:
        for instance in motif.instances:
            print instance.motif_name, instance.sequence_name, instance.strand, instance.pvalue
    
    """
    def __init__ (self):
        """__init__ (self)"""
        self._scanner = _MEMEScanner()
        self._consumer = _MEMEConsumer()
    
    def parse (self, handle):
        """parse (self, handle)"""
        self._scanner.feed(handle, self._consumer)
        return self._consumer.data
    


class _MEMEScanner:
    """Scanner for MEME output. 
    
    Methods:
    feed
        
    """
    
    def feed (self, handle, consumer):
        """
        Feeds in MEME output for scanning. handle should
        implement the readline method. consumer is 
        a Consumer object that can receive the salient events.
        """
        if isinstance(handle, File.UndoHandle):
            uhandle = handle
        else:
            uhandle = File.UndoHandle(handle)
        
        self._scan_header(uhandle, consumer)
        self._scan_motifs    (uhandle, consumer)
    
    def _scan_header(self, uhandle, consumer):
        try:
            read_and_call_until(uhandle, consumer.noevent, contains = 'MEME version')
        except SyntaxError:
            raise SyntaxError, "Improper input file. File should contain a line starting MEME version."
        read_and_call(uhandle, consumer._version, start = 'MEME version')
        read_and_call_until(uhandle, consumer.noevent, start = 'TRAINING SET')
        read_and_call(uhandle, consumer.noevent, start = 'TRAINING SET')
        read_and_call(uhandle, consumer.noevent, start = '****')
        read_and_call(uhandle, consumer._datafile, start = 'DATAFILE')
        read_and_call(uhandle, consumer._alphabet, start = 'ALPHABET')
        read_and_call(uhandle, consumer.noevent, start = 'Sequence name')
        read_and_call(uhandle, consumer.noevent, start = '----')
        read_and_call_until(uhandle, consumer._sequence_name, start = '***')
        read_and_call_until(uhandle, consumer.noevent, start = 'command:')
        read_and_call(uhandle, consumer._commandline, start = 'command:')
        read_and_call_until(uhandle, consumer.noevent, start = 'MOTIF  1')
    
    def _scan_motifs(self, uhandle, consumer):
        while 1:
            read_and_call(uhandle, consumer._add_motif_with_info, start = 'MOTIF')
            read_and_call_until(uhandle, consumer.noevent, contains = 'sorted by position p-value')
            read_and_call(uhandle, consumer.motif_name, contains = 'sorted by position p-value')
            read_and_call(uhandle, consumer.noevent, start = '---')
            read_and_call(uhandle, consumer.noevent, start = 'Sequence name')
            read_and_call(uhandle, consumer.noevent, start = '---')
            read_and_call_until(uhandle, consumer.add_instance, start = '---')
            read_and_call_until(uhandle, consumer.noevent, start = 'log-odds matrix')
            read_and_call(uhandle, consumer.noevent)
            read_and_call_until(uhandle, consumer.add_to_logodds, start = '---')
            read_and_call_until(uhandle, consumer.noevent, start = 'letter-probability matrix')
            read_and_call(uhandle, consumer.noevent, start = 'letter-probability matrix')    
            read_and_call_until(uhandle, consumer.add_to_pssm, start = '---')
            read_and_call_until(uhandle, consumer.noevent, start = 'Time')
            read_and_call(uhandle, consumer.noevent, start = 'Time')
            read_and_call(uhandle, consumer.noevent, blank = 1)
            read_and_call(uhandle, consumer.noevent, start = '***')
            read_and_call_while(uhandle, consumer.noevent, blank = 1)
            read_and_call(uhandle, consumer.noevent, start = '***')
            line = safe_peekline(uhandle)
            if line.startswith("SUMMARY OF MOTIFS"):
                break
    


class _MEMEConsumer:
    """
    Consumer that can receive events from MEME Scanner.
    
    This is the Consumer object that should be passed to the 
    MEME Scanner.
    """
    
    def __init__ (self):
        self.current_motif = None
        self.sequence_names = []
        self.data = MEMERecord()
    
    def _version (self, line):
        line = line.strip()
        ls = line.split()
        self.data.version = ls[2]
    
    def _datafile (self, line):
        line = line.strip()
        line = line.replace('DATAFILE= ','')
        self.data.datafile = line
    
    def _alphabet (self, line):
        line = line.strip()
        line = line.replace('ALPHABET= ','')
        if line == 'ACGT':
            al = IUPAC.unambiguous_dna
        else:
            al = IUPAC.protein
        self.data.alphabet = al
    
    def _sequence_name (self, line):
        line = line.strip()
        ls = line.split()
        self.data.sequence_names.append(ls[0])
        if len(ls) == 6:
            self.data.sequence_names.append(ls[3])
    
    def _commandline (self, line):
        line = line.strip()
        line = line.replace('command: ','')
        self.data.command = line
    
    def _add_motif_with_info (self, line):
        line = line.strip()
        ls = line.split()
        motif = Motif.MEMEMotif()
        motif._length(ls[4])
        motif._numoccurrences(ls[7])
        motif._evalue(ls[13])
        motif._alphabet(self.data.alphabet)
        self.data.motifs.append(motif)
        self.current_motif = motif
    
    def motif_name (self, line):
        line = line.strip()
        ls = line.split()
        name = join(ls[0:2], ' ')
        self.current_motif._name(name)
    
    def add_instance (self, line):
        line = line.strip()
        ls = line.split()
        if self.data.command.find('revcomp') != -1:
            seq = Seq.Seq(ls[5], self.data.alphabet)
            self.current_motif.add_instance_from_values(name = ls[0], sequence = seq, start = ls[2], pvalue = ls[3], strand = ls[1])
        else:
            seq = Seq.Seq(ls[4], self.data.alphabet)
            self.current_motif.add_instance_from_values(name = ls[0], sequence = seq, start = ls[1], pvalue = ls[2])
    
    def add_to_pssm (self, line):
        line = line.strip()
        sl = line.split()
        thisposition = tuple([float(i) for i in sl])
        self.current_motif.add_to_pssm(thisposition)
    
    def add_to_logodds (self, line):
        line = line.strip()
        sl = line.split()
        thisposition = tuple([float(i) for i in sl])    
        self.current_motif.add_to_logodds(thisposition)
    
    def noevent (self,line):
        pass
    


class _MASTConsumer:
    """
    Consumer that can receive events from _MASTScanner.
    
    A _MASTConsumer parses lines from a mast text output file.
    The motif match diagrams are parsed using line buffering. 
    Each of the buffering functions have a dummy variable that is
    required for testing using the Bio.ParserSupport.TaggingConsumer.
    If this variable isn't there, the TaggingConsumer barfs. In
    the _MASTScanner, None is passed in the place of this variable.
    """
    def __init__ (self):
        self.data = MASTRecord()
        self._current_seq = ""
        self._line_buffer = []
        self._buffer_size = 0
        self._buffered_seq_start = 0
    
    def _version (self, line):
        line = line.strip()
        ls = line.split()
        self.data._version(ls[2])
    
    def _database (self, line):
        line = line.strip()
        ls = line.split()
        self.data._database(ls[1])
        al = ""
        if ls[2] == '(nucleotide)':
            al = IUPAC.unambiguous_dna
            self.data._alphabet(al)        
        else:
            al = IUPAC.protein
            self.data._alphabet(al)
        
    def _add_motif (self, line):
        line = line.strip()
        ls = line.split()
        m = Motif.MEMEMotif()
        m._alphabet(self.data.alphabet)
        m._length(ls[1])
        name = ls[0]
        m._name(name)
        m._consensus(ls[2])
        self.data._add_motif(m)
    
    def _add_match_diagram (self, line):
        line = line.strip()
        ls = line.split()
        self.data._add_diagram_for_sequence(ls[1], self._current_seq)
        ds = ls[1].split('_')
        i = 0
        start = 0
        for i in range(0,len(ds)):
            if ds[i].find('[') != -1 or ds[i].find('<') != -1:
                inst = Motif.Instance()
                inst._seqname (self._current_seq)
                inst._start (start)
                r = re.compile('\d+')
                mn = r.findall(ds[i])[0]
                if ds[i].find('-') != -1:
                    inst.strand = '-'
                else:
                    inst.strand = '+'
                motif = self.data.get_motif_by_name(mn)
                motif.add_instance(inst)
                start += motif.length
            else:
                start += int(ds[i])            
    
    def _add_sequence_match_with_diagram (self, line):
        line = line.strip()
        ls = line.split()
        self.data._add_sequence(ls[0])
        self.data._add_diagram_for_sequence(ls[2],ls[0])
        ds = ls[2].split('_')
        i = 0
        start = 0
        for i in range(0,len(ds)):
            if ds[i].find('+') != -1 or ds[i].find('-') != -1:
                inst = Motif.Instance()
                inst._seqname (ls[0])
                inst._start (start)
                r = re.compile('\d+')
                mn = r.findall(ds[i])[0]
                if ds[i].find('-') != -1:
                    inst.strand = '-'
                else:
                    inst.strand = '+'
                motif = self.data.get_motif_by_name(mn)
                motif.add_instance(inst)
                start += motif.length
            else:
                start += int(ds[i])            
    
    def _add_diagram_from_buffer (self, dummy):
        line = ""
        for l in self._line_buffer:
            line += l.strip()
        ls = line.split()
        self.data._add_diagram_for_sequence(ls[1], self._current_seq)
        ds = ls[1].split('_')
        i = 0
        start = 0
        for i in range(0,len(ds)):
            if ds[i].find('[') != -1 or ds[i].find('<') != -1:
                inst = Motif.Instance()
                inst._seqname (self._current_seq)
                inst._start (start)
                r = re.compile('\d+')
                mn = r.findall(ds[i])[0]
                if ds[i].find('-') != -1:
                    inst.strand = '-'
                else:
                    inst.strand = '+'
                motif = self.data.get_motif_by_name(mn)
                motif.add_instance(inst)
                start += motif.length
            else:
                start += int(ds[i])            
    
    def _set_current_seq (self, line):
        line = line.strip()
        self._current_seq = line
        if not self.data.sequences.count(line):
            self.data.sequences.append(line)
    
    def _add_line_to_buffer (self, line):
        line = line.strip()
        if not line.startswith('*****'):
            self._line_buffer.append(line)
        else:
            return -1
    
    def _parse_buffer (self, dummy):
        """Parses the line buffer to get e-values for each instance of a motif.
        This buffer parser is the most likely point of failure for the 
        MASTParser.
        """
        insts = self.data.get_motif_matches_for_sequence(self._current_seq)    
        if len(insts) > 0:
            
            fullSeq = self._line_buffer[self._buffer_size-1]
            pvals = self._line_buffer[1].split()
            p = 0
            lpval = len(pvals)
            while p < lpval:
                if pvals[p].count('e') > 1:
                #Break blocks up by e and parse into valid floats. This only 
                #works if there are no e-values greater than 1e-5.
                    pvs = []
                    spe = pvals[p].split('e')
                    spe.reverse()
                    dotind = spe[1].find('.')
                    if dotind == -1:
                        thispval = spe[1][-1] + 'e' + spe[0]
                    else:
                        thispval = spe[1][dotind-1:] + 'e' + spe[0]
                    pvs.append(thispval)
                    for spi in range(2,len(spe)):
                        dotind = spe[spi].find('.')
                        prevdotind = spe[spi-1].find('.')
                        if dotind != -1:
                            if prevdotind == -1:
                                thispval = spe[spi][dotind-1:] + 'e' + spe[spi-1][:-1]
                            else:
                                thispval = spe[spi][dotind-1:] + 'e' + spe[spi-1][0:prevdotind-1]
                        else:
                            if prevdotind == -1:
                                thispval = spe[spi][-1] + 'e' + spe[spi-1][:-1]
                            else:
                                thispval = spe[spi][-1] + 'e' + spe[spi-1][0:prevdotind-1]
                        pvs.append(thispval)
                    pvs.reverse()
                    if p > 0:
                        pvals = pvals[0:p] + pvs + pvals[p+1:]
                    else:
                        pvals = pvs + pvals[p+1:]
                    lpval = len(pvals)
                p += 1
            i = 0
            if len(pvals) != len(insts):
                sys.stderr.write("Failure to parse p-values for " + self._current_seq +  ":  " + self._line_buffer[1] + " to: " + str(pvals) + "\n")
                pvals = []
#            else:
#                sys.stderr.write('These are just fine' + self._current_seq + ': ' + self._line_buffer[1] + " to: " + str(pvals) + "\n")
            for i in range(0,len(insts)):
                inst = insts[i]
                start = inst.start - self._buffered_seq_start + 1
                thisSeq = fullSeq[start:start+inst.length]
                thisSeq = Seq.Seq(thisSeq, self.data.alphabet)
                inst._sequence(thisSeq)
                if pvals:
                    inst._pvalue(float(pvals[i]))

    def _blank_buffer (self, dummy):
        self._line_buffer = []
        self._buffer_size = 0
    
    def _collapse_buffer(self, dummy):
        if self._buffer_size == 0:
            if len(self._line_buffer) > 0:
                self._buffer_size = len(self._line_buffer)
                ll = self._line_buffer[self._buffer_size-1].split()
                self._line_buffer[self._buffer_size-1] = ll[1]
                self._buffered_seq_start = int(ll[0])
        else:
            i = 0
            for i in range(self._buffer_size, len(self._line_buffer)-1):
                    self._line_buffer[i-self._buffer_size] = self._line_buffer[i-self._buffer_size] + self._line_buffer[i].strip()
            ll = self._line_buffer[len(self._line_buffer)-1].split()
            if int(ll[0]) == self._buffered_seq_start + len(self._line_buffer[self._buffer_size-1]):
                self._line_buffer[self._buffer_size-1] += ll[1]
            else:
                differ = int(ll[0]) - (self._buffered_seq_start + len(self._line_buffer[self._buffer_size-1]))
                self._line_buffer[self._buffer_size-1] += "N"*differ
                self._line_buffer[self._buffer_size-1] += ll[1]
            self._line_buffer = self._line_buffer[0:self._buffer_size]
    
    def _add_motif_match (self, line):
        line = line.strip()
        if line.find('[') != -1 or line.find('<') != -1:
            pass
        elif line.find('e') != -1:
            pass
        elif line.find('+') != -1:
            pass
    
    def noevent (self, line):
        pass
    


class MASTParser(AbstractParser):
    """
    Parser for MAST text output. HTML output cannot be parsed, yet. Returns a MASTRecord
    
    A MASTParser takes a file handle for a MAST text output file and 
    returns a MASTRecord, containing the hits between motifs and 
    sequences. The parser does some unusual line buffering to parse out 
    match diagrams. Really complex diagrams often lead to an error message 
    and p-values not being parsed for a given line.
    
    Methods:
    parse (handle): parses the data from the file handle passed to it.
    
    Example:
    
    f = open("mast_file.txt")
    parser = MASTParser()
    mast_record = parser.parse(f)
    for motif in mast_record.motifs:
        for instance in motif.instances:
            print instance.motif_name, instance.sequence_name, instance.strand, instance.pvalue
    """
    def __init__ (self):
        self._consumer = _MASTConsumer()
        self._scanner = _MASTScanner()
    
    def parse (self, handle):
        self._scanner.feed(handle, self._consumer)
        return self._consumer.data
    


class _MASTScanner:
    """
    Scanner for MAST text output. 
        
    """
    def feed (self, handle, consumer):
        if isinstance(handle, File.UndoHandle):
            uhandle = handle
        else:
            uhandle = File.UndoHandle(handle)
            
        self._scan_header(uhandle, consumer)
        self._scan_matches(uhandle, consumer)
        self._scan_annotated_matches(uhandle, consumer)
    
    def _scan_header (self, uhandle, consumer):
        try:
            read_and_call_until(uhandle, consumer.noevent, contains = "MAST version")
        except SyntaxError:
            raise SyntaxError, "Improper input file. Does not begin with a line with 'MAST version'"
        read_and_call(uhandle, consumer._version, contains = 'MAST version')
        read_and_call_until(uhandle, consumer.noevent, start = 'DATABASE AND MOTIFS')
        read_and_call(uhandle, consumer.noevent, start = 'DATABASE')
        read_and_call(uhandle, consumer.noevent, start = '****')
        read_and_call(uhandle, consumer._database, contains = 'DATABASE')
        read_and_call_until(uhandle, consumer.noevent, contains = 'MOTIF WIDTH')
        read_and_call(uhandle, consumer.noevent, contains = 'MOTIF')
        read_and_call(uhandle, consumer.noevent, contains = '----')
        read_and_call_until(uhandle, consumer._add_motif, blank = 1)
        read_and_call_until(uhandle, consumer.noevent, start = 'SECTION II:')
    
    def _scan_matches (self, uhandle, consumer):
        read_and_call_until(uhandle, consumer.noevent, start = 'SEQUENCE NAME')
        read_and_call(uhandle, consumer.noevent, start = 'SEQUENCE NAME')
        read_and_call(uhandle, consumer.noevent, start = '---')
#        read_and_call_until(uhandle, consumer._add_sequence_match_with_diagram, blank = 1)
        read_and_call_until(uhandle, consumer.noevent, blank = 1)
        read_and_call(uhandle, consumer.noevent, blank = 1)
    
    def _scan_annotated_matches (self, uhandle, consumer):
        read_and_call_until(uhandle, consumer.noevent, start = 'SECTION III:')
        read_and_call(uhandle, consumer.noevent, start = 'SECTION III:')
        read_and_call_until(uhandle, consumer.noevent, start = '****')
        read_and_call(uhandle, consumer.noevent, start = '****')
        read_and_call_until(uhandle, consumer.noevent, start = '*****')
        read_and_call(uhandle, consumer.noevent)
        read_and_call_while(uhandle, consumer.noevent, blank = 1)
        readMatches = 1
        while readMatches == 1:
            if consumer._current_seq:
                if consumer._buffer_size != 0:
                    consumer._parse_buffer(None)
                consumer._blank_buffer(None)
            read_and_call(uhandle, consumer._set_current_seq)
            read_and_call_until(uhandle, consumer.noevent, start = '  DIAGRAM')
            read_and_call_until(uhandle, consumer._add_line_to_buffer, blank = 1)
            consumer._add_diagram_from_buffer(None)
            consumer._blank_buffer(None)
            read_and_call(uhandle, consumer.noevent, blank = 1)
            while 1:
                line = safe_peekline(uhandle)
                if line.startswith('****'):
                    consumer._parse_buffer(None)
                    readMatches = 0
                    break
                read_and_call_until(uhandle, consumer._add_line_to_buffer, blank = 1)
                read_and_call(uhandle, consumer.noevent, blank = 1)
                consumer._collapse_buffer(None)
                if attempt_read_and_call(uhandle, consumer.noevent, blank = 1):
                    break
                elif attempt_read_and_call(uhandle, consumer.noevent, start = '*****'):
                    consumer._parse_buffer(None)
                    consumer._blank_buffer(None)
                    readMatches = 0
                    break
    


class MASTRecord:
    """The class for holding the results from a MAST run.
    
    A MASTRecord holds data about matches between motifs and sequences.
    The motifs held by the MASTRecord are objects of the class MEMEMotif.
    
    Methods:
    get_motif_matches_for_sequence(sequence_name): returns all of the
        motif matches within a given sequence. The matches are objects of
        the class MEME.Motif.Instance
    get_motif_matches (motif_name): returns all of the matches for a motif
        in the sequences searched. The matches returned are of class 
        MEME.Motif.Instance
    get_motif_by_name (motif_name): returns a MEMEMotif with the given
        name.
    """
    def __init__ (self):
        self.sequences = []
        self.version = ""
        self.matches = []
        self.database = ""
        self.diagrams = {}
        self.alphabet = None
        self.motifs = []
    
    def _version (self, version):
        self.version = version
    
    def _alphabet (self, alphabet):
        if alphabet == IUPAC.protein or alphabet == IUPAC.ambiguous_dna or alphabet == IUPAC.unambiguous_dna:
            self.alphabet = alphabet
        else:
            return -1
    
    def _database(self, database):
        self.database = database
    
    def get_motif_matches_for_sequence (self, seq):
        insts = []
        for m in self.motifs:
            for i in m.instances:
                if i.sequence_name == seq:
                    insts.append(i)
        insts.sort(lambda x,y: cmp(x.start, y.start))
        return insts
    
    def get_motif_matches (self, motif):
        m = self.get_motif_by_name (motif.name)
        return m.instances
    
    def _add_diagram_for_sequence (self, diagram, seq):
        self.diagrams[seq] = diagram
    
    def _add_match (self, match):
        self.matches.append(match)
    
    def _add_sequence (self, sequence):
        self.sequences.append(sequence)
    
    def _add_motif (self, motif):
        self.motifs.append(motif)
    
    def get_motif_by_name (self, name):
        for m in self.motifs:
            if m.name == name:
                return m