File: MAST.py

package info (click to toggle)
python-biopython 1.64%2Bdfsg-5
links: PTS, VCS
area: main
in suites: jessie, jessie-kfreebsd
size: 44,416 kB
ctags: 12,472
sloc: python: 153,759; xml: 67,286; ansic: 9,003; sql: 1,488; makefile: 144; sh: 59
file content (149 lines) | stat: -rw-r--r-- 4,535 bytes
# Copyright 2008 by Bartek Wilczynski.
# Adapted from Bio.MEME.Parser by Jason A. Hackney.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

from Bio.Alphabet import IUPAC
from Bio.Motif.Parsers.MEME import MEMEMotif


class Record(object):
    """Container for the data collected from one MAST run.

    A freshly created Record is empty; its attributes start out as:

    sequences - empty list
    version   - empty string
    database  - empty string
    diagrams  - empty dictionary
    alphabet  - None
    motifs    - empty list (the motifs stored here are expected to be
                MEMEMotif objects)

    Methods:
    get_motif_by_name (motif_name): returns the stored motif with the
    given name, or None when no stored motif has that name.
    """

    def __init__(self):
        # Scalar metadata.
        self.version = ""
        self.database = ""
        self.alphabet = None
        # Collections that start out empty.
        self.motifs = []
        self.sequences = []
        self.diagrams = {}

    def get_motif_by_name(self, name):
        """Return the first motif whose ``name`` attribute equals name.

        The motifs are searched in the order in which they are stored.
        None is returned when there is no match.
        """
        candidates = (motif for motif in self.motifs if motif.name == name)
        return next(candidates, None)

def read(handle):
    """Parse MAST output from handle and return it as a Record.

    The handle is consumed front to back by a fixed sequence of stage
    parsers (version, database and motifs, then sections I, II and III),
    each of which fills in its part of the same Record.
    """
    result = Record()
    stages = (
        __read_version,
        __read_database_and_motifs,
        __read_section_i,
        __read_section_ii,
        __read_section_iii,
    )
    # Order matters: every stage continues reading where the previous
    # one stopped.
    for stage in stages:
        stage(result, handle)
    return result


# Everything below is private


def __read_version(record, handle):
    """Find the 'MAST version' line and store the version on the record.

    Lines are read from handle until one containing 'MAST version' is
    seen; the third whitespace-separated word of that line is stored as
    record.version.

    Raises ValueError if handle runs out before such a line is found.
    """
    version_line = None
    for text in handle:
        if "MAST version" in text:
            version_line = text
            break
    if version_line is None:
        raise ValueError("Improper input file. Does not begin with a line with 'MAST version'")
    fields = version_line.split()
    record.version = fields[2]


def __read_database_and_motifs(record, handle):
    """Parse the 'DATABASE AND MOTIFS' part of the output.

    Stores the database name (second word of the DATABASE line) in
    record.database, sets record.alphabet when the third word of that
    line is '(nucleotide)' or '(peptide)', and appends one MEMEMotif per
    row of the motif table to record.motifs.

    Raises ValueError if the line after the section title does not start
    with '****', if the following line does not contain 'DATABASE', or if
    the line after the 'MOTIF WIDTH' header does not contain '----'.
    """
    # Skip forward to the section title.
    for title in handle:
        if title.startswith('DATABASE AND MOTIFS'):
            break
    banner = next(handle)
    if not banner.startswith('****'):
        raise ValueError("Line does not start with '****':\n%s" % banner)

    db_line = next(handle)
    if 'DATABASE' not in db_line:
        raise ValueError("Line does not contain 'DATABASE':\n%s" % db_line)
    db_fields = db_line.split()
    record.database = db_fields[1]
    sequence_type = db_fields[2]
    # Any other sequence type leaves record.alphabet untouched.
    if sequence_type == '(nucleotide)':
        record.alphabet = IUPAC.unambiguous_dna
    elif sequence_type == '(peptide)':
        record.alphabet = IUPAC.protein

    # Skip forward to the header of the motif table.
    for header in handle:
        if 'MOTIF WIDTH' in header:
            break
    ruler = next(handle)
    if '----' not in ruler:
        raise ValueError("Line does not contain '----':\n%s" % ruler)

    # One motif per row; the table ends at the first blank line.
    for row in handle:
        columns = row.split()
        if not columns:
            break
        motif = MEMEMotif()
        motif.alphabet = record.alphabet
        motif.name = columns[0]
        # Only the first two columns are used; anything further on the
        # row is ignored.
        motif.length = int(columns[1])
        record.motifs.append(motif)


def __read_section_i(record, handle):
    """Parse SECTION I and collect the sequence names.

    After skipping to the 'SECTION I:' title and then to the
    'SEQUENCE NAME' header, every table row up to the first blank line
    contributes its first whitespace-separated word to record.sequences.

    Raises ValueError if the line after the header does not start with
    '---' or the line after the table does not start with '****'.
    """
    # Skip to the section title first, then on to the column header.
    for marker in ('SECTION I:', 'SEQUENCE NAME'):
        for text in handle:
            if text.startswith(marker):
                break
    ruler = next(handle)
    if not ruler.startswith('---'):
        raise ValueError("Line does not start with '---':\n%s" % ruler)
    for row in handle:
        if not row.strip():
            break
        # The rest of the row (description, E-value, length) is not kept.
        name, _unused_rest = row.split(None, 1)
        record.sequences.append(name)
    closing = next(handle)
    if not closing.startswith('****'):
        raise ValueError("Line does not start with '****':\n%s" % closing)


def __read_section_ii(record, handle):
    """Parse SECTION II and store the motif diagram of each sequence.

    After skipping to the 'SECTION II:' title and then to the
    'SEQUENCE NAME' header, the table is read up to the first blank line.
    A row that does not start with a space must consist of three
    whitespace-separated fields (sequence name, p-value, diagram) and
    starts a new entry in record.diagrams.  A row that starts with a
    space is a continuation whose stripped text is appended to the
    diagram of the most recently read sequence.

    Raises ValueError if the line after the header does not start with
    '---', if a continuation row appears before any sequence row, if a
    sequence row does not have exactly three fields, or if the line after
    the table does not start with '****'.
    """
    for line in handle:
        if line.startswith('SECTION II:'):
            break
    for line in handle:
        if line.startswith('SEQUENCE NAME'):
            break
    line = next(handle)
    if not line.startswith('---'):
        raise ValueError("Line does not start with '---':\n%s" % line)
    # Name of the sequence whose diagram is currently being assembled;
    # None until the first sequence row has been read.
    sequence = None
    for line in handle:
        if not line.strip():
            break
        elif line.startswith(" "):
            if sequence is None:
                # Without this check the lookup below would fail with an
                # UnboundLocalError instead of reporting a format problem.
                raise ValueError("Diagram continuation line found before any sequence line:\n%s" % line)
            record.diagrams[sequence] += line.strip()
        else:
            fields = line.split()
            if len(fields) != 3:
                raise ValueError("Line does not consist of sequence name, p-value and diagram:\n%s" % line)
            sequence, _pvalue, diagram = fields
            record.diagrams[sequence] = diagram
    line = next(handle)
    if not line.startswith('****'):
        raise ValueError("Line does not start with '****':\n%s" % line)


def __read_section_iii(record, handle):
    """Read past SECTION III without storing anything on the record.

    Consumes lines from handle up to and including, in turn, the first
    line starting with 'SECTION III:', the next line starting with
    '****', the next line starting with '*****', and finally the next
    line that is not blank.  Reaching the end of handle at any point
    simply ends the skipping; no exception is raised.
    """
    for prefix in ('SECTION III:', '****', '*****'):
        for text in handle:
            if text.startswith(prefix):
                break
    # Finally consume everything up to and including the first line that
    # contains something other than whitespace.
    for text in handle:
        if text.strip():
            break