1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70
|
"""A Martel format to parse the output from transfac.
Formats:
    format    Format for a whole file.
"""
import sys
from Martel import *
from Martel import RecordReader
# A line holding nothing but optional spaces before its end-of-line.
blank_line = Opt(Spaces()) + AnyEol()

# Header line: fixed label, spaces, then the rest of the line ("matrix_file").
MATRIX_LINE = (
    Str("Search for sites by WeightMatrix library:")
    + Spaces()
    + UntilEol("matrix_file")
    + AnyEol()
)

# Header line: fixed label, spaces, then the rest of the line
# ("sequence_file").
SEQUENCE_LINE = (
    Str("Sequence file:")
    + Spaces()
    + UntilEol("sequence_file")
    + AnyEol()
)

# Header line: fixed label, a space-delimited "profile_file" token, then the
# rest of the line ("profile_description").
PROFILE_LINE = (
    Str("Site selection profile:")
    + Spaces()
    + UntilSep("profile_file", sep=" ")
    + Spaces()
    + UntilEol("profile_description")
    + AnyEol()
)
# Per-sequence title line, laid out as:
#   Inspecting sequence ID <entryname> <dataclass>; <molecule>; <division>;
#   <sequencelength> BP<anything up to end of line>
TITLE_LINE = (
    Str("Inspecting sequence ID")
    + Spaces()
    + UntilSep("entryname", sep=" ")
    + Spaces()
    # Three ';'-terminated fields follow the entry name.
    + UntilSep("dataclass", sep=";")
    + Str(";")
    + Spaces()
    + UntilSep("molecule", sep=";")
    + Str(";")
    + Spaces()
    + UntilSep("division", sep=";")
    + Str(";")
    + Spaces()
    # Length token, then the literal "BP"; the remainder of the line is
    # consumed without being given a name.
    + UntilSep("sequencelength", sep=" ")
    + Spaces()
    + Str("BP")
    + UntilEol()
    + AnyEol()
)
def SS(exp):
    """Return *exp* wrapped in optional spaces on both sides."""
    left_pad = Opt(Spaces())
    right_pad = Opt(Spaces())
    return left_pad + exp + right_pad
# One hit line; columns are separated by '|':
#   <matrix_identifier> | <position> (<strand>) | <core_match> |
#   <matrix_match> | <sequence>
DATA_LINE = (
    # Identifier stops at the first space or '|'.
    SS(UntilSep("matrix_identifier", sep=" |"))
    + Str("|")
    # Position token followed by the strand sign in parentheses.
    + SS(UntilSep("position", sep=" "))
    + SS(Str("(") + Group("strand", Any("+-")) + Str(")"))
    + Str("|")
    + SS(Float("core_match"))
    + Str("|")
    + SS(Float("matrix_match"))
    + Str("|")
    # Everything after the last '|' (minus leading spaces) is the sequence.
    + Opt(Spaces())
    + UntilEol("sequence")
    + AnyEol()
)
# Summary line: leading spaces, fixed label, integer "sequences_length".
SEQUENCES_LENGTH_LINE = (
    Spaces()
    + Str("Total sequences length=")
    + Integer("sequences_length")
    + AnyEol()
)

# Summary line: leading spaces, fixed label, integer "found_sites".
FOUND_SITES_LINE = (
    Spaces()
    + Str("Total number of found sites=")
    + Integer("found_sites")
    + AnyEol()
)

# Summary line: leading spaces, fixed label, float "sites_per_nucleotide".
SITE_FREQUENCY_LINE = (
    Spaces()
    + Str("Frequency of sites per nucleotide=")
    + Float("sites_per_nucleotide")
    + AnyEol()
)
# Whole-file grammar: three header lines, the title line, any number of hit
# lines, then three summary lines, with blank lines between the sections.
# NOTE: the name shadows the builtin format(), but it is the public name
# advertised in the module docstring, so it is kept as-is.
format = (
    MATRIX_LINE
    + SEQUENCE_LINE
    + PROFILE_LINE
    + blank_line
    + TITLE_LINE
    + blank_line
    + Rep(DATA_LINE)
    + blank_line
    + SEQUENCES_LENGTH_LINE
    + blank_line
    + FOUND_SITES_LINE
    + blank_line
    + SITE_FREQUENCY_LINE
)
|