File: blocks.py

package info (click to toggle)
python-biopython 1.42-2
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 17,584 kB
  • ctags: 12,272
  • sloc: python: 80,461; xml: 13,834; ansic: 7,902; cpp: 1,855; sql: 1,144; makefile: 203
file content (80 lines) | stat: -rw-r--r-- 2,731 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# This was tested against BLOCKS-12.0, June 2000

from Martel import *
from Martel import RecordReader
from Bio import Std

# File header: every line that comes before the first line starting
# with "ID   " (i.e. before the first record).
header = Rep(
    AssertNot(Str("ID   ")) + ToEol()
)

# Record identifier line, for example:
#   ID   kringle; BLOCK
#   ID   14-3-3; BLOCK
# The literal prefix is "ID" plus three spaces, so a segment line such as
#   IDSA_METJA|Q58270  (  46) GGKRIRPYLTV  11
# is not mistaken for an ID line.
ID = (
    Str("ID   ")
    + Std.dbid(ToSep(sep=";"), {"type": "primary"})
    + Str(" BLOCK")
    + AnyEol()
)

# Accession line, for example:
#   AC   IPB000001A; distance from previous block=(10,266)
# The two integers inside the parentheses are tagged "dist1" and "dist2".
AC = (
    Str("AC   ")
    + Std.dbid(ToSep(sep=";"), {"type": "accession"})
    + Str(" distance from previous block=(")
    + Integer("dist1")
    + Str(",")
    + Integer("dist2")
    + Str(")")
    + AnyEol()
)


# Description line, for example:
#   DE   Kringle domain
# A long description doesn't fold; the whole text stays on one line and is
# tagged "description".
DE = (
    Str("DE   ")
    + ToEol("description")
)


# Block summary line, for example:
#   BL   CCY;  width=14; seqs=44; 99.5%=717; strength=1059
# Tagged fields, in order: "protomat_id", "width", "numseqs",
# "protomat_count" (the value after "99.5%=") and "strength".
BL = (
    Str("BL   ")
    + ToSep("protomat_id", ";")
    + Str("  width=")
    + Digits("width")
    + Str("; seqs=")
    + Digits("numseqs")
    + Str("; 99.5%=")
    + Digits("protomat_count")
    + Str("; strength=")
    + Digits("strength")
    + AnyEol()
)


# One sequence segment line of a block:
#
# PLMN_BOVIN|P06868  (  60) CEEETDFVCRAFQY  26
# ^^^^^^^^^^^^^^^^^-- identifier
#                     ^^^^-- integer tagged "position"
#                           ^^^^^^^^^^^^^^-- matching sequence
#                                           ^^-- weight
#
# NOTE(review): the earlier comment here labelled the parenthesised integer
# "number of segments", but the grammar tags it "position" -- confirm which
# is right against the BLOCKS format description.

# Either "<primary>|<accession>" (both recorded as swissprot cross
# references) or, failing that, a single identifier running up to the
# next space.
identifier = (
    (
        Std.dbxref_dbid(
            UntilSep(sep="|."),
            {"dbname": "swissprot", "type": "primary"},
        )
        + Str("|")
        + Std.dbxref_dbid(
            UntilSep(sep=" "),
            {"dbname": "swissprot", "type": "accession"},
        )
    )
    | Std.dbxref_dbid(UntilSep(sep=" "))
)

# The leading AssertNot rejects lines whose third character is a space
# (e.g. the two-letter "ID   " / "AC   " style prefixes used above).
segment = (
    AssertNot(Re(r".. "))
    + identifier
    + Re(r" *\( *")
    + Integer("position")
    + Re(r"\) *")
    + Word("matching_sequence")
    + Spaces()
    + Digits("weight")
    + AnyEol()
)

# Body of a block: a repeated run of segment lines and bare end-of-lines.
segment_block = Rep1(
    segment | AnyEol()
)

# A record is terminated by a line containing only "//".
end = (
    Str("//")
    + AnyEol()
)

# One complete BLOCKS entry: ID, AC, DE and BL lines, the segments, then "//".
record = Group(
    "record",
    ID + AC + DE + BL + segment_block + end
)

# Whole-file grammar as a single expression: the header, then the records.
format_expression = (
    header
    + Rep1(record)
)

# Record-oriented form of the same grammar.  The arguments look like
# (expression, reader, reader args) triples for header, record and footer;
# the footer triple is all None -- confirm against the Martel HeaderFooter
# documentation.
format = HeaderFooter(
    "dataset",
    {"format": "blocks/12"},
    header, RecordReader.Until, ("ID ",),
    record, RecordReader.EndsWith, ("//\n",),
    None, None, None
)

if __name__ == "__main__":
    # Ad-hoc smoke test: decompress a local copy of BLOCKS 12.0 with zcat,
    # parse it with the record-oriented grammar and write the resulting
    # SAX events out as XML through saxutils.XMLGenerator.
    import os
    from xml.sax import saxutils

    # NOTE: developer-specific path; edit it to point at a local blocks.dat.Z.
    filename = "/home/dalke/ftps/databases/blocks/unix/blocks-12.0/blocks.dat.Z"

    parser = format.make_parser(debug_level = 0)
    parser.setContentHandler(saxutils.XMLGenerator())

    infile = os.popen("zcat " + filename)
    try:
        parser.parseFile(infile)
    finally:
        # Always close the pipe so the zcat child is not left behind,
        # even when parsing fails part-way through the file.
        infile.close()