File: ipi.py

package info (click to toggle)
python-biopython 1.42-2
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 17,584 kB
  • ctags: 12,272
  • sloc: python: 80,461; xml: 13,834; ansic: 7,902; cpp: 1,855; sql: 1,144; makefile: 203
file content (64 lines) | stat: -rw-r--r-- 2,308 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
"""Expression for IPI format.

IPI is nearly swissprot, but contains some differences which make the
Swissprot parsers choke.
"""

from Bio import Std
import Martel
from Martel import Time
import sprot40

# IPI "ID" line.  Unlike the SwissProt one, the entry name carries a version
# number after a period, so the entry name pattern accepts "." in addition
# to word characters.  Shape of the line, as spelled out below:
#   ID   <entry_name> <data_class_table>; <molecule_type>; <sequence_length> AA.
# The pattern is a raw string so that "\w" reaches the regex engine untouched
# and is not treated as an (invalid) Python string escape.
ID_exp = Martel.Group(
    "ID",
    Martel.Str("ID   ") +
    Std.dbid(Martel.Group("entry_name", Martel.Re(r"[\w.]+")),
             {"type": "primary", "dbname": "sp"}) +
    Martel.Spaces() +
    Martel.Word("data_class_table") +
    Martel.Str(";") + Martel.Spaces() +
    Martel.Word("molecule_type") +
    Martel.Str(";") + Martel.Spaces() +
    Martel.Digits("sequence_length") +
    Martel.Str(" AA.") +
    Martel.AnyEol()
)

# The DT lines are formatted differently from SwissProt's, and the
# third DT line (annotation update) may be missing
# DT   04-MAR-2003 (IPI Human rel. 2.17, Created)
# DT   04-MAR-2003 (IPI Human rel. 2.17, Last sequence update)

def _make_DT_exp(event):
    """Build the expression for one IPI "DT" line.

    event -- the text that follows the release number inside the
             parentheses, e.g. "Created".

    The expression matches, in order: the literal "DT   ", a date in the
    Time format "%(DD)-%(Jan)-%(YYYY)" (e.g. 04-MAR-2003), the literal
    " (IPI Human rel. ", a Martel.Float named "release", the literal
    ", <event>)" and an end of line.
    """
    return (Martel.Str("DT   ") +
            Time.make_expression("%(DD)-%(Jan)-%(YYYY)") +
            Martel.Str(" (IPI Human rel. ") +
            Martel.Float("release") +
            Martel.Str(", " + event + ")") +
            Martel.AnyEol())


# One expression per kind of DT line; they differ only in the event label.
DT_created_exp = _make_DT_exp("Created")

DT_seq_update_exp = _make_DT_exp("Last sequence update")

DT_ann_update_exp = _make_DT_exp("Last annotation update")


# (group name, expression) pairs handed to Martel.replace_groups: each named
# group of the sprot40 grammar is swapped for the IPI-specific expression
# defined in this module.  The annotation-update DT line is wrapped in
# Martel.Opt, i.e. it is optional.
replacements = [
    ("ID", ID_exp),
    ("DT_created", DT_created_exp),
    ("DT_seq_update", DT_seq_update_exp),
    ("DT_ann_update", Martel.Opt(DT_ann_update_exp)),
]


def _apply_replacements(sprot_expr):
    """Return Martel.replace_groups(sprot_expr, replacements)."""
    return Martel.replace_groups(sprot_expr, replacements)


# IPI counterparts of the three expressions provided by sprot40.
record = _apply_replacements(sprot40.record)

format_expression = _apply_replacements(sprot40.format_expression)

format = _apply_replacements(sprot40.format)