File: speclist.py

package info (click to toggle)
python-biopython 1.42-2
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 17,584 kB
  • ctags: 12,272
  • sloc: python: 80,461; xml: 13,834; ansic: 7,902; cpp: 1,855; sql: 1,144; makefile: 203
file content (65 lines) | stat: -rw-r--r-- 2,286 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
"""Parser for the SPECLIST.TXT file in SWISS-PROT.

You probably want to use the variables 'record' (for a single record)
and 'format' (for a set of records).

"""
from Martel import *


def SkipLinesToNoEOL(expr):
    """Build an expression that skips lines until `expr`, then matches it.

    The result is zero or more repetitions of "`expr` does not match here,
    so consume the rest of this line" followed by `expr` itself.  `expr`
    is expected to handle its own end-of-line; nothing is appended after it.
    """
    # One skipped line: guarded by AssertNot so the repetition stops at the
    # first position where `expr` can match.
    skipped_line = AssertNot(expr) + ToEol()
    return Rep(skipped_line) + expr
    


def _labelled_field(label, tag):
    """Return an expression for a '<label> <value>' header line.

    The text after the label (and any spaces) up to the end of the line is
    captured under the name `tag`.
    """
    return Str(label) + Spaces() + UntilEol(tag) + AnyEol()


# Header fields found near the top of the file.
DESCRIPTION = _labelled_field("Description:", "description")
NAME = _labelled_field("Name:", "name")
RELEASE = _labelled_field("Release:", "release")

# Summary line giving the number of organism codes, terminated by a period.
TOTAL_CODES = (
    Str("Total number of organism identification codes currently defined:")
    + Spaces()
    + Integer("num_organism_codes")
    + Str(".")
    + AnyEol()
)

# The multi-line column header that precedes the table of records.
TABLE_HEADER = Group(
    "table_header",
    # First header row: "Code", "Taxon", "N=Official name"
    Str("Code") + Spaces() + Str("Taxon") + Spaces()
    + Str("N=Official name") + ToEol()
    # Second header row: "Node", "C=Common name"
    + Spaces() + Str("Node") + Spaces()
    + Str("C=Common name") + ToEol()
    # Third header row: "S=Synonym"
    + Spaces() + Str("S=Synonym") + ToEol()
    # Ruler line made up of spaces and underscores
    + Rep1(Any(" _")) + AnyEol()
)

# A line consisting solely of one or more dashes.
_dash_line = Rep1(Str("-")) + AnyEol()

# Copyright notice: an opening dash line, the "SWISS-PROT is copyright."
# line, everything up to and including the closing dash line, and then any
# trailing blank lines.
_copyright_body = (
    _dash_line
    + Str("SWISS-PROT is copyright.") + ToEol()
    + SkipLinesToNoEOL(_dash_line)
    + Rep(AnyEol())
)
COPYRIGHT = Group("copyright", _copyright_body)

def _continuation_line(prefix, tag):
    """Return an expression for an indented '<prefix><text>' line.

    The text following `prefix` up to the end of the line is captured in a
    group named `tag`.
    """
    return Spaces() + Str(prefix) + Group(tag, UntilEol()) + AnyEol()


# First line of a record: organism code, kingdom letter, taxon node number
# followed by a colon, then the official name introduced by "N=".
_code_line = (
    Group("code", Re(r"[A-Z0-9]{1,5}"))
    + Spaces()
    # One of A, B, E or V (presumably Archaea/Bacteria/Eukaryota/Viruses --
    # confirm against the SPECLIST.TXT documentation).
    + Group("kingdom", Any("ABEV"))
    + Spaces()
    + Group("taxon_node", Digits()) + Str(":")
    + Spaces()
    + Str("N=") + Group("official_name", UntilEol())
    + AnyEol()
)

# Optional follow-up lines carrying a common name ("C=") or synonym ("S=").
_common_line = _continuation_line("C=", "common_name")
_synonym_line = _continuation_line("S=", "synonym")

# A single record: the code line, then any common-name lines, then any
# synonym lines (in that order).
record = Group(
    "record",
    _code_line + Rep(_common_line) + Rep(_synonym_line)
)

# Whole-file layout: skip ahead to each header element in turn, read one or
# more records, then a line ending followed by the copyright block.
_format_body = (
    SkipLinesToNoEOL(DESCRIPTION)
    + NAME
    + RELEASE
    + SkipLinesToNoEOL(TOTAL_CODES)
    + SkipLinesToNoEOL(TABLE_HEADER)
    + Rep1(record)
    + AnyEol()
    + COPYRIGHT
)
format = Group("format", _format_body)