File: DBXRef.py

package info (click to toggle)
python-biopython 1.42-2
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 17,584 kB
  • ctags: 12,272
  • sloc: python: 80,461; xml: 13,834; ansic: 7,902; cpp: 1,855; sql: 1,144; makefile: 203
file content (225 lines) | stat: -rw-r--r-- 9,448 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
class DBXRef:
    """A cross-reference to an entry in an external database.

    Attributes (all stored exactly as passed to the constructor):
      dbname  -- name of the referenced database
      dbid    -- identifier of the entry within that database
      reftype -- optional qualifier for the identifier; None means "no
                 qualifier"
      negate  -- truthy when the reference is negated

    The string form is "<dbname>/<dbid>", or "<dbname>/<reftype>=<dbid>"
    when a reftype is set; a negated reference is wrapped as "not(...)".
    """
    def __init__(self, dbname, dbid, reftype = None, negate = 0):
        self.dbname, self.dbid = dbname, dbid
        self.reftype, self.negate = reftype, negate

    def __str__(self):
        # Only emit the "<reftype>=" qualifier when a reftype was given.
        qualifier = "" if self.reftype is None else self.reftype + "="
        text = "%s/%s%s" % (self.dbname, qualifier, self.dbid)
        return "not(%s)" % text if self.negate else text

    # repr() deliberately shows the same compact text as str().
    __repr__ = __str__

class BioformatDBName:
    """Mapping-like object whose lookup returns the key itself.

    Supports only ``table[name]``; any name is accepted and handed back
    unchanged, so a lookup never raises KeyError.
    """
    def __getitem__(self, name):
        # Identity conversion: the given name is used as-is.
        return name
class UnknownDBName:
    """Mapping-like object that derives a placeholder name for any key.

    ``table[name]`` returns "x-unknown-" followed by the lower-cased name;
    a lookup never raises KeyError.
    """
    def __getitem__(self, name):
        lowered = name.lower()
        return "x-unknown-" + lowered

# Lookup tables that translate a database name, as spelled in a particular
# source format, into the name that from_parser() stores on a DBXRef.
#
# The outer key is the "dbname style" (the naming scheme of the incoming
# name).  Each value only needs to support ``table[name]``:
#   "bioformat" -- names pass through unchanged (BioformatDBName)
#   "unknown"   -- names become "x-unknown-<lowercased name>" (UnknownDBName)
#   "sp", "go", "genbank" -- plain dicts keyed by the exact, case-sensitive
#                  source spelling; a missing key raises KeyError
#
# Many target names carry an "x-" prefix while the entries annotated
# "(in GO)" / "GO" do not -- presumably the prefix marks names that are not
# Gene Ontology abbreviations (TODO confirm).
dbname_conversions = {
    "bioformat": BioformatDBName(),
    "unknown": UnknownDBName(),
    # presumably SWISS-PROT cross-reference names (the "go" table below maps
    # "SP" to SWISS-PROT) -- TODO confirm
    "sp": {"AARHUS/GHENT-2DPAGE": "x-aarhus-ghent-2dpage",
           "CARBBANK": "x-carbbank",
           "DICTYDB": "x-dictydb",
           "ECO2DBASE": "x-eco2dbase",
           "ECOGENE": "x-ecogene",
           "EMBL": "embl",     # EMBL (in GO)
           "FLYBASE": "fb",    # Flybase (in GO)
           "GCRDB": "x-gcrdb",
           "HIV": "x-hiv",
           "HSC-2DPAGE": "x-hsc",
           "HSSP": "x-hssp",
           "MAIZE-2DPAGE": "x-maize",
           "MAIZEDB": "x-maizedb",
           "MENDEL": "x-mendel",
           "MGD": "mgd",       # (in GO)
           "MIM": "x-mim",
           "PDB": "x-pdb",       # Protein Data Bank
           "PFAM": "x-pfam",
           "PIR": "pir", # (in GO)
           "PROSITE": "x-prosite",
           "REBASE": "x-rebase",
           "SGD": "sgd", # (in GO)
           "STYGENE": "x-stygene",
           "SUBTILIST": "x-subtilist",
           "SWISS-2DPAGE": "x-swiss",
           "TIGR": "tigr", # (in GO)
           "TRANSFAC": "x-transfac",
           "WORMPEP": "x-wormpep",
           "YEPD": "x-yepd",
           "ZFIN": "x-zfin",
           },
    # Gene Ontology database abbreviations; keys are case-sensitive
    # (note "Pombase", "Pompep" and "taxonID").
    "go": {"CGEN": "cgen",    # Compugen, Inc.
           "DDB": "ddb",      # DictyBase (Dictyostelium discoideum)
           "DDBJ": "ddbj",    # DNA Database of Japan
           "EC": "ec",        # Enzyme Commission
           "EMBL": "embl",    # EMBL Nucleotide Sequence Data Library
           "ENSEMBL": "ensembl", # ENSEMBL
           "ENZYME": "enzyme",   # ENZYME
           "FB": "fb",        # FlyBase
           "GB": "gb",        # GenBank
           "GO": "go",        # Gene Ontology
           "GXD": "gxd",      # Gene Expression Database (mouse)
           "IPR": "ipr",      # InterPro
           "ISBN": "isbn",    # International Standard Book Number
           "IUBMB": "iubmb",  # International Union of Biochemistry
                              #      and Molecular Biology
           "IUPAC": "iupac",  # International Union of Pure and Applied
                              #      Chemistry
           "MEDLINE": "medline", # MEDLINE
           "MGD": "mgd",     # Mouse Genome Database
           "MGI": "mgi",     # Mouse Genome Informatics
           "NC-IUBMB": "NC-IUBMB",
                             #  Nomenclature Committee of the International
                             #     Union of Biochemistry and Molecular Biology
                             # NOTE(review): this value keeps upper case,
                             #     unlike every other target in this table --
                             #     confirm that is intended.
           "PIR": "pir",     # PIR
           "PMID": "pmid",   # PubMed
           "Pombase": "pombase", # Schizosaccharomyces pombe
           "Pompep": "pompep",   # Schizosaccharomyces pombe Protein
                                 #    Sequence Database
           "RESID": "resid", # RESID (protein post-translational modifications)
           "SGD": "sgd",     # Saccharomyces Genome Database
           "SP": "sp",       # SWISS-PROT
           "SWALL": "swall", # SWISS-PROT + TrEMBL + TrEMBLnew
           "TAIR": "tair",   # The Arabidopsis Information Resource
           "taxonID": "taxonid", # Taxonomy ID
           "TC": "tc",       # Transport Commission
           "TIGR": "tigr",   # The Institute of Genome Research
           "TR": "tr",       # TrEMBL
           "WB": "wb",       # WormBase (Caenorhabditis elegans)
    },
    # GenBank /db_xref qualifier database names; each entry's comment shows
    # an example qualifier.
    # http://www.ncbi.nlm.nih.gov/collab/db_xref.html
    "genbank": {
      "ATCC": "x-atcc",  # American Type Culture Collection database
                         #    /db_xref="ATCC:123456"
      "ATCC(in host)": "x-atcc-host", # See above
      "ATCC(dna)": "x-atcc-dna",      # See above

      "BDGP_EST": "x-bdgp-est", #  Berkeley Drosophila Genome Project
                                #       EST database
                                #   /db_xref="BDGP_EST:123456"

      "BDGP_INS": "x-bdgp-ins", #  Berkeley Drosophila Genome Project
                                #      database -- Insertion
                                #   /db_xref="BDGP_INS:123456"


      "dbEST": "x-dbest",  #  EST database maintained at the NCBI.
                           #  /db_xref="dbEST:123456"

      "dbSNP": "x-dbsnp",  #  Variation database maintained at the NCBI.
                           #  /db_xref="dbSNP:4647"

      "dbSTS": "x-dbsts",  # STS database maintained at the NCBI.
                           # /db_xref="dbSTS:456789"

      "ENSEMBL": "ensembl", #  Database of automatically annotated genomic data
                            # /db_xref="ENSEMBL:HUMAN-Clone-AC005612"
                            # /db_xref="ENSEMBL:HUMAN-Gene-ENSG00000007102"

      "ESTLIB": "x-estlib", # EBI's EST library identifier
                            # /db_xref="ESTLIB:1200"

      "FANTOM_DB": "x-fantom-db", # Database of Functional Annotation of Mouse
                                  # /db_xref="FANTOM_DB:0610005A07"

      "FLYBASE": "fb", # Database of Genetic and molecular data of Drosophila.
                       # /db_xref="FLYBASE:FBgn0000024"

      "GDB": "x-gdb",  # Human Genome Database accession numbers.
                       # /db_xref="GDB:G00-128-600"

      "GI": "x-gi",    # GenInfo identifier, used as a unique sequence
                       # identifier for nucleotide and proteins.
                       # /db_xref="GI:1234567890"

      "GO": "go",      # Gene Ontology Database identifier
                       # /db_xref="GO:123"

      "IMGT/LIGM": "x-imgt-ligm", #  Immunogenetics database, immunoglobulins
                                  #  and T-cell receptors
                                  # /db_xref="IMGT/LIGM:U03895"

      "IMGT/HLA": "x-imgt-hla",   # Immunogenetics database, human MHC
                                  # /db_xref="IMGT/HLA:HLA00031"


      "LocusID": "x-locus-id", # NCBI LocusLink ID.
                               # /db_xref="LocusID:51199"

      "MaizeDB": "x-maizedb",  # Maize Genome Database unique identifiers.
                               # /db_xref="MaizeDB:Probe/79847"

      "MGD": "mgd",  # Mouse Genome Database accession numbers.
                     # /db_xref="MGD:123456"

      "MGI": "mgi",  # Medicago Genome Initiative
                     # /db_xref="MGI:S:20819"
                     # NOTE(review): "mgi" is also the target of the "go"
                     # table's MGI entry, which is described there as Mouse
                     # Genome Informatics -- confirm the shared name is
                     # intended.

      "MIM": "x-mim", # Mendelian Inheritance in Man numbers.
                      # /db_xref="MIM:123456"

      "niaEST": "x-niaEST", # NIA Mouse cDNA Project
                            # /db_xref="niaEST:L0304H12-3"
                            # NOTE(review): this value keeps mixed case,
                            # unlike the other "x-" targets -- confirm that
                            # is intended.

      "PIR": "pir", # Protein Information Resource accession numbers.
                    # /db_xref="PIR:S12345"

      "PSEUDO": "x-pseudo-embl", #  EMBL pseudo protein identifier
                                 # /db_xref="PSEUDO:CAC44644.1"

      "RATMAP": "x-ratmap", #  Rat Genome Database
                            # /db_xref="RATMAP:5"

      "RiceGenes": "x-ricegenes", #  Rice database accession numbers.
                                  # /db_xref="RiceGenes:AA231856"

      "REMTREMBL": "x-remtrembl",
              # Computer-annotated protein sequence database containing
              # the translations of those coding sequences (CDS) present
              # in the EMBL Nucleotide Sequence Database that won't be
              # included in SWISS-PROT. These include: immunoglobulins and
              # T-cell receptors, synthetic sequences, patent application
              # sequences, small fragments, CDS not coding for real
              # proteins and truncated proteins.
              # example:      /db_xref="REMTREMBL:CAC01666"

      "RZPD": "x-rzpd", # Resource Centre Primary Database Clone Identifiers
                        # /db_xref="RZPD:IMAGp998I142450Q6"

      "SGD": "sgd",  # Saccharomyces Genome Database accession numbers.
                     # /db_xref="SGD:L0000470"

      "SoyBase": "x-soybase", #  Glycine max Genome Database
                              # /db_xref="SoyBase:Satt005"

      "SPTREMBL": "x-sptrembl",  # is this the same as "swall" ?
              # Computer-annotated protein sequence database
              # supplementing SWISS-PROT and containing the
              # translations of all coding sequences (CDS)
              # present in the EMBL Nucleotide Sequence
              # Database not yet integrated in SWISS-PROT.
              #   /db_xref="SPTREMBL:Q00177"

      "SWISS-PROT": "sp", # Swiss-Prot protein database accession numbers.
                          # /db_xref="SWISS-PROT:P12345"

      "taxon": "taxonid", #  NCBI taxonomic identifier.
                          # /db_xref="taxon:4932"
      },
}

def from_parser(dbname_style, dbname, idtype, dbid, negate):
    """Build a DBXRef from parsed fields, normalizing the database name.

    dbname_style -- key into dbname_conversions selecting the naming scheme
    dbname       -- database name as it appeared in the parsed input
    idtype       -- passed to DBXRef as its reftype
    dbid         -- identifier within the database
    negate       -- passed through to DBXRef unchanged

    When either the style or the name within that style is not in
    dbname_conversions, the name falls back to
    "x-unknown2-<dbname_style>--<dbname>" instead of raising.
    """
    try:
        style_table = dbname_conversions[dbname_style]
        converted = style_table[dbname]
    except KeyError:
        # Covers both an unrecognized style and an unrecognized name.
        converted = "x-unknown2-%s--%s" % (dbname_style, dbname)
    return DBXRef(converted, dbid, idtype, negate)