File: test_SeqIO_online.py

package info (click to toggle)
python-biopython 1.78%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 65,756 kB
  • sloc: python: 221,141; xml: 178,777; ansic: 13,369; sql: 1,208; makefile: 131; sh: 70
file content (109 lines) | stat: -rw-r--r-- 3,615 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# Copyright 2007-2010 by Peter Cock.  All rights reserved.
# Revisions copyright 2007-2008 by Michiel de Hoon.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""Testing online code for fetching sequences, and parsing them.

Uses Bio.SeqIO to parse records downloaded with Bio.Entrez and Bio.ExPASy.

Goals:
    - Make sure that all retrieval is working as expected.
    - May catch some format changes early too.

"""
import unittest

# We want to test these:
from Bio import Entrez
from Bio import ExPASy

# In order to check any sequences returned
from Bio import SeqIO
from Bio.SeqUtils.CheckSum import seguid
from Bio.SwissProt import SwissProtParserError

import requires_internet

# NOTE(review): presumably stops this module from running when no network
# connection is available -- confirm against the requires_internet helper.
requires_internet.check()

# This lets us set the email address to be sent to NCBI Entrez:
Entrez.email = "biopython@biopython.org"


class ExPASyTests(unittest.TestCase):
    """Tests for Bio.ExPASy module."""

    def test_get_sprot_raw(self):
        """Bio.ExPASy.get_sprot_raw("O23729").

        Downloads the raw Swiss-Prot entry, parses it with SeqIO using the
        "swiss" format and checks the record id, length and SEGUID checksum.

        Raises OSError when the download turns out to be an HTML error page
        instead of a Swiss-Prot record; any other parser error propagates
        unchanged.
        """
        identifier = "O23729"
        handle = ExPASy.get_sprot_raw(identifier)
        try:
            record = SeqIO.read(handle, "swiss")
        except SwissProtParserError as e:
            # This is to catch an error page from our proxy
            if str(e) == "Failed to find ID in first line" and e.line.startswith(
                "<!DOCTYPE HTML"
            ):
                raise OSError from None
            # Any other parser failure is a genuine problem: re-raise it
            # instead of falling through to an unbound ``record`` below.
            raise
        finally:
            # Release the handle whether or not parsing succeeded.
            handle.close()
        self.assertEqual(record.id, identifier)
        self.assertEqual(len(record), 394)
        self.assertEqual(seguid(record.seq), "5Y08l+HJRDIlhLKzFEfkcKd1dkM")


class EntrezTests(unittest.TestCase):
    """Tests fetching records with Bio.Entrez.efetch and parsing them with SeqIO."""

    # NCBI still takes GI on input, but phasing it out in output
    _GI_TO_ACC = {
        "6273291": "AF191665.1",
        "16130152": "NP_416719.1",
    }

    def simple(self, database, formats, entry, length, checksum):
        """Fetch ``entry`` from ``database`` in each format and verify the record.

        Arguments:
         - database - Entrez database name passed to efetch as ``db``.
         - formats - iterable of efetch ``rettype`` values; "gbwithparts" is
           parsed with the SeqIO "gb" parser.
         - entry - identifier passed to efetch as ``id``.
         - length - expected length of the parsed record.
         - checksum - expected SEGUID checksum of the record's sequence.

        The record must mention either the requested identifier (as its "gi"
        annotation) or, for the GI numbers listed in ``_GI_TO_ACC``, the
        matching accession in its name or id.
        """
        # Keep the requested id untouched so every format is fetched with the
        # same identifier; only the expected output name is translated.
        accession = self._GI_TO_ACC.get(entry, entry)
        for f in formats:
            handle = Entrez.efetch(db=database, id=entry, rettype=f, retmode="text")
            try:
                record = SeqIO.read(handle, "gb" if f == "gbwithparts" else f)
            finally:
                # Close the handle even if parsing fails.
                handle.close()
            self.assertTrue(
                (accession in record.name)
                or (accession in record.id)
                or record.annotations.get("gi") in (entry, accession),
                "%s got %s, %s" % (accession, record.name, record.id),
            )
            self.assertEqual(len(record), length)
            self.assertEqual(seguid(record.seq), checksum)


def _entrez_test_factory(db, fmts, acc, size, digest):
    """Return a test method that calls ``simple`` with these fixed arguments."""
    # Binding through the call gives every generated test its own values
    # rather than sharing the loop variable.
    test = lambda case: case.simple(db, fmts, acc, size, digest)  # noqa: E731
    test.__doc__ = "Bio.Entrez.efetch(%r, id=%r, ...)" % (db, acc)
    return test


# One test_<database>_<entry> method is attached to EntrezTests per row:
# (database, formats, entry, expected length, expected SEGUID checksum)
_entrez_cases = (
    ("nuccore", ["fasta", "gb"], "X52960", 248, "Ktxz0HgMlhQmrKTuZpOxPZJ6zGU"),
    ("nucleotide", ["fasta", "gb"], "6273291", 902, "bLhlq4mEFJOoS9PieOx4nhGnjAQ"),
    (
        "protein",
        ["fasta", "gbwithparts"],
        "16130152",
        367,
        "fCjcjMFeGIrilHAn6h+yju267lg",
    ),
)
for _case in _entrez_cases:
    setattr(
        EntrezTests,
        "test_%s_%s" % (_case[0], _case[2]),
        _entrez_test_factory(*_case),
    )
# Leave no helper names behind in the module namespace.
del _case, _entrez_cases, _entrez_test_factory

if __name__ == "__main__":
    # Run this module's tests with the verbose (level 2) text runner.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))