1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109
|
# Copyright 2007-2010 by Peter Cock. All rights reserved.
# Revisions copyright 2007-2008 by Michiel de Hoon. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Testing online code for fetching sequences, and parsing them.
Uses Bio.SeqIO to parse records downloaded with Bio.Entrez and
Bio.ExPASy.
Goals:
- Make sure that all retrieval is working as expected.
- May catch some format changes early too.
"""
import unittest
# We want to test these:
from Bio import Entrez
from Bio import ExPASy
# In order to check any sequences returned
from Bio import SeqIO
from Bio.SeqUtils.CheckSum import seguid
from Bio.SwissProt import SwissProtParserError
import requires_internet
# NOTE(review): presumably stops this test module early when no network
# connection is available - confirm against requires_internet.check().
requires_internet.check()
# This lets us set the email address to be sent to NCBI Entrez:
Entrez.email = "biopython@biopython.org"
class ExPASyTests(unittest.TestCase):
    """Tests for Bio.ExPASy module."""

    def test_get_sprot_raw(self):
        """Bio.ExPASy.get_sprot_raw("O23729")."""
        identifier = "O23729"
        handle = ExPASy.get_sprot_raw(identifier)
        try:
            record = SeqIO.read(handle, "swiss")
        except SwissProtParserError as e:
            # This is to catch an error page from our proxy
            if str(e) == "Failed to find ID in first line" and e.line.startswith(
                "<!DOCTYPE HTML"
            ):
                raise OSError(
                    "Received an HTML error page instead of a Swiss-Prot record"
                ) from None
            # Any other parser failure is a real problem: propagate it rather
            # than falling through to a NameError on the unbound ``record``.
            raise
        finally:
            # Release the connection whether or not parsing succeeded.
            handle.close()
        self.assertEqual(record.id, identifier)
        self.assertEqual(len(record), 394)
        self.assertEqual(seguid(record.seq), "5Y08l+HJRDIlhLKzFEfkcKd1dkM")
class EntrezTests(unittest.TestCase):
    """Tests for Bio.Entrez.efetch, parsing the downloads with Bio.SeqIO."""

    def simple(self, database, formats, entry, length, checksum):
        """Fetch one entry in each requested format and check the record.

        Arguments:
         - database - value passed to Entrez.efetch as ``db``.
         - formats - iterable of ``rettype`` values; "gbwithparts" downloads
           are parsed with the SeqIO "gb" format.
         - entry - identifier passed to Entrez.efetch as ``id``.
         - length - expected ``len()`` of the parsed record.
         - checksum - expected SEGUID of the record's sequence.

        """
        # NCBI still takes GI on input, but phasing it out in output
        gi_to_acc = {
            "6273291": "AF191665.1",
            "16130152": "NP_416719.1",
        }
        # Identifier expected in the output; the caller's ``entry`` is left
        # untouched so every format is requested with the same id.
        expected = gi_to_acc.get(entry, entry)
        for rettype in formats:
            handle = Entrez.efetch(
                db=database, id=entry, rettype=rettype, retmode="text"
            )
            parse_format = "gb" if rettype == "gbwithparts" else rettype
            try:
                record = SeqIO.read(handle, parse_format)
            finally:
                # Close the connection even when parsing fails.
                handle.close()
            self.assertTrue(
                (expected in record.name)
                or (expected in record.id)
                # A GI annotation, when present, holds the GI that was
                # requested rather than the accession it maps to.
                or ("gi" in record.annotations and record.annotations["gi"] == entry),
                "%s got %s, %s" % (expected, record.name, record.id),
            )
            self.assertEqual(len(record), length)
            self.assertEqual(seguid(record.seq), checksum)
def funct(db, fmts, acc, size, digest):
    """Return a test method that runs EntrezTests.simple on one case."""
    method = lambda case: case.simple(db, fmts, acc, size, digest)  # noqa: E731
    method.__doc__ = "Bio.Entrez.efetch(%r, id=%r, ...)" % (db, acc)
    return method


# Attach one generated test method to EntrezTests per case; each case is
# (database, formats, entry, length, checksum).
for case in (
    ("nuccore", ["fasta", "gb"], "X52960", 248, "Ktxz0HgMlhQmrKTuZpOxPZJ6zGU"),
    ("nucleotide", ["fasta", "gb"], "6273291", 902, "bLhlq4mEFJOoS9PieOx4nhGnjAQ"),
    (
        "protein",
        ["fasta", "gbwithparts"],
        "16130152",
        367,
        "fCjcjMFeGIrilHAn6h+yju267lg",
    ),
):
    setattr(EntrezTests, "test_%s_%s" % (case[0], case[2]), funct(*case))
# Keep the module namespace free of the scaffolding names.
del funct
del case
if __name__ == "__main__":
    # Run every test in this module with verbose per-test output.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))
|