File: common_BioSQL_online.py

package info (click to toggle)
python-biopython 1.78%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 65,756 kB
  • sloc: python: 221,141; xml: 178,777; ansic: 13,369; sql: 1,208; makefile: 131; sh: 70
file content (133 lines) | stat: -rw-r--r-- 4,544 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
"""Test storing biopython objects in a BioSQL relational db."""

import os
import platform
import unittest
import tempfile
import time

from io import StringIO

# Hide annoying warnings from things like bonds in GenBank features,
# or PostgreSQL schema rules. TODO - test these warnings are raised!
import warnings
from Bio import BiopythonWarning

# local stuff
from Bio import MissingExternalDependencyError
from Bio.Seq import Seq, MutableSeq
from Bio.SeqFeature import SeqFeature
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord

from BioSQL import BioSeqDatabase
from BioSQL import BioSeq
from Bio import Entrez

from common_BioSQL import create_database, destroy_database, check_config

from seq_tests_common import compare_record, compare_records

import requires_internet

# This module is meant to be imported by a driver test script; running it
# directly is rejected with an explicit error.
if __name__ == "__main__":
    raise RuntimeError("Call this via test_BioSQL_*online.py not directly")

# Sharing these with test_BioSQL_XXX_online.py files which import this file:
# DBDRIVER, DBTYPE, DBHOST, DBUSER, DBPASSWD, TESTDB, DBSCHEMA, SQL_FILE, SYSTEM
# Only SYSTEM is bound at import time.  share_config() binds DBDRIVER, DBTYPE,
# DBHOST, DBUSER, DBPASSWD and TESTDB; nothing visible in this file assigns
# DBSCHEMA or SQL_FILE.
SYSTEM = platform.system()


def share_config(dbdriver, dbtype, dbhost, dbuser, dbpasswd, testdb):
    """Publish the given DB connection settings as module-level globals.

    Each argument is stored unchanged under the upper-case name of the
    parameter (``dbdriver`` -> ``DBDRIVER`` and so on) so that the test
    cases defined in this module can read them.  Returns ``None``.
    """
    # Only the names rebound here need a ``global`` declaration.
    global DBDRIVER, DBTYPE, DBHOST, DBUSER, DBPASSWD, TESTDB
    DBDRIVER, DBTYPE, DBHOST = dbdriver, dbtype, dbhost
    DBUSER, DBPASSWD, TESTDB = dbuser, dbpasswd, testdb


class TaxonomyTest(unittest.TestCase):
    """Test proper insertion and retrieval of taxonomy data."""

    def setUp(self):
        """Create the test database, open it and prepare the GenBank input.

        Sets ``self.server`` (open BioSQL server connection), ``self.db``
        (a freshly created "biosql-test" sub-database) and ``self.iterator``
        (a ``SeqIO`` parser over ``GenBank/cor6_6.gb``).
        """
        # TESTDB is rebound below; the other settings are only read.
        global TESTDB

        Entrez.email = "biopython@biopython.org"
        # create TESTDB
        TESTDB = create_database()

        # load the database
        db_name = "biosql-test"
        self.server = BioSeqDatabase.open_database(
            driver=DBDRIVER, user=DBUSER, passwd=DBPASSWD, host=DBHOST, db=TESTDB
        )

        # remove the database if it already exists; only the existence
        # lookup is guarded so a failure during removal is not hidden.
        try:
            self.server[db_name]
        except KeyError:
            pass
        else:
            self.server.remove_database(db_name)

        self.db = self.server.new_database(db_name)

        # get the GenBank file we are going to put into it
        self.iterator = SeqIO.parse("GenBank/cor6_6.gb", "gb")

    def tearDown(self):
        """Close the server connection and drop the test database."""
        self.server.close()
        destroy_database()
        del self.db
        del self.server

    def test_taxon_left_right_values(self):
        """Check the nested-set left/right values stored for the taxa.

        After loading the records, the query selects the taxa lying inside
        the 'Brassicales' left/right interval whose own interval has width
        one, and compares their NCBI taxon ids with the expected four.
        """
        self.db.load(self.iterator, True)
        sql = """SELECT DISTINCT include.ncbi_taxon_id FROM taxon
                  INNER JOIN taxon AS include ON
                      (include.left_value BETWEEN taxon.left_value
                                  AND taxon.right_value)
                  WHERE taxon.taxon_id IN
                      (SELECT taxon_id FROM taxon_name
                                  WHERE name = 'Brassicales')
                      AND include.right_value - include.left_value = 1"""

        rows = self.db.adaptor.execute_and_fetchall(sql)
        self.assertEqual(4, len(rows))
        # First column of each row is the selected ncbi_taxon_id.
        values = {row[0] for row in rows}
        self.assertEqual({3704, 3711, 3708, 3702}, values)

    def test_load_database_with_tax_lookup(self):
        """Load SeqRecord objects and fetch the taxonomy information from NCBI."""
        handle = Entrez.efetch(db="taxonomy", id=3702, retmode="XML")
        try:
            taxon_record = Entrez.read(handle)
        finally:
            # Always release the Entrez handle, even if parsing fails.
            handle.close()

        # Expected lineage: every ancestor's name followed by the taxon's own.
        entrez_tax = [t["ScientificName"] for t in taxon_record[0]["LineageEx"]]
        entrez_tax.append(taxon_record[0]["ScientificName"])
        self.db.load(self.iterator, True)

        # do some simple tests to make sure we actually loaded the right
        # thing. More advanced tests in a different module.
        items = list(self.db.values())
        self.assertEqual(len(items), 6)
        self.assertEqual(len(self.db), 6)

        test_record = self.db.lookup(accession="X55053")

        # make sure that the ncbi taxonomy id is correct
        self.assertEqual(test_record.annotations["ncbi_taxid"], 3702)
        # make sure that the taxonomic lineage is the same as reported
        # using the Entrez module
        self.assertEqual(test_record.annotations["taxonomy"], entrez_tax)