File: common_BioSQL_online.py

package info (click to toggle)
python-biopython 1.85%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 126,372 kB
  • sloc: xml: 1,047,995; python: 332,722; ansic: 16,944; sql: 1,208; makefile: 140; sh: 81
file content (130 lines) | stat: -rw-r--r-- 4,512 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
"""Test storing biopython objects in a BioSQL relational db."""

import os
import platform
import tempfile
import time
import unittest

# Hide annoying warnings from things like bonds in GenBank features,
# or PostgreSQL schema rules. TODO - test these warnings are raised!
import warnings
from io import StringIO

import requires_internet
from common_BioSQL import check_config
from common_BioSQL import create_database
from common_BioSQL import destroy_database

from Bio import BiopythonWarning
from Bio import Entrez

# local stuff
from Bio import MissingExternalDependencyError
from Bio import SeqIO
from Bio.Seq import MutableSeq
from Bio.Seq import Seq
from Bio.SeqFeature import SeqFeature
from Bio.SeqRecord import SeqRecord
from BioSQL import BioSeq
from BioSQL import BioSeqDatabase

# This module only holds shared test code; it is meant to be imported by the
# per-database test_BioSQL_*online.py scripts, so refuse to run as a script.
if __name__ == "__main__":
    raise RuntimeError("Call this via test_BioSQL_*online.py not directly")

# Sharing these with test_BioSQL_XXX_online.py files which import this file:
# DBDRIVER, DBTYPE, DBHOST, DBUSER, DBPASSWD, TESTDB, DBSCHEMA, SQL_FILE, SYSTEM
# Of these, only SYSTEM is assigned at import time; DBDRIVER, DBTYPE, DBHOST,
# DBUSER, DBPASSWD and TESTDB are filled in later by share_config().
SYSTEM = platform.system()


def share_config(dbdriver, dbtype, dbhost, dbuser, dbpasswd, testdb):
    """Publish the given database settings as module-level globals.

    The importing test script passes in its connection settings; they are
    stored under the upper-case names DBDRIVER, DBTYPE, DBHOST, DBUSER,
    DBPASSWD and TESTDB so the test cases in this file can read them.
    """
    global DBDRIVER, DBTYPE, DBHOST, DBUSER, DBPASSWD, TESTDB
    DBDRIVER, DBTYPE, DBHOST = dbdriver, dbtype, dbhost
    DBUSER, DBPASSWD, TESTDB = dbuser, dbpasswd, testdb


class TaxonomyTest(unittest.TestCase):
    """Test proper insertion and retrieval of taxonomy data.

    Every test starts from a freshly created test database holding an empty
    ``biosql-test`` sub-database, plus an iterator over the records in
    ``GenBank/cor6_6.gb`` ready to be loaded.
    """

    def setUp(self):
        """Create the test database and open the GenBank input file."""
        # Only TESTDB is rebound here; the remaining connection settings are
        # module globals that are merely read.
        global TESTDB

        Entrez.email = "biopython@biopython.org"
        # create TESTDB
        TESTDB = create_database()

        # load the database
        db_name = "biosql-test"
        self.server = BioSeqDatabase.open_database(
            driver=DBDRIVER, user=DBUSER, passwd=DBPASSWD, host=DBHOST, db=TESTDB
        )

        # remove the database if it already exists
        try:
            self.server[db_name]
            self.server.remove_database(db_name)
        except KeyError:
            pass

        self.db = self.server.new_database(db_name)

        # get the GenBank file we are going to put into it
        self.iterator = SeqIO.parse("GenBank/cor6_6.gb", "gb")

    def tearDown(self):
        """Close the server connection and remove the test database."""
        try:
            self.server.close()
        finally:
            # Clean up even when closing the connection fails, so the test
            # database is not left behind.
            destroy_database()
            del self.db
            del self.server

    def test_taxon_left_right_values(self):
        """Check the nested-set left/right values stored for the taxa.

        Selects the taxa nested inside 'Brassicales' whose right and left
        values differ by exactly one and compares their NCBI taxon ids with
        the four expected ones.
        """
        self.db.load(self.iterator, True)
        sql = """SELECT DISTINCT include.ncbi_taxon_id FROM taxon
                  INNER JOIN taxon AS include ON
                      (include.left_value BETWEEN taxon.left_value
                                  AND taxon.right_value)
                  WHERE taxon.taxon_id IN
                      (SELECT taxon_id FROM taxon_name
                                  WHERE name = 'Brassicales')
                      AND include.right_value - include.left_value = 1"""

        rows = self.db.adaptor.execute_and_fetchall(sql)
        self.assertEqual(4, len(rows))
        values = [row[0] for row in rows]
        self.assertCountEqual([3704, 3711, 3708, 3702], values)

    def test_load_database_with_tax_lookup(self):
        """Load SeqRecord objects and fetch the taxonomy information from NCBI."""
        handle = Entrez.efetch(db="taxonomy", id=3702, retmode="XML")
        try:
            taxon_record = Entrez.read(handle)
        finally:
            # Release the network handle whether or not parsing succeeded.
            handle.close()

        # Expected lineage: every ancestor's scientific name followed by the
        # scientific name of the taxon itself.
        taxon = taxon_record[0]
        entrez_tax = [t["ScientificName"] for t in taxon["LineageEx"]]
        entrez_tax.append(taxon["ScientificName"])
        self.db.load(self.iterator, True)

        # do some simple tests to make sure we actually loaded the right
        # thing. More advanced tests in a different module.
        items = list(self.db.values())
        self.assertEqual(len(items), 6)
        self.assertEqual(len(self.db), 6)

        test_record = self.db.lookup(accession="X55053")

        # make sure that the ncbi taxonomy id is correct
        self.assertEqual(test_record.annotations["ncbi_taxid"], 3702)
        # make sure that the taxonomic lineage is the same as reported
        # using the Entrez module
        self.assertEqual(test_record.annotations["taxonomy"], entrez_tax)