File: test_mol2.py

package info (click to toggle)
mdtraj 1.11.1.post1-2
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 79,392 kB
  • sloc: python: 25,484; ansic: 6,266; cpp: 5,685; xml: 1,252; makefile: 208; sh: 23
file content (184 lines) | stat: -rw-r--r-- 6,064 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
##############################################################################
# MDTraj: A Python Library for Loading, Saving, and Manipulating
#         Molecular Dynamics Trajectories.
# Copyright 2012-2013 Stanford University and the Authors
#
# Authors: Kyle A. Beauchamp
# Contributors:
#
# MDTraj is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation, either version 2.1
# of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with MDTraj. If not, see <http://www.gnu.org/licenses/>.
##############################################################################

import os
import pickle
import shutil
import tarfile

import numpy as np
import pytest
import scipy.sparse

import mdtraj as md
from mdtraj.formats import mol2
from mdtraj.testing import eq


def test_load_mol2(get_fn):
    trj = md.load(get_fn("imatinib.mol2"))
    ref_trj = md.load(get_fn("imatinib.pdb"), bond_orders=True)
    eq(trj.xyz, ref_trj.xyz)

    ref_top, ref_bonds = ref_trj.top.to_dataframe()
    top, bonds = trj.top.to_dataframe()

    # Strip bond orders from mol2 because reader does not read bond orders
    ref_bonds[:, -1:] = np.zeros([ref_bonds.shape[0], 1])
    # Floor bond type info since PDB does not have non-integer bond types
    bonds[:, -2] = np.floor(bonds[:, -2])
    eq(bonds, ref_bonds)


def test_write_from_mol2(get_fn, tmpdir):
    trj = md.load(get_fn("imatinib.mol2"))
    ref_trj = md.load(get_fn("imatinib.pdb"))

    # Write out the mol2 as pdb
    tmp_file = f"{tmpdir}/imatinib.pdb"
    trj.save(tmp_file)
    temp_trj = md.load(tmp_file)

    # Check if xyz are the same as ref
    eq(ref_trj.xyz, temp_trj.xyz)


@pytest.mark.skipif(shutil.which("obabel") is None, reason="Requires obabel")
@pytest.mark.skipif(os.environ.get("TRAVIS", None) == "true", reason="Skip on Travis.")
def test_load_freesolv_gaffmol2_vs_sybylmol2_vs_obabelpdb(get_fn, tmpdir):
    tar_filename = "freesolve_v0.3.tar.bz2"
    tar = tarfile.open(get_fn(tar_filename), mode="r:bz2")
    os.chdir(str(tmpdir))
    tar.extractall()
    tar.close()

    with open("./v0.3/database.pickle", "rb") as f:
        database = pickle.load(f, encoding="latin-1")

    for key in database:
        gaff_filename = "./v0.3/mol2files_gaff/%s.mol2" % key
        pdb_filename = "./v0.3/mol2files_gaff/%s.pdb" % key
        sybyl_filename = "./v0.3/mol2files_sybyl/%s.mol2" % key

        cmd = f"obabel -imol2 {sybyl_filename} -opdb > {pdb_filename} 2>/dev/null"
        assert os.system(cmd) == 0

        t_pdb = md.load(pdb_filename)
        t_gaff = md.load(gaff_filename)
        t_sybyl = md.load(sybyl_filename)

        eq(t_pdb.n_atoms, t_gaff.n_atoms)
        eq(t_pdb.n_atoms, t_sybyl.n_atoms)

        eq(t_pdb.n_frames, t_gaff.n_frames)
        eq(t_pdb.n_frames, t_gaff.n_frames)

        eq(t_pdb.xyz, t_gaff.xyz, decimal=4)
        eq(t_pdb.xyz, t_sybyl.xyz, decimal=4)

        top_pdb, bonds_pdb = t_pdb.top.to_dataframe()
        top_gaff, bonds_gaff = t_gaff.top.to_dataframe()
        top_sybyl, bonds_sybyl = t_sybyl.top.to_dataframe()

        eq(top_sybyl.name.values, top_pdb.name.values)

        # eq(top_gaff.name.values, top_sybyl.name.values)  # THEY CAN HAVE DIFFERENT NAMES, so this isn't TRUE!

        def make_bonds_comparable(bond_array):
            """
            Create a bond connectivity matrix from a numpy array of atom pairs.  Avoids having to
            compare the order in which bonds are listed.
            """
            n_bonds = len(bond_array)
            data = np.ones(n_bonds)
            i = bond_array[:, 0]
            j = bond_array[:, 1]
            matrix = scipy.sparse.coo_matrix(
                (data, (i, j)),
                shape=(t_pdb.n_atoms, t_pdb.n_atoms),
            ).toarray()
            return matrix + matrix.T  # Symmetrize to account for (a ~ b) versus (b ~ a)

        bond_matrix_pdb = make_bonds_comparable(bonds_pdb)
        bond_matrix_gaff = make_bonds_comparable(bonds_gaff)
        bond_matrix_sybyl = make_bonds_comparable(bonds_sybyl)

        eq(bond_matrix_pdb, bond_matrix_gaff)
        eq(bond_matrix_pdb, bond_matrix_sybyl)

        # Third row from mol2 file copied below, used in testing.
        #       3 N1          8.5150   -0.1620    1.3310 n3        1 LIG     -0.732600


def test_mol2_dataframe(get_fn):
    top, bonds = mol2.mol2_to_dataframes(get_fn("imatinib.mol2"))
    eq(top.name[2], "N1")
    eq(top.atype[2], "n3")
    eq(top.resName[2], "LIG")
    eq(float(top.charge[2]), -0.732600)


def test_mol2_dataframe_status(get_fn):
    atoms, bonds = mol2.mol2_to_dataframes(get_fn("adp.mol2"))
    assert atoms["charge"][1] == 1.3672
    assert atoms["status"][1] == "****"


def test_mol2_warnings(get_fn):
    md.load_mol2(get_fn("lysozyme-ligand-tripos.mol2"))


def test_mol2_status_bits(get_fn):
    trj = md.load_mol2(get_fn("status-bits.mol2"))
    eq(trj.topology.n_atoms, 18)
    eq(trj.topology.n_bonds, 18)


def test_mol2_without_bonds(get_fn):
    trj = md.load_mol2(get_fn("li.mol2"))
    assert trj.topology.n_bonds == 0


def test_mol2_element_name(get_fn):
    trj = md.load_mol2(get_fn("cl.mol2"))
    top, bonds = trj.top.to_dataframe()
    assert top.iloc[0]["element"] == "Cl"


@pytest.mark.parametrize(
    "mol2_file",
    [
        ("li.mol2"),
        ("lysozyme-ligand-tripos.mol2"),
        ("imatinib.mol2"),
        ("status-bits.mol2"),
        ("adp.mol2"),
        ("water_acn.mol2"),
    ],
)
def test_load_all_mol2(mol2_file, get_fn):
    md.load_mol2(get_fn(mol2_file))


def test_mol2_n_residues(get_fn):
    trj = md.load_mol2(get_fn("water_acn.mol2"))
    assert trj.n_residues == 10