1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
|
// This is a manually run test. It searches for disulfide (SG-SG only)
// bonds in two ways:
// - using a simple search - find_disulfide_bonds1()
// - using cell-linked lists method from neighbor.hpp - find_disulfide_bonds2()
// and compares the results.
// It was run for all PDB entries to test the cell lists implementation.
#include <gemmi/neighbor.hpp>
#include <gemmi/contact.hpp>
#include <gemmi/model.hpp>
#include <gemmi/mmread_gz.hpp>
#include <gemmi/dirwalk.hpp>
#include <stdexcept> // for runtime_error
#include <chrono>
static int verbose = false;
using namespace gemmi;
struct BondInfo {
const_CRA cra1, cra2;
int image_idx;
double dist_sq;
void print(const UnitCell& cell) const {
NearestImage im = cell.find_nearest_pbc_image(cra1.atom->pos, cra2.atom->pos, image_idx);
assert(fabs(dist_sq - im.dist_sq) < 1e-3);
std::printf("%s - %s im:%s %.3f\n",
atom_str(cra1).c_str(), atom_str(cra2).c_str(),
im.symmetry_code(true).c_str(), std::sqrt(dist_sq));
}
};
// Number of SG atoms is relatively small; checking all pairs should be fast.
std::vector<BondInfo> find_disulfide_bonds1(const Model& model,
const UnitCell& cell) {
const double max_dist = 3.0;
// Find all SG sulfur atoms.
std::vector<const_CRA> atoms;
const std::string sg = "SG";
for (const Chain& chain : model.chains)
for (const Residue& res : chain.residues)
for (const Atom& atom : res.atoms)
if (atom.element == El::S && atom.name == sg)
atoms.push_back(const_CRA{&chain, &res, &atom});
// Check distances.
std::vector<BondInfo> ret;
for (size_t i = 0; i < atoms.size(); ++i) {
const Atom* a1 = atoms[i].atom;
for (size_t j = i; j < atoms.size(); ++j) {
const Atom* a2 = atoms[j].atom;
if (a1->same_conformer(*a2)) {
Asu asu = (i != j ? Asu::Any : Asu::Different);
NearestImage im = cell.find_nearest_image(a1->pos, a2->pos, asu);
// if i == j and the image is nearby the atom is on special position
if (im.dist_sq < max_dist * max_dist && (i != j || im.dist_sq > 1.0))
ret.push_back({atoms[i], atoms[j], im.sym_idx, im.dist_sq});
}
}
}
return ret;
}
// use NeighborSearch (cell list method) to find disulfide SG-SG bonds
static std::vector<BondInfo> find_disulfide_bonds2(Model& model,
const UnitCell& cell) {
const double max_dist = 3.0;
const std::string sg = "SG";
NeighborSearch ns(model, cell, 5.0);
#if 0
ns.populate();
#else
for (const Chain& chain : model.chains)
for (const Residue& res : chain.residues)
for (const Atom& atom : res.atoms)
if (atom.element == El::S && atom.name == sg) {
auto indices = model.get_indices(&chain, &res, &atom);
ns.add_atom(atom, indices[0], indices[1], indices[2]);
}
#endif
std::vector<BondInfo> ret;
#if 0 // faster, but requires more code
for (const Chain& chain : model.chains)
for (const Residue& res : chain.residues)
for (const Atom& atom : res.atoms)
if (atom.element == El::S && atom.name == sg) {
auto indices = model.get_indices(&chain, &res, &atom);
ns.for_each(atom.pos, atom.altloc, max_dist,
[&](const NeighborSearch::Mark& m, double dist_sq) {
if (m.element != El::S)
return;
if (indices[0] > m.chain_idx || (indices[0] == m.chain_idx &&
(indices[1] > m.residue_idx ||
(indices[1] == m.residue_idx && dist_sq < 1))))
return;
const_CRA cra1 = {&chain, &res, &atom};
const_CRA cra2 = m.to_cra(model);
if (cra2.atom->name == sg)
ret.push_back({cra1, cra2, m.image_idx, dist_sq});
});
}
#else // slower, but simpler
ContactSearch contacts(max_dist);
contacts.for_each_contact(ns, [&](const CRA& cra1, const CRA& cra2,
int image_idx, double dist_sq) {
if (cra1.atom->element == El::S && cra1.atom->name == sg &&
cra2.atom->element == El::S && cra2.atom->name == sg)
ret.push_back({cra1, cra2, image_idx, dist_sq});
});
#endif
return ret;
}
static void check_disulf(const std::string& path) {
if (verbose)
printf("path: %s\n", path.c_str());
Structure st = read_structure_gz(path);
Model& model = st.first_model();
using Clock = std::chrono::steady_clock;
auto start = Clock::now();
std::vector<BondInfo> c1 = find_disulfide_bonds1(model, st.cell);
std::chrono::duration<double> elapsed1 = Clock::now() - start;
start = Clock::now();
std::vector<BondInfo> c2 = find_disulfide_bonds2(model, st.cell);
std::chrono::duration<double> elapsed2 = Clock::now() - start;
printf("%10s %zu %zu (%.3g %.3g s)\n", st.name.c_str(),
c1.size(), c2.size(), elapsed1.count(), elapsed2.count());
if (c1.size() != c2.size() || verbose) {
for (const BondInfo& bi : c1)
bi.print(st.cell);
printf("---\n");
for (const BondInfo& bi : c2)
bi.print(st.cell);
}
}
int main(int argc, char* argv[]) {
int pos = 1;
while (pos < argc && argv[pos] == std::string("-v")) {
++pos;
verbose = true;
}
if (pos >= argc) {
std::fprintf(stderr, "No paths given.\n");
return 1;
}
try {
for (; pos != argc; ++pos)
for (std::string path : CoorFileWalk(argv[pos], 'M'))
check_disulf(path);
} catch (std::runtime_error& err) {
std::fprintf(stderr, "Error: %s\n", err.what());
return 1;
}
return 0;
}
|