1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218
|
// Examples of using Boost Graph Library with gemmi::ChemComp:
// checking graph and subgraph isomorphism,
// finding maximum common induced subgraph.
//
// NOTE: documentation (mol.rst) includes parts of this file.
#include <cstring> // for strcmp
#include <iostream> // for std::cout, std::cerr
#include <boost/graph/graph_traits.hpp>
#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/vf2_sub_graph_iso.hpp>
#include <boost/graph/mcgregor_common_subgraphs.hpp>
#include <gemmi/read_cif.hpp> // for read_cif_gz
#include <gemmi/chemcomp.hpp> // for ChemComp, make_chemcomp_from_block
struct AtomVertex {
gemmi::Element el = gemmi::El::X;
std::string name;
};
struct BondEdge {
gemmi::BondType type;
};
using Graph = boost::adjacency_list<boost::vecS, boost::vecS,
boost::undirectedS,
AtomVertex, BondEdge>;
Graph make_graph(const gemmi::ChemComp& cc) {
Graph g(cc.atoms.size());
for (size_t i = 0; i != cc.atoms.size(); ++i) {
g[i].el = cc.atoms[i].el;
g[i].name = cc.atoms[i].id;
}
for (const gemmi::Restraints::Bond& bond : cc.rt.bonds) {
int n1 = cc.get_atom_index(bond.id1.atom);
int n2 = cc.get_atom_index(bond.id2.atom);
boost::add_edge(n1, n2, BondEdge{bond.type}, g);
}
return g;
}
gemmi::ChemComp make_chemcomp(const char* path) {
gemmi::cif::Document doc = gemmi::read_cif_gz(path);
// assuming the component description is in the last block of the file
return gemmi::make_chemcomp_from_block(doc.blocks.back());
}
using GraphTraits = boost::graph_traits<Graph>;
using Vertex = GraphTraits::vertex_descriptor;
using Edge = GraphTraits::edge_descriptor;
// Example 1 - count automorphisms (a toy example).
struct CountingCallback {
int n = 0;
template <typename CorrespondenceMap1To2, typename CorrespondenceMap2To1>
bool operator()(CorrespondenceMap1To2, CorrespondenceMap2To1) {
++n;
return true;
}
};
void count_automorphisms_of_SO3() {
Graph g = make_graph(make_chemcomp("tests/SO3.cif"));
CountingCallback counting_callback;
boost::vf2_graph_iso(
g, g, std::ref(counting_callback),
// default values
boost::get(boost::vertex_index, g),
boost::get(boost::vertex_index, g),
boost::vertex_order_by_mult(g),
// ignoring bond types - all edges are considered equivalent
boost::always_equivalent(),
// atoms of the same type (element) are considered equivalent
[&](Vertex a, Vertex b) { return g[a].el == g[b].el; });
std::cout << counting_callback.n << std::endl;
// prints 6 (3! permutations of the three oxygens in SO3)
}
// Example 2 - check graph isomorphism.
//
// Example output:
//
// $ ./with_bgl ccd/M10.cif monomers/m/M10.cif
// isomorphic!
// O4 -> O9
// O9 -> O4
//
struct PrintMappingCallback {
Graph g1, g2;
template <typename CorrespondenceMap1To2, typename CorrespondenceMap2To1>
bool operator()(CorrespondenceMap1To2 map1, CorrespondenceMap2To1) {
std::cout << "isomorphic!\n";
for (auto vp = boost::vertices(g1); vp.first != vp.second; ++vp.first) {
Vertex v1 = *vp.first;
Vertex v2 = boost::get(map1, v1);
if (v2 != GraphTraits::null_vertex()) {
const std::string& name1 = g1[v1].name;
const std::string& name2 = g2[v2].name;
if (name1 != name2)
std::cout << " " << name1 << " -> " << name2 << std::endl;
}
}
return false;
}
};
void check_graph_isomorphism(const char* cif1, const char* cif2) {
Graph graph1 = make_graph(make_chemcomp(cif1));
Graph graph2 = make_graph(make_chemcomp(cif2));
bool found = vf2_graph_iso(
graph1, graph2,
PrintMappingCallback{graph1, graph2},
get(boost::vertex_index, graph1), get(boost::vertex_index, graph2),
boost::vertex_order_by_mult(graph1),
[&](Edge a, Edge b) { return graph1[a].type == graph2[b].type; },
[&](Vertex a, Vertex b) { return graph1[a].el == graph2[b].el; });
if (!found)
std::cout << "not isomorphic" << std::endl;
}
// Example 3 - check substructure isomorphism (if any),
// ignoring hydrogens and bond types.
void check_subgraph_isomorphism(const char* cif1, const char* cif2) {
gemmi::ChemComp cc1 = make_chemcomp(cif1).remove_hydrogens();
Graph graph1 = make_graph(cc1);
gemmi::ChemComp cc2 = make_chemcomp(cif2).remove_hydrogens();
Graph graph2 = make_graph(cc2);
bool found = vf2_subgraph_iso(
graph1, graph2,
PrintMappingCallback{graph1, graph2},
get(boost::vertex_index, graph1), get(boost::vertex_index, graph2),
boost::vertex_order_by_mult(graph1),
boost::always_equivalent(), // edge_comp
[&](Vertex a, Vertex b) { return graph1[a].el == graph2[b].el; });
if (!found)
std::cout << "not isomorphic" << std::endl;
}
// Example 4 - find the largest common subgraph using McGregor's algorithm.
// We ignore hydrogens here.
// Example output:
// $ time with_bgl -c monomers/a/AUD.cif monomers/l/LSA.cif
// Searching largest subgraphs of AUD and LSA (10 and 12 vertices)...
// Maximum connected common subgraph has 7 vertices:
// SAA -> S10
// OAG -> O12
// OAH -> O11
// NAF -> N9
// CAE -> C7
// OAI -> O8
// CAD -> C1
// Maximum connected common subgraph has 7 vertices:
// SAA -> S10
// OAG -> O11
// OAH -> O12
// NAF -> N9
// CAE -> C7
// OAI -> O8
// CAD -> C1
//
// real 0m0.012s
// user 0m0.008s
// sys 0m0.004s
struct PrintCommonSubgraphCallback {
Graph g1, g2;
template <typename CorrespondenceMap1To2, typename CorrespondenceMap2To1>
bool operator()(CorrespondenceMap1To2 map1, CorrespondenceMap2To1,
typename GraphTraits::vertices_size_type subgraph_size) {
std::cout << "Maximum connected common subgraph has " << subgraph_size
<< " vertices:\n";
for (auto vp = boost::vertices(g1); vp.first != vp.second; ++vp.first) {
Vertex v1 = *vp.first;
Vertex v2 = boost::get(map1, v1);
if (v2 != GraphTraits::null_vertex())
std::cout << " " << g1[v1].name << " -> " << g2[v2].name << std::endl;
}
return true;
}
};
void find_common_subgraph(const char* cif1, const char* cif2) {
gemmi::ChemComp cc1 = make_chemcomp(cif1).remove_hydrogens();
Graph graph1 = make_graph(cc1);
gemmi::ChemComp cc2 = make_chemcomp(cif2).remove_hydrogens();
Graph graph2 = make_graph(cc2);
std::cout << "Searching largest subgraphs of " << cc1.name << " and "
<< cc2.name << " (" << cc1.atoms.size() << " and " << cc2.atoms.size()
<< " vertices)..." << std::endl;
mcgregor_common_subgraphs_maximum_unique(
graph1, graph2,
get(boost::vertex_index, graph1), get(boost::vertex_index, graph2),
[&](Edge a, Edge b) { return graph1[a].type == graph2[b].type; },
[&](Vertex a, Vertex b) { return graph1[a].el == graph2[b].el; },
/*only_connected_subgraphs=*/ true,
PrintCommonSubgraphCallback{graph1, graph2});
}
// a minimal program that can call the functions above
int main(int argc, char* argv[]) {
if (argc == 1)
count_automorphisms_of_SO3(); // toy example that outputs "6".
else if (argc == 3)
check_graph_isomorphism(argv[1], argv[2]);
else if (argc == 4 && std::strcmp(argv[1], "-s") == 0)
check_subgraph_isomorphism(argv[2], argv[3]);
else if (argc == 4 && std::strcmp(argv[1], "-c") == 0)
find_common_subgraph(argv[2], argv[3]);
else
std::cerr << "Usage: " << argv[0] << " [-s|-c] ligand1.cif ligand2.cif\n";
return 0;
}
|