1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
|
//
// Copyright (C) 2020 Greg Landrum and T5 Informatics GmbH
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include <RDGeneral/Invariant.h>
#include <map>
#include <boost/lexical_cast.hpp>
#include <boost/tokenizer.hpp>
#include <boost/format.hpp>
#include <algorithm>
// Tokenizer type used below to split property text on single-character
// separators (boost::char_separator<char>).
using tokenizer = boost::tokenizer<boost::char_separator<char>>;
namespace RDKit {
namespace MolEnumerator {
//! Plain data holder for one parsed LINKNODE entry.
/*!
  Instances are filled in by utils::getMolLinkNodes() from the
  space-separated unsigned integers of a single LINKNODE entry. A
  default-constructed LinkNode has all counts set to zero and no bond atoms.
*/
struct LinkNode {
  //! first value of the entry; getMolLinkNodes() rejects entries where it is 0
  unsigned int minRep{0};
  //! second value of the entry; getMolLinkNodes() rejects entries where it is
  //! smaller than minRep
  unsigned int maxRep{0};
  //! third value of the entry: the number of bonds described by the entry
  unsigned int nBonds{0};
  //! one (atom index, atom index) pair per bond, as stored by
  //! getMolLinkNodes() after it has converted the indices from the entry
  std::vector<std::pair<unsigned int, unsigned int>> bondAtoms;
};
namespace utils {
inline std::vector<LinkNode> getMolLinkNodes(
const ROMol &mol, bool strict = true,
const std::map<unsigned, Atom *> *atomIdxMap = nullptr) {
std::vector<LinkNode> res;
std::string pval;
if (!mol.getPropIfPresent(common_properties::molFileLinkNodes, pval)) {
return res;
}
std::vector<int> mapping;
boost::char_separator<char> pipesep("|");
boost::char_separator<char> spacesep(" ");
for (auto linknodetext : tokenizer(pval, pipesep)) {
LinkNode node;
tokenizer tokens(linknodetext, spacesep);
std::vector<unsigned int> data;
try {
std::transform(tokens.begin(), tokens.end(), std::back_inserter(data),
[](const std::string &token) -> unsigned int {
return boost::lexical_cast<unsigned int>(token);
});
} catch (boost::bad_lexical_cast &) {
std::ostringstream errout;
errout << "Cannot convert values in LINKNODE '" << linknodetext
<< "' to unsigned ints";
if (strict) {
throw ValueErrorException(errout.str());
} else {
BOOST_LOG(rdWarningLog) << errout.str() << std::endl;
continue;
}
}
// the second test here is for the atom-pairs defining the bonds
// data[2] contains the number of bonds
if (data.size() < 5 || data.size() < 3 + 2 * data[2]) {
std::ostringstream errout;
errout << "not enough values in LINKNODE '" << linknodetext << "'";
if (strict) {
throw ValueErrorException(errout.str());
} else {
BOOST_LOG(rdWarningLog) << errout.str() << std::endl;
continue;
}
}
node.minRep = data[0];
node.maxRep = data[1];
if (node.minRep == 0 || node.maxRep < node.minRep) {
std::ostringstream errout;
errout << "bad counts in LINKNODE '" << linknodetext << "'";
if (strict) {
throw ValueErrorException(errout.str());
} else {
BOOST_LOG(rdWarningLog) << errout.str() << std::endl;
continue;
}
}
node.nBonds = data[2];
if (node.nBonds != 2) {
if (strict) {
UNDER_CONSTRUCTION(
"only link nodes with 2 bonds are currently supported");
} else {
BOOST_LOG(rdWarningLog)
<< "only link nodes with 2 bonds are currently supported"
<< std::endl;
continue;
}
}
// both bonds must start from the same atom:
if (data[3] != data[5]) {
std::ostringstream errout;
errout << "bonds don't start at the same atom for LINKNODE '"
<< linknodetext << "'";
if (strict) {
throw ValueErrorException(errout.str());
} else {
BOOST_LOG(rdWarningLog) << errout.str() << std::endl;
continue;
}
}
if (atomIdxMap) {
// map the indices back to the original atom numbers
for (unsigned int i = 3; i <= 6; ++i) {
const auto aidx = atomIdxMap->find(data[i] - 1);
if (aidx == atomIdxMap->end()) {
std::ostringstream errout;
errout << "atom index " << data[i]
<< " cannot be found in molecule for LINKNODE '"
<< linknodetext << "'";
if (strict) {
throw ValueErrorException(errout.str());
} else {
BOOST_LOG(rdWarningLog) << errout.str() << std::endl;
continue;
}
} else {
data[i] = aidx->second->getIdx();
}
}
} else {
for (unsigned int i = 3; i <= 6; ++i) {
--data[i];
}
}
node.bondAtoms.push_back(std::make_pair(data[3], data[4]));
node.bondAtoms.push_back(std::make_pair(data[5], data[6]));
if (!mol.getBondBetweenAtoms(data[4], data[3]) ||
!mol.getBondBetweenAtoms(data[6], data[5])) {
std::ostringstream errout;
errout << "bond not found between atoms in LINKNODE '" << linknodetext
<< "'";
if (strict) {
throw ValueErrorException(errout.str());
} else {
BOOST_LOG(rdWarningLog) << errout.str() << std::endl;
continue;
}
}
res.push_back(std::move(node));
}
return res;
}
} // namespace utils
} // namespace MolEnumerator
} // namespace RDKit
|