1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
|
/*
* A very, very basic SMILES-like parser. No aromaticity or zero-order
* bonds, and no chirality/stereochemistry.
*
* Do not use this as a basis for real SMILES parsers. It just lets
* us sidestep using a full chemistry toolkit when writing tests.
*/
#include <algorithm>
#include <iostream>
#include <memory>
#include <stack>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>
#include "../sketcherMinimizerMolecule.h"
namespace schrodinger
{
/**
 * Build a molecule from a minimal SMILES-like string.
 *
 * Recognized symbols:
 *  - the single-letter elements H, C, N, O and S;
 *  - '=' : the next bond that gets created is a double bond;
 *  - the digits 1-4 : ring-closure labels (first use opens, second closes);
 *  - '(' and ')' : open and close a branch off the current atom.
 *
 * @param smiles the string to parse
 * @return a heap-allocated molecule, handed to the caller as a raw pointer
 * @throws std::runtime_error on an unrecognized symbol, on a ring label or
 *         '(' that appears before any atom, or on a ')' with no matching '('
 */
sketcherMinimizerMolecule* approxSmilesParse(const std::string& smiles)
{
    const std::unordered_map<char, int> elements{
        {'H', 1}, {'C', 6}, {'N', 7}, {'S', 16}, {'O', 8}};

    // All parse errors share the same " in SMILES: <input>" suffix.
    const auto fail = [&smiles](const std::string& what) {
        throw std::runtime_error(what + " in SMILES: " + smiles);
    };

    // Hold the molecule in a unique_ptr while parsing so that it is
    // destroyed if we throw; ownership is released to the caller on success.
    std::unique_ptr<sketcherMinimizerMolecule> mol(
        new sketcherMinimizerMolecule());

    // One atom stack per open branch; the top of the innermost stack is the
    // atom the next atom/ring closure/branch attaches to.
    std::stack<std::stack<sketcherMinimizerAtom*>> tree;
    tree.emplace();
    auto* prev = &tree.top();

    // Ring labels that have been opened but not yet closed.
    std::unordered_map<char, sketcherMinimizerAtom*> cycles;
    int bond_order = 1;

    for (const char c : smiles) {
        const auto atomic_number = elements.find(c);
        if (atomic_number != elements.end()) {
            auto atom = mol->addNewAtom();
            atom->setAtomicNumber(atomic_number->second);
            if (!prev->empty()) {
                auto bond = mol->addNewBond(atom, prev->top());
                bond->setBondOrder(bond_order);
                bond_order = 1;
            }
            prev->push(atom);
        } else if (c == '=') {
            bond_order = 2;
        } else if (c >= '1' && c <= '4') {
            // top() on an empty stack is undefined behaviour.
            if (prev->empty()) {
                fail(std::string("ring closure ") + c + " before any atom");
            }
            const auto other = cycles.find(c);
            if (other == cycles.end()) {
                // Opening a ring does not consume a pending '='; it stays
                // in effect for the next bond that is created.
                cycles[c] = prev->top();
            } else {
                auto bond = mol->addNewBond(prev->top(), other->second);
                bond->setBondOrder(bond_order);
                bond_order = 1;
                cycles.erase(other);
            }
        } else if (c == '(') {
            if (prev->empty()) {
                fail("branch opened before any atom");
            }
            // Seed the new branch with the branch point so the first atom
            // inside the parentheses bonds to it.
            auto old = prev->top();
            tree.emplace();
            prev = &tree.top();
            prev->push(old);
        } else if (c == ')') {
            // The bottom stack is the main chain and must never be popped.
            if (tree.size() <= 1) {
                fail("unmatched ')'");
            }
            tree.pop();
            prev = &tree.top();
        } else {
            fail(std::string("unrecognized symbol: ") + c);
        }
    }

    sketcherMinimizerMolecule::assignBondsAndNeighbors(mol->getAtoms(),
                                                       mol->getBonds());
    return mol.release();
}
} // namespace schrodinger
|