1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
|
//
// Copyright (C) 2018 Susan H. Leung
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include "Fragment.h"
#include <GraphMol/MolStandardize/FragmentCatalog/FragmentCatalogUtils.h>
#include <boost/tokenizer.hpp>
typedef boost::tokenizer<boost::char_separator<char>> tokenizer;
#include <GraphMol/ChemTransforms/ChemTransforms.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <GraphMol/Descriptors/MolDescriptors.h>
#include <RDGeneral/types.h>
#include <memory>
namespace RDKit {
namespace MolStandardize {
//! Default constructor.
/*!
  Builds the fragment catalog from the fragment file named in
  defaultCleanupParameters and sets LEAVE_LAST to true, so that remove()
  stops once a single fragment is left.
*/
FragmentRemover::FragmentRemover() {
  BOOST_LOG(rdInfoLog) << "Initializing FragmentRemover\n";

  // NOTE(review): the catalog is given a pointer to a stack-local parameter
  // object; presumably FragmentCatalog copies it - confirm, because
  // defaultParams is destroyed when this constructor returns.
  FragmentCatalogParams defaultParams(defaultCleanupParameters.fragmentFile);
  d_fcat = new FragmentCatalog(&defaultParams);
  LEAVE_LAST = true;
}
//! Constructor taking an explicit fragment file.
/*!
  \param fragmentFile  handed to FragmentCatalogParams to build the catalog
  \param leave_last    stored in LEAVE_LAST; when true, remove() stops as soon
                       as at most one fragment is left and never strips the
                       molecule down to zero atoms

  Unlike the default constructor, this one does not write to the info log.
*/
FragmentRemover::FragmentRemover(const std::string fragmentFile,
                                 const bool leave_last) {
  LEAVE_LAST = leave_last;

  // NOTE(review): as in the default constructor, the catalog receives the
  // address of a local - presumably it copies the parameters; confirm.
  FragmentCatalogParams customParams(fragmentFile);
  d_fcat = new FragmentCatalog(&customParams);
}
//! Destructor: releases the fragment catalog allocated by the constructors.
FragmentRemover::~FragmentRemover() { delete this->d_fcat; }
//! Removes every catalog fragment pattern from a copy of \c mol.
/*!
  The functional groups of the catalog are applied one after another with
  deleteSubstructs() (whole-fragment matching enabled). Processing stops early
  when nothing is left, or - if LEAVE_LAST is set - when at most one fragment
  remains or when the current pattern would delete every remaining atom.

  \param mol  the molecule to clean; it is not modified
  \return a newly allocated molecule; the caller takes ownership and must
          delete it

  Intermediate molecules are held in std::unique_ptr so that nothing leaks if
  one of the called RDKit functions throws part-way through the loop.
*/
ROMol *FragmentRemover::remove(const ROMol &mol) {
  BOOST_LOG(rdInfoLog) << "Running FragmentRemover\n";
  PRECONDITION(this->d_fcat, "");
  const FragmentCatalogParams *fparams = this->d_fcat->getCatalogParams();
  PRECONDITION(fparams, "");

  const std::vector<std::shared_ptr<ROMol>> &fgrps = fparams->getFuncGroups();
  std::unique_ptr<ROMol> removed(new ROMol(mol));

  for (const auto &fgci : fgrps) {
    const std::vector<boost::shared_ptr<ROMol>> frags =
        MolOps::getMolFrags(*removed);
    // If nothing is left or leave_last and only one fragment, end here
    if (removed->getNumAtoms() == 0 ||
        (this->LEAVE_LAST && frags.size() <= 1)) {
      break;
    }

    std::string fname;
    fgci->getProp(common_properties::_Name, fname);

    // Own the result immediately so it is freed on every exit path.
    std::unique_ptr<ROMol> tmp(
        RDKit::deleteSubstructs(*removed, *fgci, true));
    if (tmp->getNumAtoms() != removed->getNumAtoms()) {
      BOOST_LOG(rdInfoLog) << "Removed fragment: " << fname << "\n";
    }

    if (this->LEAVE_LAST && tmp->getNumAtoms() == 0) {
      // All the remaining fragments match this pattern - leave them all
      break;
    }

    // Adopt the stripped molecule; the previous one is deleted by reset().
    removed.reset(tmp.release());
  }

  return removed.release();
}
//! Reports whether a fragment contains at least one carbon atom.
/*!
  \param frag  the fragment to inspect
  \return true if any atom of \c frag has atomic number 6, false otherwise
          (including when the fragment has no atoms)
*/
bool isOrganic(const ROMol &frag) {
  const int carbonAtomicNum = 6;
  bool hasCarbon = false;
  for (const auto atom : frag.atoms()) {
    if (atom->getAtomicNum() == carbonAtomicNum) {
      hasCarbon = true;
      break;  // one carbon is enough
    }
  }
  return hasCarbon;
}
//! Copy constructor.
/*!
  Copies the PREFER_ORGANIC setting from \c other and writes an
  initialization message to the info log.
*/
LargestFragmentChooser::LargestFragmentChooser(
    const LargestFragmentChooser &other)
    : PREFER_ORGANIC(other.PREFER_ORGANIC) {
  BOOST_LOG(rdInfoLog) << "Initializing LargestFragmentChooser\n";
}
//! Returns a copy of the largest covalent unit (fragment) of \c mol.
/*!
  Fragments come from MolOps::getMolFrags(). A fragment replaces the current
  best candidate unless it loses on one of these criteria, checked in order:
    -# if PREFER_ORGANIC is set, organic fragments (see isOrganic()) always
       beat non-organic ones
    -# atom count, computed as one per atom plus that atom's getTotalNumHs()
    -# exact molecular weight (Descriptors::calcExactMW)
    -# SMILES string; the one that compares smaller wins

  \param mol  the molecule to examine; it is not modified
  \return a newly allocated molecule owned by the caller. If \c mol yields no
          fragments (e.g. it has no atoms) a copy of \c mol itself is
          returned instead of dereferencing an unset fragment pointer.
*/
ROMol *LargestFragmentChooser::choose(const ROMol &mol) {
  BOOST_LOG(rdInfoLog) << "Running LargestFragmentChooser\n";

  std::vector<boost::shared_ptr<ROMol>> frags = MolOps::getMolFrags(mol);
  if (frags.empty()) {
    // Nothing to choose from: l.Fragment would stay null below.
    return new ROMol(mol);
  }

  LargestFragmentChooser::Largest l;
  for (const auto &frag : frags) {
    std::string smiles = MolToSmiles(*frag);
    BOOST_LOG(rdInfoLog) << "Fragment: " << smiles << "\n";
    bool organic = isOrganic(*frag);

    if (this->PREFER_ORGANIC) {
      // Skip this fragment if not organic and we already have an organic
      // fragment as the largest so far
      if (l.Fragment != nullptr && l.Organic && !organic) {
        continue;
      }
      // Reset largest if it wasn't organic and this fragment is organic;
      // with no current best, every size check below is bypassed and this
      // fragment is accepted unconditionally.
      if (l.Fragment != nullptr && organic && !l.Organic) {
        l.Fragment = nullptr;
      }
    }

    // Count atoms including hydrogens reported by getTotalNumHs()
    unsigned int numatoms = 0;
    for (const auto at : frag->atoms()) {
      numatoms += 1 + at->getTotalNumHs();
    }

    // Skip this fragment if fewer atoms than the largest
    if (l.Fragment != nullptr && (numatoms < l.NumAtoms)) {
      continue;
    }

    // Skip this fragment if equal number of atoms but weight is lower
    double weight = Descriptors::calcExactMW(*frag);
    if (l.Fragment != nullptr && (numatoms == l.NumAtoms) &&
        (weight < l.Weight)) {
      continue;
    }

    // Skip this fragment if equal number of atoms and equal weight but smiles
    // comes last alphabetically
    if (l.Fragment != nullptr && (numatoms == l.NumAtoms) &&
        (weight == l.Weight) && (smiles > l.Smiles)) {
      continue;
    }

    BOOST_LOG(rdInfoLog) << "New largest fragment: " << smiles << " ("
                         << numatoms << ")\n";
    // Otherwise this is the largest so far
    l.Smiles = smiles;
    l.Fragment = frag;
    l.NumAtoms = numatoms;
    l.Weight = weight;
    l.Organic = organic;
  }

  return new ROMol(*(l.Fragment));
}
//! Default constructor: an empty record meaning "no fragment chosen yet"
//! (empty SMILES, null fragment, zero atoms, zero weight, not organic).
LargestFragmentChooser::Largest::Largest()
    : Smiles(""),
      Fragment(nullptr),
      NumAtoms(0u),
      Weight(0.0),
      Organic(false) {}
//! Constructs a fully populated record; each argument is copied into the
//! member of the corresponding name.
/*!
  \param smiles    stored in Smiles
  \param fragment  stored in Fragment
  \param numatoms  stored in NumAtoms
  \param weight    stored in Weight
  \param organic   stored in Organic
*/
LargestFragmentChooser::Largest::Largest(
    std::string &smiles,
    const boost::shared_ptr<ROMol> &fragment,
    unsigned int &numatoms,
    double &weight,
    bool &organic)
    : Smiles(smiles), Fragment(fragment), NumAtoms(numatoms),
      Weight(weight), Organic(organic) {}
} // namespace MolStandardize
} // namespace RDKit
|