1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
|
// $Id$
//
// Copyright (C) 2003-2006 Rational Discovery LLC
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
// REVIEW: move this to a GraphMol/FuncGroups directory
#include <RDGeneral/BadFileException.h>
#include "FragCatalogUtils.h"
#include <GraphMol/Subgraphs/Subgraphs.h>
#include <GraphMol/Subgraphs/SubgraphUtils.h>
#include <fstream>
#include <string>
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <boost/tokenizer.hpp>
typedef boost::tokenizer<boost::char_separator<char> > tokenizer;
#include <boost/algorithm/string.hpp>
namespace RDKit {
// local utility namespace
namespace {
ROMol *getSmarts(const std::string &tmpStr) {
ROMol *mol = nullptr;
if (tmpStr.length() == 0) {
// empty line
return mol;
}
if (tmpStr.substr(0, 2) == "//") {
// comment line
return mol;
}
boost::char_separator<char> tabSep("\t");
tokenizer tokens(tmpStr, tabSep);
tokenizer::iterator token = tokens.begin();
// name of the functional groups
std::string name = *token;
boost::erase_all(name, " ");
++token;
// grab the smarts:
std::string smarts = *token;
boost::erase_all(smarts, " ");
++token;
mol = SmartsToMol(smarts);
CHECK_INVARIANT(mol, smarts);
mol->setProp(common_properties::_Name, name);
mol->setProp(common_properties::_fragSMARTS, smarts);
return mol;
}
} // end of local utility namespace
MOL_SPTR_VECT readFuncGroups(std::istream &inStream, int nToRead) {
MOL_SPTR_VECT funcGroups;
funcGroups.clear();
if (inStream.bad()) {
throw BadFileException("Bad stream contents.");
}
const int MAX_LINE_LEN = 512;
char inLine[MAX_LINE_LEN];
std::string tmpstr;
int nRead = 0;
while (!inStream.eof() && (nToRead < 0 || nRead < nToRead)) {
inStream.getline(inLine, MAX_LINE_LEN, '\n');
tmpstr = inLine;
// parse the molecule on this line (if there is one)
ROMol *mol = getSmarts(tmpstr);
if (mol) {
funcGroups.push_back(ROMOL_SPTR(mol));
nRead++;
}
}
return funcGroups;
}
MOL_SPTR_VECT readFuncGroups(std::string fileName) {
std::ifstream inStream(fileName.c_str());
if ((!inStream) || (inStream.bad())) {
std::ostringstream errout;
errout << "Bad input file " << fileName;
throw BadFileException(errout.str());
}
MOL_SPTR_VECT funcGroups;
funcGroups = readFuncGroups(inStream);
return funcGroups;
}
MatchVectType findFuncGroupsOnMol(const ROMol &mol, const FragCatParams *params,
INT_VECT &fgBonds) {
PRECONDITION(params, "bad params");
fgBonds.clear();
std::pair<int, int> amat;
MatchVectType aidFgrps;
std::vector<MatchVectType> fgpMatches;
std::vector<MatchVectType>::const_iterator mati;
MatchVectType::const_iterator mi;
int aid;
// const ROMol *fgrp;
INT_VECT_CI bi;
aidFgrps.clear();
int fid = 0;
const MOL_SPTR_VECT &fgrps = params->getFuncGroups();
MOL_SPTR_VECT::const_iterator fgci;
for (fgci = fgrps.begin(); fgci != fgrps.end(); fgci++) {
const ROMol *fgrp = fgci->get();
std::string fname;
(*fgci)->getProp(common_properties::_Name, fname);
// std::cout << "Groups number: " << fname << "\n";
//(*fgci)->debugMol(std::cout);
// mol->debugMol(std::cout);
// match this functional group onto the molecule
SubstructMatch(mol, *fgrp, fgpMatches);
// loop over all the matches we get for this fgroup
for (mati = fgpMatches.begin(); mati != fgpMatches.end(); mati++) {
// FIX: we will assume that the first atom in fgrp is always the
// connection
// atom
amat = mati->front();
aid = amat.second; // FIX: is this correct - the second entry in the pair
// is the atom ID from mol
// grab the list of atom Ids from mol that match the functional group
INT_VECT bondIds, maids;
for (mi = mati->begin(); mi != mati->end(); mi++) {
maids.push_back(mi->second);
}
// create a list of bond IDs from these atom ID
// these are the bond in mol that are part of portion that matches the
// functional group
bondIds = Subgraphs::bondListFromAtomList(mol, maids);
// now check if all these bonds have been covered as part of larger
// functional group that was dealt with earlier
// FIX: obviously we assume here that the function groups in params
// come in decreasing order of size.
bool allDone = true;
for (bi = bondIds.begin(); bi != bondIds.end(); bi++) {
if (std::find(fgBonds.begin(), fgBonds.end(), (*bi)) == fgBonds.end()) {
allDone = false;
fgBonds.push_back(*bi);
}
}
if (!allDone) {
// this functional group mapping onto mol is not part of a larger func
// group mapping so record it
aidFgrps.push_back(std::pair<int, int>(aid, fid));
}
}
fid++;
}
return aidFgrps;
}
ROMol *prepareMol(const ROMol &mol, const FragCatParams *fparams,
MatchVectType &aToFmap) {
PRECONDITION(fparams, "");
// get a mapping of the functional groups onto the molecule
INT_VECT fgBonds;
MatchVectType aidToFid = findFuncGroupsOnMol(mol, fparams, fgBonds);
// get the core piece of molecule (i.e. the part of the molecule
// without the functional groups). This basically the part of the molecule
// that does not contain the function group bonds given by "fgBonds"
INT_VECT cBonds;
int bid, nbds = mol.getNumBonds();
for (bid = 0; bid < nbds; bid++) {
if (std::find(fgBonds.begin(), fgBonds.end(), bid) == fgBonds.end()) {
cBonds.push_back(bid);
}
}
INT_MAP_INT
aIdxMap; // a map from atom id in mol to the new atoms id in coreMol
ROMol *coreMol = Subgraphs::pathToSubmol(mol, cBonds, false, aIdxMap);
// now map the functional groups on mol to coreMol using aIdxMap
MatchVectType::iterator mati;
int newID;
for (mati = aidToFid.begin(); mati != aidToFid.end(); mati++) {
newID = aIdxMap[mati->first];
aToFmap.push_back(std::pair<int, int>(newID, mati->second));
}
return coreMol;
}
}
|