1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238
|
// $Id$
//
// Copyright (C) 2003-2006 Rational Discovery LLC
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include <Catalogs/Catalog.h>
#include "FragFPGenerator.h"
#include "FragCatalogEntry.h"
#include "FragCatParams.h"
#include "FragCatalogUtils.h"
#include <RDGeneral/types.h>
#include <DataStructs/BitVects.h>
#include <GraphMol/RDKitBase.h>
#include <GraphMol/Subgraphs/SubgraphUtils.h>
#include <GraphMol/Subgraphs/Subgraphs.h>
namespace RDKit {
ExplicitBitVect *FragFPGenerator::getFPForMol(const ROMol &mol,
const FragCatalog &fcat) {
INT_PATH_LIST_MAP allPaths;
auto *fp = new ExplicitBitVect(fcat.getFPLength());
const FragCatParams *fparams = fcat.getCatalogParams();
// prepare the molecule for fingerprinting
// i.e. find functional groups, remove them from the mol etc.
MatchVectType newAidToFid;
ROMol *coreMol = prepareMol(mol, fparams, newAidToFid);
computeFP(*coreMol, fcat, newAidToFid, fp);
// this was Issue294;
delete coreMol;
return fp;
}
void FragFPGenerator::computeFP(const ROMol &mol, const FragCatalog &fcat,
const MatchVectType &aidToFid,
ExplicitBitVect *fp) {
PRECONDITION(fp, "Bad ExplicitBitVect - FingerPrint");
const FragCatParams *fparams = fcat.getCatalogParams();
int uLen = fparams->getUpperFragLength();
double tol = fparams->getTolerance();
DOUBLE_INT_MAP mapkm1, mapk;
// get all the paths in the molecule mapped by their order
INT_PATH_LIST_MAP allPathsMap = findAllSubgraphsOfLengthsMtoN(mol, 1, uLen);
// first deal with order 1 stuff
PATH_LIST_CI pi;
INT_VECT_CI eti;
const INT_VECT &o1entries = fcat.getEntriesOfOrder(1);
const FragCatalogEntry *entry;
int bitId;
double invar;
for (pi = allPathsMap[1].begin(); pi != allPathsMap[1].end(); pi++) {
// std::cout << "-*-*-* Fragment *-*-*-*-" << std::endl;
auto *nent = new FragCatalogEntry(&mol, (*pi), aidToFid);
nent->setDescription(fparams);
invar = computeIntVectPrimesProduct(*pi);
// ok here is the plan - initialize the entry for this path in mapkm1 to -1
// which will be overwritten to the correct entry id in the catalog if we
// find
// a match. This -1 initialization will be useful when we move onto higher
// order stuff
mapkm1[invar] = -1;
for (eti = o1entries.begin(); eti != o1entries.end(); eti++) {
entry = fcat.getEntryWithIdx(*eti);
if (nent->match(entry, tol)) {
bitId = entry->getBitId();
if (bitId >= 0) {
fp->setBit(bitId);
}
mapkm1[invar] = (*eti);
delete nent;
break;
}
}
}
// now deal with the higher order stuff.
// - for each higher order path obtain the invariants for order k-1 subpaths
// - if entries for this invariant values exist in mapkm1, it means that we
// have
// seen this subpath before
// - if the value for this entry is -1 it mean that we did not find a match
// for this subpath
// in the catalog i.e. we wont find a match for this order k path either
// - if the entries for all order k-1 subpaths have non-negative values -
// search the intersection
// of the down entries for these catalog entries to find a match for the
// order k path
INT_PATH_LIST_MAP_CI ordi;
double sinvar;
int entId;
for (ordi = allPathsMap.begin(); ordi != allPathsMap.end(); ordi++) {
if (ordi->first < 2) {
continue; // ignore order 1 paths - we are done with them
}
mapk.clear();
for (pi = ordi->second.begin(); pi != ordi->second.end(); pi++) {
invar = computeIntVectPrimesProduct(*pi);
mapk[invar] = -1;
auto *nent = new FragCatalogEntry(&mol, (*pi), aidToFid);
nent->setDescription(fparams);
// std::cout << "Testing 2nd order fragment: " << nent->getDescription()
// << std::endl;
int scnt = 0;
INT_VECT intersect, tmpVect;
INT_VECT_CI iti;
PATH_TYPE::const_iterator pii;
// loop over the subpaths (order (k-1) ) (by ignoring one bond
// at a time from consideration) and find out which entries
// int eh catalog they correspond to and make an interestion
// of the down entries (i.e. order k entries that contain
// these order k-1 entries. - we can baiscally limit our
// search for an isomorphic entry in the catalog of the order
// k path from the molecule to this intersection list
for (pii = (*pi).begin(); pii != (*pi).end(); pii++) {
sinvar = invar / firstThousandPrimes[*pii];
// here is a check for "did we see this path before ?"
// this should take care of disconnected subpaths (since the
// catalog should have only connected subgraphs)
if (mapkm1.find(sinvar) == mapkm1.end()) {
continue;
}
if (mapkm1[sinvar] == -1) {
// we have seen this order k-1 pat before but we couldn't find a match
// for it in the catalog
// which mean that we wont find a match for the order k path either in
// the catalog
intersect
.clear(); // the intersection list we built so far is useless
break;
}
entId = mapkm1[sinvar];
if (scnt == 0) {
intersect = fcat.getDownEntryList(entId);
scnt++;
} else {
tmpVect = intersect;
Intersect(fcat.getDownEntryList(entId), tmpVect, intersect);
scnt++;
}
}
// now search through the intersection list to check if we already have a
// isomorphic
// entry in the catalog
for (iti = intersect.begin(); iti != intersect.end(); iti++) {
entry = fcat.getEntryWithIdx(*iti);
if (nent->match(entry, tol)) {
mapk[invar] = (*iti);
bitId = entry->getBitId();
if (bitId >= 0) {
fp->setBit(bitId);
}
delete nent;
break;
}
}
}
// overwrite mapkm1 with mapk before we move on to order k+1
mapkm1 = mapk;
} // end of loop over path order
#if 0
/**************************************************************
* Another way of dealing with higher order paths - didn't work
* too well for compound with lots of matches
PATH_LIST_CI km11, km12, tmpM;
PATH_TYPE oKpath;
INT_VECT dEnt1, dEnt2, intersect;
INT_VECT_CI iti;
double sinvar1, sinvar2;
int i, j, eid1, eid2;
int ord = 2;
for (ord = 2; ord <= uLen; ord++) {
std::cout << "Order: " << ord << " " << km1matches.size() << "\n";
kmatches.clear();
mapk.clear();
for (km11 = km1matches.begin(); km11 != km1matches.end(); km11++) {
tmpM = km11;
tmpM++;
for (km12 = tmpM; km12 != km1matches.end(); km12++) {
Union((*km11), (*km12), oKpath);
if (oKpath.size() == ord) {
FragCatalogEntry *nent = new FragCatalogEntry(mol, oKpath, aidToFid);
sinvar1 = computeIntVectPrimesProduct(*km11);
sinvar2 = computeIntVectPrimesProduct(*km12);
eid1 = mapkm1[sinvar1];
eid2 = mapkm1[sinvar2];
Intersect(fcat.getDownEntryList(eid1),
fcat.getDownEntryList(eid2),
intersect);
// now search through the intersection list to check if we have a isomorphic
// entry in the catalog
for (iti = intersect.begin(); iti != intersect.end(); iti++) {
entry = fcat.getEntryWithIdx(*iti);
if (nent->match(entry, tol) ) {
bitId = entry->getBitId();
if (bitId >= 0) {
fp->setBit(bitId);
}
invar = computeIntVectPrimesProduct(oKpath);
kmatches.push_back(oKpath);
mapk[invar] = (*iti);
}
}// end of for loop over intersect entries
} // end of if block (if we found a order path)
}
}
km1matches = kmatches;
mapkm1 = mapk;
} // end of loop over higher order paths order
**************************/
#endif
}
}
|