1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171
|
//
// Copyright (C) 2014 Novartis Institutes for BioMedical Research
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include <RDGeneral/export.h>
#pragma once
#include <list>
#include <vector>
#include <string>
#include <stdexcept>
#include "../RDKitBase.h"
#include "Graph.h"
#include "Seed.h"
#include "DebugTrace.h" // algorithm filter definitions
namespace RDKit {
namespace FMCS {
class RDKIT_FMCS_EXPORT SubstructureCache {
public:
#pragma pack(push, 1)
struct KeyNumericMetrics {
typedef unsigned long long TValue;
TValue Value{0};
public:
KeyNumericMetrics() {}
};
#pragma pack(pop)
struct HashKey {
KeyNumericMetrics NumericMetrics;
public:
void computeKey(const Seed &seed,
const std::vector<unsigned int> &queryAtomLabels,
const std::vector<unsigned int> &queryBondLabels) {
computeMorganCodeHash(seed, queryAtomLabels, queryBondLabels);
}
private:
void computeMorganCodeHash(
const Seed &seed, const std::vector<unsigned int> &queryAtomLabels,
const std::vector<unsigned int> &queryBondLabels) {
size_t nv = seed.getNumAtoms();
size_t ne = seed.getNumBonds();
std::vector<unsigned long> currCodes(nv);
std::vector<unsigned long> prevCodes(nv);
size_t nIterations = seed.getNumBonds();
if (nIterations > 5) {
nIterations = 5;
}
for (unsigned int seedAtomIdx = 0; seedAtomIdx < seed.getNumAtoms();
++seedAtomIdx) {
currCodes[seedAtomIdx] = queryAtomLabels.at(
seed.MoleculeFragment.Atoms.at(seedAtomIdx)->getIdx());
}
for (size_t iter = 0; iter < nIterations; ++iter) {
for (size_t i = 0; i < nv; ++i) {
prevCodes[i] = currCodes[i];
}
for (size_t seedBondIdx = 0; seedBondIdx < ne; ++seedBondIdx) {
const Bond *bond = seed.MoleculeFragment.Bonds[seedBondIdx];
unsigned int order = queryBondLabels.at(
seed.MoleculeFragment.Bonds.at(seedBondIdx)->getIdx());
unsigned int atom1 = seed.MoleculeFragment.SeedAtomIdxMap
.find(bond->getBeginAtomIdx())
->second;
unsigned int atom2 =
seed.MoleculeFragment.SeedAtomIdxMap.find(bond->getEndAtomIdx())
->second;
unsigned int v1 = prevCodes[atom1];
unsigned int v2 = prevCodes[atom2];
currCodes[atom1] += v2 * v2 + (v2 + 23) * (order + 1721);
currCodes[atom2] += v1 * v1 + (v1 + 23) * (order + 1721);
}
}
KeyNumericMetrics::TValue result = 0;
for (unsigned int seedAtomIdx = 0; seedAtomIdx < nv; ++seedAtomIdx) {
unsigned long code = currCodes[seedAtomIdx];
result += code * (code + 6849) + 29;
}
NumericMetrics.Value = result;
}
// not implemented yet:
/*
void computeFingerprint(const Seed& seed)
{
unsigned int radius = seed.getNumBonds();
if (radius > 5)
radius = 5;
ExplicitBitVect *mf =
RDKit::MorganFingerprints::getFingerprintAsBitVect(seed.GraphTopology,
radius); //SLOW !!!
// ...
delete mf;
NumericMetrics.Field.hasFingerprint = 1;
}
*/
};
typedef HashKey TKey;
typedef std::list<FMCS::Graph> TIndexEntry; // hash-key is not unique key
private:
std::vector<TIndexEntry> ValueStorage;
std::map<KeyNumericMetrics::TValue, size_t> NumericIndex; // TIndexEntry
public:
// returns computed key, and pointer to index entry with a set of subgraphs
// corresponding to the key if found.
// then caller must find exactly matched subgraph in the result set with own
// search algorithm,
// including a resolving of collisions of hash key
TIndexEntry *find(const Seed &seed,
const std::vector<unsigned int> &queryAtomLabels,
const std::vector<unsigned int> &queryBondLabels,
TKey &key) { // compute key and find entry
key.computeKey(seed, queryAtomLabels, queryBondLabels);
const auto entryit = NumericIndex.find(key.NumericMetrics.Value);
if (NumericIndex.end() != entryit) {
return &ValueStorage[entryit->second];
}
return nullptr; // not found
}
// if find() did not found any entry for this key of seed a new entry will be
// created
void add(const Seed &seed, TKey &key,
TIndexEntry *entry) { // "compute" value and store it in NEW entry
// if not found
if (!entry) {
try {
ValueStorage.emplace_back();
} catch (...) {
return; // not enough memory room to add the item, but it's just a
// cache
}
entry = &ValueStorage.back();
}
entry->push_back(seed.Topology);
if (!NumericIndex
.insert(std::make_pair(key.NumericMetrics.Value,
ValueStorage.size() - 1))
.second) {
return; // not enough memory room to add the item, but it is just cache
}
}
size_t keyssize() const { // for statistics only
return ValueStorage.size();
}
size_t fullsize() const { // for statistics only
return std::accumulate(
ValueStorage.begin(), ValueStorage.end(), 0,
[](const auto &acc, const auto &v) { return acc + v.size(); });
}
};
} // namespace FMCS
} // namespace RDKit
|