File: SubstructureCache.h

package info (click to toggle)
rdkit 202503.1-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 220,160 kB
  • sloc: cpp: 399,240; python: 77,453; ansic: 25,517; java: 8,173; javascript: 4,005; sql: 2,389; yacc: 1,565; lex: 1,263; cs: 1,081; makefile: 580; xml: 229; fortran: 183; sh: 105
file content (171 lines) | stat: -rw-r--r-- 5,746 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
//
//  Copyright (C) 2014 Novartis Institutes for BioMedical Research
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
#include <RDGeneral/export.h>
#pragma once
#include <list>
#include <vector>
#include <string>
#include <stdexcept>
#include "../RDKitBase.h"
#include "Graph.h"
#include "Seed.h"
#include "DebugTrace.h"  // algorithm filter definitions

namespace RDKit {
namespace FMCS {
class RDKIT_FMCS_EXPORT SubstructureCache {
 public:
#pragma pack(push, 1)
  struct KeyNumericMetrics {
    typedef unsigned long long TValue;
    TValue Value{0};

   public:
    KeyNumericMetrics() {}
  };
#pragma pack(pop)

  struct HashKey {
    KeyNumericMetrics NumericMetrics;

   public:
    void computeKey(const Seed &seed,
                    const std::vector<unsigned int> &queryAtomLabels,
                    const std::vector<unsigned int> &queryBondLabels) {
      computeMorganCodeHash(seed, queryAtomLabels, queryBondLabels);
    }

   private:
    void computeMorganCodeHash(
        const Seed &seed, const std::vector<unsigned int> &queryAtomLabels,
        const std::vector<unsigned int> &queryBondLabels) {
      size_t nv = seed.getNumAtoms();
      size_t ne = seed.getNumBonds();
      std::vector<unsigned long> currCodes(nv);
      std::vector<unsigned long> prevCodes(nv);
      size_t nIterations = seed.getNumBonds();
      if (nIterations > 5) {
        nIterations = 5;
      }

      for (unsigned int seedAtomIdx = 0; seedAtomIdx < seed.getNumAtoms();
           ++seedAtomIdx) {
        currCodes[seedAtomIdx] = queryAtomLabels.at(
            seed.MoleculeFragment.Atoms.at(seedAtomIdx)->getIdx());
      }

      for (size_t iter = 0; iter < nIterations; ++iter) {
        for (size_t i = 0; i < nv; ++i) {
          prevCodes[i] = currCodes[i];
        }

        for (size_t seedBondIdx = 0; seedBondIdx < ne; ++seedBondIdx) {
          const Bond *bond = seed.MoleculeFragment.Bonds[seedBondIdx];
          unsigned int order = queryBondLabels.at(
              seed.MoleculeFragment.Bonds.at(seedBondIdx)->getIdx());
          unsigned int atom1 = seed.MoleculeFragment.SeedAtomIdxMap
                                   .find(bond->getBeginAtomIdx())
                                   ->second;
          unsigned int atom2 =
              seed.MoleculeFragment.SeedAtomIdxMap.find(bond->getEndAtomIdx())
                  ->second;
          unsigned int v1 = prevCodes[atom1];
          unsigned int v2 = prevCodes[atom2];

          currCodes[atom1] += v2 * v2 + (v2 + 23) * (order + 1721);
          currCodes[atom2] += v1 * v1 + (v1 + 23) * (order + 1721);
        }
      }

      KeyNumericMetrics::TValue result = 0;
      for (unsigned int seedAtomIdx = 0; seedAtomIdx < nv; ++seedAtomIdx) {
        unsigned long code = currCodes[seedAtomIdx];
        result += code * (code + 6849) + 29;
      }

      NumericMetrics.Value = result;
    }

    // not implemented yet:
    /*
                void computeFingerprint(const Seed& seed)
                {
                unsigned int radius = seed.getNumBonds();
                if (radius > 5)
                    radius = 5;
                ExplicitBitVect *mf =
       RDKit::MorganFingerprints::getFingerprintAsBitVect(seed.GraphTopology,
       radius);   //SLOW !!!
                // ...
                delete mf;
                    NumericMetrics.Field.hasFingerprint = 1;
                }
    */
  };

  typedef HashKey TKey;
  typedef std::list<FMCS::Graph> TIndexEntry;  // hash-key is not unique key
 private:
  std::vector<TIndexEntry> ValueStorage;
  std::map<KeyNumericMetrics::TValue, size_t> NumericIndex;  // TIndexEntry
 public:
  // returns computed key, and pointer to index entry with a set of subgraphs
  // corresponding to the key if found.
  // then caller must find exactly matched subgraph in the result set with own
  // search algorithm,
  // including a resolving of collisions of hash key
  TIndexEntry *find(const Seed &seed,
                    const std::vector<unsigned int> &queryAtomLabels,
                    const std::vector<unsigned int> &queryBondLabels,
                    TKey &key) {  // compute key and find entry
    key.computeKey(seed, queryAtomLabels, queryBondLabels);
    const auto entryit = NumericIndex.find(key.NumericMetrics.Value);
    if (NumericIndex.end() != entryit) {
      return &ValueStorage[entryit->second];
    }
    return nullptr;  // not found
  }

  // if find() did not found any entry for this key of seed a new entry will be
  // created
  void add(const Seed &seed, TKey &key,
           TIndexEntry *entry) {  // "compute" value and store it in NEW entry
                                  // if not found
    if (!entry) {
      try {
        ValueStorage.emplace_back();
      } catch (...) {
        return;  // not enough memory room to add the item, but it's just a
                 // cache
      }
      entry = &ValueStorage.back();
    }
    entry->push_back(seed.Topology);

    if (!NumericIndex
             .insert(std::make_pair(key.NumericMetrics.Value,
                                    ValueStorage.size() - 1))
             .second) {
      return;  // not enough memory room to add the item, but it is just cache
    }
  }

  size_t keyssize() const {  // for statistics only
    return ValueStorage.size();
  }

  size_t fullsize() const {  // for statistics only
    return std::accumulate(
        ValueStorage.begin(), ValueStorage.end(), 0,
        [](const auto &acc, const auto &v) { return acc + v.size(); });
  }
};
}  // namespace FMCS
}  // namespace RDKit