File: SubstructMatch.h

package info (click to toggle)
rdkit 202503.6-3
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 222,000 kB
  • sloc: cpp: 411,111; python: 78,482; ansic: 26,181; java: 8,285; javascript: 4,404; sql: 2,393; yacc: 1,626; lex: 1,267; cs: 1,090; makefile: 581; xml: 229; fortran: 183; sh: 121
file content (262 lines) | stat: -rw-r--r-- 10,828 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
//
//  Copyright (C) 2001-2025 Greg Landrum and other RDKit contributors
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
#include <RDGeneral/export.h>
#ifndef RD_SUBSTRUCTMATCH_H
#define RD_SUBSTRUCTMATCH_H

// std bits
#include <vector>

#include <unordered_set>
#include <functional>
#include <unordered_map>
#include <cstdint>
#include <string>
#include <span>

#include <boost/dynamic_bitset.hpp>
#if BOOST_VERSION >= 107100
#define RDK_INTERNAL_BITSET_HAS_HASH
#endif

#include <GraphMol/StereoGroup.h>

namespace RDKit {
class ROMol;
class Atom;
class Bond;
class ResonanceMolSupplier;
class MolBundle;

//! \brief used to return matches from substructure searching.
//!   The format is (queryAtomIdx, molAtomIdx)
typedef std::vector<std::pair<int, int>> MatchVectType;

//! \brief Parameters controlling how substructure matching is carried out.
//!   Every member has an in-class default, and a default-constructed instance
//!   is what the SubstructMatch() overloads declared in this header use when
//!   no parameters are supplied.
struct RDKIT_SUBSTRUCTMATCH_EXPORT SubstructMatchParameters {
  bool useChirality = false;  //!< Use chirality in determining whether or not
                              //!< atoms/bonds match
  bool useEnhancedStereo = false;  //!< Use enhanced stereochemistry in
                                   //!< determining whether atoms/bonds match
  bool aromaticMatchesConjugated = false;  //!< Aromatic and conjugated bonds
                                           //!< match each other
  bool useQueryQueryMatches = false;  //!< Consider query-query matches, not
                                      //!< just simple matches
  bool useGenericMatchers = false;    //!< Looks for generic atoms in the query
                                      //!< and uses them as part of the matching
  bool recursionPossible = true;      //!< Allow recursive queries
  bool uniquify = true;            //!< uniquify (by atom index) match results
  unsigned int maxMatches = 1000;  //!< maximum number of matches to return
  int numThreads = 1;  //!< number of threads to use when multi-threading
                       //!< is possible. 0 selects the number of
                       //!< concurrent threads supported by the hardware;
                       //!< negative values are added to the number of
                       //!< concurrent threads supported by the hardware
  std::vector<std::string> atomProperties;  //!< atom properties that must be
                                            //!< equivalent in order to match
  std::vector<std::string> bondProperties;  //!< bond properties that must be
                                            //!< equivalent in order to match
  std::function<bool(const ROMol &mol,
                     std::span<const unsigned int> match)>
      extraFinalCheck;  //!< a function to be called at the end to validate a
                        //!< match; it receives the molecule and the candidate
                        //!< match and returns a bool (NOTE(review): presumably
                        //!< true accepts the match and \c match holds
                        //!< molecule atom indices - confirm against the
                        //!< implementation)
  unsigned int maxRecursiveMatches =
      1000;  //!< maximum number of matches that the recursive substructure
             //!< matching should return
  bool specifiedStereoQueryMatchesUnspecified =
      false;  //!< If set, query atoms and bonds with specified stereochemistry
              //!< will match atoms and bonds with unspecified stereochemistry
  bool aromaticMatchesSingleOrDouble = false;  //!< Aromatic bonds match single
                                               //!< or double bonds
  //! Default constructor: every member keeps its in-class initializer.
  SubstructMatchParameters() {}
};

//! Update a SubstructMatchParameters object in place from JSON text
/*!
    \param params  The parameters object to be modified
    \param json    JSON text describing the new settings
                   (NOTE(review): the accepted keys and the handling of
                   malformed input are defined by the implementation, which is
                   not part of this header - confirm there)
*/
RDKIT_SUBSTRUCTMATCH_EXPORT void updateSubstructMatchParamsFromJSON(
    SubstructMatchParameters &params, const std::string &json);
//! Produce a JSON string from a SubstructMatchParameters object
/*!
    \param params  The parameters to be converted

    \return the JSON text (NOTE(review): presumably in the form accepted by
            updateSubstructMatchParamsFromJSON() - confirm)
*/
RDKIT_SUBSTRUCTMATCH_EXPORT std::string substructMatchParamsToJSON(
    const SubstructMatchParameters &params);

//! Find substructure matches for a query in a molecule
/*!
    \param mol     The ROMol to be searched
    \param query   The query ROMol
    \param params  Parameters controlling the matching; a default-constructed
                   SubstructMatchParameters is used when omitted

    \return The matches, if any

*/
RDKIT_SUBSTRUCTMATCH_EXPORT std::vector<MatchVectType> SubstructMatch(
    const ROMol &mol, const ROMol &query,
    const SubstructMatchParameters &params = SubstructMatchParameters());

//! Find all substructure matches for a query in a ResonanceMolSupplier object
/*!
    \param resMolSuppl The ResonanceMolSupplier object to be searched
    \param query       The query ROMol
    \param params      Parameters controlling the matching; a
                       default-constructed SubstructMatchParameters is used
                       when omitted

    \return The matches, if any

*/
RDKIT_SUBSTRUCTMATCH_EXPORT std::vector<MatchVectType> SubstructMatch(
    ResonanceMolSupplier &resMolSuppl, const ROMol &query,
    const SubstructMatchParameters &params = SubstructMatchParameters());

//! Find substructure matches for a query ROMol in a MolBundle
/*!
    \param bundle  The MolBundle to be searched
    \param query   The query ROMol
    \param params  Parameters controlling the matching; a default-constructed
                   SubstructMatchParameters is used when omitted

    \return The matches, if any (NOTE(review): how results from the individual
            bundle members are combined is decided by the implementation, which
            is not part of this header - confirm there)
*/
RDKIT_SUBSTRUCTMATCH_EXPORT std::vector<MatchVectType> SubstructMatch(
    const MolBundle &bundle, const ROMol &query,
    const SubstructMatchParameters &params = SubstructMatchParameters());
//! Find substructure matches for a MolBundle query in a molecule
/*!
    \param mol     The ROMol to be searched
    \param query   The query MolBundle
    \param params  Parameters controlling the matching; a default-constructed
                   SubstructMatchParameters is used when omitted

    \return The matches, if any
*/
RDKIT_SUBSTRUCTMATCH_EXPORT std::vector<MatchVectType> SubstructMatch(
    const ROMol &mol, const MolBundle &query,
    const SubstructMatchParameters &params = SubstructMatchParameters());
//! Find substructure matches for a MolBundle query in a MolBundle
/*!
    \param bundle  The MolBundle to be searched
    \param query   The query MolBundle
    \param params  Parameters controlling the matching; a default-constructed
                   SubstructMatchParameters is used when omitted

    \return The matches, if any
*/
RDKIT_SUBSTRUCTMATCH_EXPORT std::vector<MatchVectType> SubstructMatch(
    const MolBundle &bundle, const MolBundle &query,
    const SubstructMatchParameters &params = SubstructMatchParameters());

//! Find a substructure match for a query
/*!
    \param mol       The object to be searched
    \param query     The query
    \param matchVect Used to return the match
                     (pre-existing contents will be deleted)
    \param recursionPossible  flags whether or not recursive matches are allowed
    \param useChirality  use atomic CIP codes as part of the comparison
    \param useQueryQueryMatches  if set, the contents of atom and bond queries
                                 will be used as part of the matching

    \return whether or not a match was found

*/
template <typename T1, typename T2>
bool SubstructMatch(T1 &mol, const T2 &query, MatchVectType &matchVect,
                    bool recursionPossible = true, bool useChirality = false,
                    bool useQueryQueryMatches = false) {
  SubstructMatchParameters params;
  params.recursionPossible = recursionPossible;
  params.useChirality = useChirality;
  params.useQueryQueryMatches = useQueryQueryMatches;
  params.maxMatches = 1;
  std::vector<MatchVectType> matchVects = SubstructMatch(mol, query, params);
  if (matchVects.size()) {
    matchVect = matchVects.front();
  } else {
    matchVect.clear();
  }
  return matchVect.size() != 0;
};

//! Find all substructure matches for a query
/*!
    \param mol       The object to be searched
    \param query     The query
    \param matchVect Used to return the matches
                     (pre-existing contents will be deleted)
    \param uniquify  Toggles uniquification (by atom index) of the results
    \param recursionPossible  flags whether or not recursive matches are allowed
    \param useChirality  use atomic CIP codes as part of the comparison
    \param useQueryQueryMatches  if set, the contents of atom and bond queries
                                 will be used as part of the matching
    \param maxMatches  The maximum number of matches that will be returned.
                       In high-symmetry cases with medium-sized molecules, it is
   very
                       easy to end up with a combinatorial explosion in the
   number of
                       possible matches. This argument prevents that from having
                       unintended consequences

    \return the number of matches found

*/
template <typename T1, typename T2>
unsigned int SubstructMatch(T1 &mol, const T2 &query,
                            std::vector<MatchVectType> &matchVect,
                            bool uniquify = true, bool recursionPossible = true,
                            bool useChirality = false,
                            bool useQueryQueryMatches = false,
                            unsigned int maxMatches = 1000,
                            int numThreads = 1) {
  SubstructMatchParameters params;
  params.uniquify = uniquify;
  params.recursionPossible = recursionPossible;
  params.useChirality = useChirality;
  params.useQueryQueryMatches = useQueryQueryMatches;
  params.maxMatches = maxMatches;
  params.numThreads = numThreads;
  matchVect = SubstructMatch(mol, query, params);
  return static_cast<unsigned int>(matchVect.size());
};

// ----------------------------------------------
//
// Explicit specialization of the single-match helper for searching a
// ResonanceMolSupplier with an ROMol query: the search is capped at one
// match and delegated to the ResonanceMolSupplier overload that takes a
// SubstructMatchParameters object.
//
template <>
inline bool SubstructMatch(ResonanceMolSupplier &resMolSupplier,
                           const ROMol &query, MatchVectType &matchVect,
                           bool recursionPossible, bool useChirality,
                           bool useQueryQueryMatches) {
  SubstructMatchParameters singleMatchParams;
  singleMatchParams.maxMatches = 1;
  singleMatchParams.useQueryQueryMatches = useQueryQueryMatches;
  singleMatchParams.useChirality = useChirality;
  singleMatchParams.recursionPossible = recursionPossible;

  const std::vector<MatchVectType> found =
      SubstructMatch(resMolSupplier, query, singleMatchParams);
  if (found.empty()) {
    // nothing found: make sure stale contents are not handed back
    matchVect.clear();
    return false;
  }
  matchVect = found.front();
  return !matchVect.empty();
}

// Explicit specialization of the all-matches helper for searching a
// ResonanceMolSupplier with an ROMol query: the flag arguments are packed
// into a SubstructMatchParameters object and the search is delegated to the
// ResonanceMolSupplier overload that takes that object. Returns the number
// of matches stored in matchVect.
template <>
inline unsigned int SubstructMatch(ResonanceMolSupplier &resMolSupplier,
                                   const ROMol &query,
                                   std::vector<MatchVectType> &matchVect,
                                   bool uniquify, bool recursionPossible,
                                   bool useChirality, bool useQueryQueryMatches,
                                   unsigned int maxMatches, int numThreads) {
  SubstructMatchParameters searchParams;
  searchParams.numThreads = numThreads;
  searchParams.maxMatches = maxMatches;
  searchParams.useQueryQueryMatches = useQueryQueryMatches;
  searchParams.useChirality = useChirality;
  searchParams.recursionPossible = recursionPossible;
  searchParams.uniquify = uniquify;

  matchVect = SubstructMatch(resMolSupplier, query, searchParams);
  const auto numFound = static_cast<unsigned int>(matchVect.size());
  return numFound;
}

//! Class used as a final step to confirm whether or not a given atom->atom
//! mapping is a valid substructure match.
/*!
    The query, the molecule and the parameters are held by reference, so the
    objects they refer to must outlive the functor.
*/
class RDKIT_SUBSTRUCTMATCH_EXPORT MolMatchFinalCheckFunctor {
 public:
  //! Construct a checker for matches of \c query against \c mol
  /*!
      \param query  The query molecule
      \param mol    The molecule being searched
      \param ps     Parameters controlling the matching
  */
  MolMatchFinalCheckFunctor(const ROMol &query, const ROMol &mol,
                            const SubstructMatchParameters &ps);

  //! Check a candidate mapping
  /*!
      \param q_c  NOTE(review): presumably the query atom indices of the
                  candidate mapping - confirm against the implementation
      \param m_c  NOTE(review): presumably the corresponding molecule atom
                  indices - confirm against the implementation

      \return NOTE(review): presumably true when the mapping is accepted as a
              match - confirm. The operator is non-const, so a call may modify
              the functor's state.
  */
  bool operator()(const std::uint32_t q_c[], const std::uint32_t m_c[]);

 private:
  const ROMol &d_query;                      //!< the query (not owned)
  const ROMol &d_mol;                        //!< the molecule (not owned)
  const SubstructMatchParameters &d_params;  //!< the parameters (not owned)
  //! Stereo groups looked up by an unsigned integer key (NOTE(review):
  //! presumably a molecule atom index - confirm against the implementation)
  std::unordered_map<unsigned int, StereoGroup const *> d_molStereoGroups;
#ifdef RDK_INTERNAL_BITSET_HAS_HASH
  // Boost 1.71 added support for std::hash with dynamic_bitset.
  using HashedStorageType = boost::dynamic_bitset<>;
#else
  // otherwise we use a less elegant solution
  using HashedStorageType = std::string;
#endif
  //! Hashed set of HashedStorageType values (NOTE(review): presumably one
  //! entry per match already encountered - confirm against the implementation)
  std::unordered_set<HashedStorageType> matchesSeen;
};

}  // namespace RDKit

#endif