File: AlignMolecules.h

package info (click to toggle)
rdkit 202503.1-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 220,160 kB
  • sloc: cpp: 399,240; python: 77,453; ansic: 25,517; java: 8,173; javascript: 4,005; sql: 2,389; yacc: 1,565; lex: 1,263; cs: 1,081; makefile: 580; xml: 229; fortran: 183; sh: 105
file content (305 lines) | stat: -rw-r--r-- 14,747 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
//
//  Copyright (C) 2001-2022 Greg Landrum and other RDKit contributors
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
#include <RDGeneral/export.h>
#ifndef _RD_ALIGNMOLECULES_H_
#define _RD_ALIGNMOLECULES_H_

#include <Geometry/Transform3D.h>
#include <Numerics/Vector.h>
#include <exception>
#include <string>
#include <utility>
#include <vector>

namespace RDKit {
//! A list of atom-index pairs; the alignment functions declared in this
//! header use it as (probe AtomId, ref AtomId) mappings
using MatchVectType = std::vector<std::pair<int, int>>;

// Forward declarations: the functions declared below only take ROMol/RWMol
// by reference, so this header does not need their full definitions.
class Conformer;
class ROMol;
class RWMol;
namespace MolAlign {
//! Exception class of the MolAlign namespace; stores a message that is
//! reported through what()
class RDKIT_MOLALIGN_EXPORT MolAlignException : public std::exception {
 public:
  //! construct with an error message
  /*!
    \param msg  NUL-terminated message text. A null pointer is stored as an
                empty message, because constructing a std::string from a
                null pointer is undefined behavior.
  */
  MolAlignException(const char *msg) : _msg(msg ? msg : "") {}
  //! construct with an error message
  /*!
    \param msg  message text; taken by value and moved into the exception so
                the string is copied at most once (the caller's string is
                left untouched unless the caller moves it in explicitly)
  */
  MolAlignException(std::string msg) : _msg(std::move(msg)) {}
  //! get the error message
  /*!
    \return pointer to the message stored in (and owned by) this exception
  */
  const char *what() const noexcept override { return _msg.c_str(); }
  ~MolAlignException() noexcept override = default;

 private:
  std::string _msg;  //!< the text returned by what()
};

//! Alignment functions

//! Compute the transformation required to align a molecule
/*!
  The 3D transformation required to align the specified conformation in the
  probe molecule to a specified conformation in the reference molecule is
  computed so that the root mean squared distance between a specified set of
  atoms is minimized. Both molecules are taken by const reference; the
  transform is returned through \c trans.

  \param prbMol    molecule that is to be aligned
  \param refMol    molecule used as the reference for the alignment
  \param trans     storage for the computed transform
  \param prbCid    ID of the conformation in the probe to be used
                   for the alignment (defaults to first conformation)
  \param refCid    ID of the conformation in the ref molecule to which
                   the alignment is computed (defaults to first conformation)
  \param atomMap   a vector of pairs of atom IDs (probe AtomId, ref AtomId)
                   used to compute the alignments. If this mapping is
                   not specified, an attempt is made to generate one by
                   substructure matching
  \param weights   optionally specify weights for each of the atom pairs
  \param reflect   if true reflect the conformation of the probe molecule
  \param maxIters  maximum number of iterations used in minimizing the RMSD

  <b>Returns</b>
  RMSD value
*/
RDKIT_MOLALIGN_EXPORT double getAlignmentTransform(
    const ROMol &prbMol, const ROMol &refMol, RDGeom::Transform3D &trans,
    int prbCid = -1, int refCid = -1, const MatchVectType *atomMap = nullptr,
    const RDNumeric::DoubleVector *weights = nullptr, bool reflect = false,
    unsigned int maxIters = 50);

//! Optimally (minimum RMSD) align a molecule to another molecule
/*!
  The 3D transformation required to align the specified conformation in the
  probe molecule to a specified conformation in the reference molecule is
  computed so that the root mean squared distance between a specified set of
  atoms is minimized. This transform is then applied to the specified
  conformation in the probe molecule, which is why \c prbMol is taken by
  non-const reference.

  \param prbMol    molecule that is to be aligned
  \param refMol    molecule used as the reference for the alignment
  \param prbCid    ID of the conformation in the probe to be used
                   for the alignment (defaults to first conformation)
  \param refCid    ID of the conformation in the ref molecule to which
                   the alignment is computed (defaults to first conformation)
  \param atomMap   a vector of pairs of atom IDs (probe AtomId, ref AtomId)
                   used to compute the alignments. If this mapping is
                   not specified, an attempt is made to generate one by
                   substructure matching
  \param weights   optionally specify weights for each of the atom pairs
  \param reflect   if true reflect the conformation of the probe molecule
  \param maxIters  maximum number of iterations used in minimizing the RMSD

  <b>Returns</b>
  RMSD value
*/
RDKIT_MOLALIGN_EXPORT double alignMol(
    ROMol &prbMol, const ROMol &refMol, int prbCid = -1, int refCid = -1,
    const MatchVectType *atomMap = nullptr,
    const RDNumeric::DoubleVector *weights = nullptr, bool reflect = false,
    unsigned int maxIters = 50);

//! Compute the optimal RMS, transformation and atom map for aligning
//! two molecules, taking symmetry into account. Molecule coordinates
//! are left unaltered.
/*!
  This function will attempt to align all permutations of matching atom
  orders in both molecules, for some molecules it will lead to 'combinatorial
  explosion' especially if hydrogens are present.
  Use 'RDKit::MolAlign::getAlignmentTransform' to align molecules
  without changing the atom order.

  \param prbMol     the molecule to be aligned to the reference
  \param refMol     the reference molecule
  \param bestTrans  storage for the best computed transform
  \param bestMatch  storage for the MatchVectType corresponding to
                    the best match found.
  \param prbCid     (optional) probe conformation to use
  \param refCid     (optional) reference conformation to use
  \param map        (optional) a vector of vectors of pairs of atom IDs
                    (probe AtomId, ref AtomId) used to compute the alignments.
                    If not provided, these will be generated using a
                    substructure search.
  \param maxMatches (optional) if map is empty, this will be the max number of
                    matches found in a SubstructMatch(); defaults to 1000000.
  \param symmetrizeConjugatedTerminalGroups (optional) if set, conjugated
                    terminal functional groups (like nitro or carboxylate)
                    will be considered symmetrically
  \param weights    (optional) weights for each pair of atoms.
  \param reflect    if true reflect the conformation of the probe molecule
  \param maxIters   maximum number of iterations used in minimizing the RMSD
  \param numThreads (optional) number of threads to use during the calculation

  <b>Returns</b>
  Best RMSD value found
*/
// The maxMatches default is spelled as an integer literal so that no
// implicit double -> int conversion happens in this public header.
RDKIT_MOLALIGN_EXPORT double getBestAlignmentTransform(
    const ROMol &prbMol, const ROMol &refMol, RDGeom::Transform3D &bestTrans,
    MatchVectType &bestMatch, int prbCid = -1, int refCid = -1,
    const std::vector<MatchVectType> &map = std::vector<MatchVectType>(),
    int maxMatches = 1000000, bool symmetrizeConjugatedTerminalGroups = true,
    const RDNumeric::DoubleVector *weights = nullptr, bool reflect = false,
    unsigned int maxIters = 50, int numThreads = 1);

//! Returns the optimal RMS for aligning two molecules, taking
//! symmetry into account. As a side-effect, the probe molecule is
//! left in the aligned state.
/*!
  This function will attempt to align all permutations of matching atom
  orders in both molecules, for some molecules it will lead to 'combinatorial
  explosion' especially if hydrogens are present.
  Use 'RDKit::MolAlign::alignMol' to align molecules without changing the
  atom order.

  \param prbMol     the molecule to be aligned to the reference
  \param refMol     the reference molecule
  \param prbCid     (optional) probe conformation to use
  \param refCid     (optional) reference conformation to use
  \param map        (optional) a vector of vectors of pairs of atom IDs
                    (probe AtomId, ref AtomId) used to compute the alignments.
                    If not provided, these will be generated using a
                    substructure search.
  \param maxMatches (optional) if map is empty, this will be the max number of
                    matches found in a SubstructMatch(); defaults to 1000000.
  \param symmetrizeConjugatedTerminalGroups (optional) if set, conjugated
                    terminal functional groups (like nitro or carboxylate)
                    will be considered symmetrically
  \param weights    (optional) weights for each pair of atoms.
  \param numThreads (optional) number of threads to use during the calculation

  <b>Returns</b>
  Best RMSD value found
*/
// The maxMatches default is spelled as an integer literal so that no
// implicit double -> int conversion happens in this public header.
RDKIT_MOLALIGN_EXPORT double getBestRMS(
    ROMol &prbMol, const ROMol &refMol, int prbCid = -1, int refCid = -1,
    const std::vector<MatchVectType> &map = std::vector<MatchVectType>(),
    int maxMatches = 1000000, bool symmetrizeConjugatedTerminalGroups = true,
    const RDNumeric::DoubleVector *weights = nullptr, int numThreads = 1);

//! Returns the symmetric distance matrix between the conformers of a
//! molecule.
//! getBestRMS() is used to calculate the inter-conformer distances
/*!
  This function will attempt to align all permutations of matching atom
  orders in both molecules, for some molecules it will lead to 'combinatorial
  explosion' especially if hydrogens are present.

  \param mol        the molecule to be considered
  \param numThreads (optional) number of threads to use during the calculation
  \param map        (optional) a vector of vectors of pairs of atom IDs
                    (probe AtomId, ref AtomId) used to compute the alignments.
                    If not provided, these will be generated using a
                    substructure search.
  \param maxMatches (optional) if map is empty, this will be the max number of
                    matches found in a SubstructMatch(); defaults to 1000000.
  \param symmetrizeConjugatedTerminalGroups (optional) if set, conjugated
                    terminal functional groups (like nitro or carboxylate)
                    will be considered symmetrically
  \param weights    (optional) weights for each pair of atoms.

  <b>Returns</b>
  a vector with the RMSD values stored in the order:
    [(1,0), (2,0), (2,1), (3,0), (3, 2), (3,1), ...]
*/
// NOTE(review): the "(3, 2), (3,1)" entries in the order documented above
// look swapped relative to the preceding pattern - confirm against the
// implementation before relying on the element order.
// The maxMatches default is spelled as an integer literal so that no
// implicit double -> int conversion happens in this public header.
RDKIT_MOLALIGN_EXPORT std::vector<double> getAllConformerBestRMS(
    const ROMol &mol, int numThreads = 1,
    const std::vector<MatchVectType> &map = std::vector<MatchVectType>(),
    int maxMatches = 1000000, bool symmetrizeConjugatedTerminalGroups = true,
    const RDNumeric::DoubleVector *weights = nullptr);

//! Returns the RMS between two molecules, taking symmetry into account.
//! In contrast to getBestRMS, the RMS is computed "in place", i.e.
//! probe molecules are not aligned to the reference ahead of the
//! RMS calculation. This is useful, for example, to compute
//! the RMSD between docking poses and the co-crystallized ligand.
/*!
  This function will attempt to match all permutations of matching atom
  orders in both molecules, for some molecules it will lead to 'combinatorial
  explosion' especially if hydrogens are present.

  \param prbMol     the probe molecule (compared "in place", see above)
  \param refMol     the reference molecule
  \param prbCid     (optional) probe conformation to use
  \param refCid     (optional) reference conformation to use
  \param map        (optional) a vector of vectors of pairs of atom IDs
                    (probe AtomId, ref AtomId) used to compute the alignments.
                    If not provided, these will be generated using a
                    substructure search.
  \param maxMatches (optional) if map is empty, this will be the max number of
                    matches found in a SubstructMatch(); defaults to 1000000.
  \param symmetrizeConjugatedTerminalGroups (optional) if set, conjugated
                    terminal functional groups (like nitro or carboxylate)
                    will be considered symmetrically
  \param weights    (optional) weights for each pair of atoms.

  <b>Returns</b>
  Best RMSD value found
*/
// The maxMatches default is spelled as an integer literal so that no
// implicit double -> int conversion happens in this public header.
RDKIT_MOLALIGN_EXPORT double CalcRMS(
    ROMol &prbMol, const ROMol &refMol, int prbCid = -1, int refCid = -1,
    const std::vector<MatchVectType> &map = std::vector<MatchVectType>(),
    int maxMatches = 1000000, bool symmetrizeConjugatedTerminalGroups = true,
    const RDNumeric::DoubleVector *weights = nullptr);

//! Returns the RMS between two molecules, taking symmetry into account.
//! In contrast to getBestRMS, the RMS is computed "in place", i.e.
//! probe molecules are not aligned to the reference ahead of the
//! RMS calculation. This is useful, for example, to compute
//! the RMSD between docking poses and the co-crystallized ligand.
/*!
  This function will attempt to match all permutations of matching atom
  orders in both molecules, for some molecules it will lead to 'combinatorial
  explosion' especially if hydrogens are present.

  Unlike the other CalcRMS overload declared in this header, this one has no
  symmetrizeConjugatedTerminalGroups argument and no default arguments, so
  every argument must be passed explicitly.

  \param prbMol     the probe molecule (compared "in place", see above)
  \param refMol     the reference molecule
  \param prbCid     probe conformation to use
  \param refCid     reference conformation to use
  \param map        a vector of vectors of pairs of atom IDs
                    (probe AtomId, ref AtomId) used to compute the alignments.
                    If not provided, these will be generated using a
                    substructure search.
  \param maxMatches if map is empty, this will be the max number of
                    matches found in a SubstructMatch().
  \param weights    weights for each pair of atoms.

  <b>Returns</b>
  Best RMSD value found
*/
RDKIT_MOLALIGN_EXPORT double CalcRMS(ROMol &prbMol, const ROMol &refMol,
                                     int prbCid, int refCid,
                                     const std::vector<MatchVectType> &map,
                                     int maxMatches,
                                     const RDNumeric::DoubleVector *weights);

//! Align the conformations of a molecule using a common set of atoms. If
//! the molecule contains queries, then the queries must also match exactly.

/*!
  \param mol       The molecule of interest.
  \param atomIds   vector of atoms to be used to generate the alignment.
                   All atoms will be used if not specified
  \param confIds   vector of conformations to align - defaults to all
  \param weights   (optional) weights for each pair of atoms.
  \param reflect   toggles reflecting (about the origin) the alignment
  \param maxIters  the maximum number of iterations to attempt
  \param RMSlist   if non-null, this will be used to return the RMS values
                   between the reference conformation and the other aligned
                   conformations
*/
RDKIT_MOLALIGN_EXPORT void alignMolConformers(
    ROMol &mol, const std::vector<unsigned int> *atomIds = nullptr,
    const std::vector<unsigned int> *confIds = nullptr,
    const RDNumeric::DoubleVector *weights = nullptr, bool reflect = false,
    unsigned int maxIters = 50, std::vector<double> *RMSlist = nullptr);

// Helper declarations grouped in a nested "details" namespace.
namespace details {
//! Converts terminal atoms in groups like nitro or carboxylate to be symmetry
//! equivalent
/*!
  \param mol  the molecule to process; passed by non-const reference
              (presumably modified in place - confirm against the
              implementation)
*/
RDKIT_MOLALIGN_EXPORT void symmetrizeTerminalAtoms(RWMol &mol);
}  // namespace details
}  // namespace MolAlign
}  // namespace RDKit
#endif