File: RDDepictor.h

package info (click to toggle)
rdkit 202503.1-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 220,160 kB
  • sloc: cpp: 399,240; python: 77,453; ansic: 25,517; java: 8,173; javascript: 4,005; sql: 2,389; yacc: 1,565; lex: 1,263; cs: 1,081; makefile: 580; xml: 229; fortran: 183; sh: 105
file content (469 lines) | stat: -rw-r--r-- 20,859 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
//
//  Copyright (C) 2003-2022 Greg Landrum and other RDKit contributors
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//

#include <RDGeneral/export.h>
#ifndef RDDEPICTOR_H
#define RDDEPICTOR_H

#include <GraphMol/Substruct/SubstructMatch.h>
#include <RDGeneral/types.h>
#include <Geometry/point.h>
#include <boost/smart_ptr.hpp>

namespace RDKit {
class ROMol;
}

namespace RDDepict {

RDKIT_DEPICTOR_EXPORT extern bool
    preferCoordGen;  // Ignored if coordgen support isn't active

typedef boost::shared_array<double> DOUBLE_SMART_PTR;

class RDKIT_DEPICTOR_EXPORT DepictException : public std::exception {
 public:
  DepictException(const char *msg) : _msg(msg) {}
  DepictException(const std::string msg) : _msg(msg) {}
  const char *what() const noexcept override { return _msg.c_str(); }
  ~DepictException() noexcept override = default;

 private:
  std::string _msg;
};

//! \brief Set the path to the file containing the ring system templates
/*!

  \param templatePath the file path to a file containing the ring system
  templates. Each template must be a single line in the file represented using
  CXSMILES, and the structure should be a single ring system.

  \throws DepictException if any of the templates are invalid
*/
void RDKIT_DEPICTOR_EXPORT
setRingSystemTemplates(const std::string templatePath);

//! \brief Add ring system templates to be used in 2D coordinater generation.
/// If there are duplicates, the most recently added template will be used.
/*!

  \param templatePath the file path to a file containing the ring system
  templates. Each template must be a single line in the file represented using
  CXSMILES, and the structure should be a single ring system.

  \throws DepictException if any of the templates are invalid
*/
void RDKIT_DEPICTOR_EXPORT
addRingSystemTemplates(const std::string templatePath);

//! \brief Load default ring system templates to be used in 2D coordinate
//! generation
void RDKIT_DEPICTOR_EXPORT loadDefaultRingSystemTemplates();

struct RDKIT_DEPICTOR_EXPORT Compute2DCoordParameters {
  const RDGeom::INT_POINT2D_MAP *coordMap =
      nullptr;  //!< a map of int to Point2D, between atom IDs and their
                //!< locations.  This is the container the user needs to
                //!< fill if he/she wants to specify coordinates for a portion
                //!< of the molecule, defaults to 0
  bool canonOrient = false;  //!< canonicalize the orientation so that the long
                             //!< axes align with the x-axis etc.
  bool clearConfs = true;  //!< clear all existing conformations on the molecule
                           //!< before adding the 2D coordinates instead of
                           //!< simply adding to the list
  unsigned int nFlipsPerSample = 0;  //!< the number of rotatable bonds that are
                                     //!< flipped at random for each sample
  unsigned int nSamples = 0;         //!< the number of samples
  int sampleSeed = 0;                //!< seed for the random sampling process
  bool permuteDeg4Nodes = false;  //!< try permuting the drawing order of bonds
                                  //!< around atoms with four neighbors in order
                                  //!< to improve the depiction
  bool forceRDKit = false;        //!< use RDKit to generate coordinates even if
                                  //!< preferCoordGen is set to true
  bool useRingTemplates = false;  //!< whether to use ring system templates for
                                  //!< generating initial coordinates

};

//! \brief Generate 2D coordinates (a depiction) for a molecule
/*!

  \param mol the molecule were are interested in

  \param params parameters used for 2D coordinate generation

  \return ID of the conformation added to the molecule containing the
  2D coordinates

*/
RDKIT_DEPICTOR_EXPORT unsigned int compute2DCoords(
    RDKit::ROMol &mol, const Compute2DCoordParameters &params);

//! \brief Generate 2D coordinates (a depiction) for a molecule
/*!

  \param mol the molecule were are interested in

  \param coordMap a map of int to Point2D, between atom IDs and
  their locations.  This is the container the user needs to fill if
  he/she wants to specify coordinates for a portion of the molecule,
  defaults to 0

  \param canonOrient canonicalize the orientation so that the long
  axes align with the x-axis etc.

  \param clearConfs clear all existing conformations on the molecule
  before adding the 2D coordinates instead of simply adding to the
  list

  \param nFlipsPerSample - the number of rotatable bonds that are
  flipped at random for each sample

  \param nSamples - the number of samples

  \param sampleSeed - seed for the random sampling process

  \param permuteDeg4Nodes - try permuting the drawing order of bonds around
        atoms with four neighbors in order to improve the depiction

  \param forceRDKit - use RDKit to generate coordinates even if
        preferCoordGen is set to true

  \param useRingTemplates whether to use ring system templates for generating
      initial coordinates

  \return ID of the conformation added to the molecule containing the
  2D coordinates

*/
RDKIT_DEPICTOR_EXPORT unsigned int compute2DCoords(
    RDKit::ROMol &mol, const RDGeom::INT_POINT2D_MAP *coordMap = nullptr,
    bool canonOrient = false, bool clearConfs = true,
    unsigned int nFlipsPerSample = 0, unsigned int nSamples = 0,
    int sampleSeed = 0, bool permuteDeg4Nodes = false, bool forceRDKit = false,
    bool useRingTemplates = false);

//! \brief Compute the 2D coordinates such the interatom distances
///  mimic those in a distance matrix
/*!

  This function generates 2D coordinates such that the inter-atom
  distances mimic those specified via dmat. This is done by randomly
  sampling(flipping) the rotatable bonds in the molecule and
  evaluating a cost function which contains two components. The
  first component is the sum of inverse of the squared inter-atom
  distances, this helps in spreading the atoms far from each
  other. The second component is the sum of squares of the
  difference in distance between those in dmat and the generated
  structure.  The user can adjust the relative importance of the two
  components via a adjustable parameter (see below)

  ARGUMENTS:

  \param mol - molecule to generate coordinates for

  \param dmat - the distance matrix we want to mimic, this is a
  symmetric N by N matrix where N is the number of atoms in mol. All
  negative entries in dmat are ignored.

  \param canonOrient - canonicalize the orientation after the 2D
  embedding is done

  \param clearConfs - clear any previously existing conformations on
  mol before adding a conformation

  \param weightDistMat - A value between 0.0 and 1.0, this
  determines the importance of mimicing the inter atoms
  distances in dmat. (1.0 - weightDistMat) is the weight associated
  to spreading out the structure (density) in the cost function

  \param nFlipsPerSample - the number of rotatable bonds that are
  flipped at random for each sample

  \param nSamples - the number of samples

  \param sampleSeed - seed for the random sampling process

  \param permuteDeg4Nodes - try permuting the drawing order of bonds around
        atoms with four neighbors in order to improve the depiction

  \param forceRDKit - use RDKit to generate coordinates even if
        preferCoordGen is set to true

  \return ID of the conformation added to the molecule containing the
  2D coordinates


*/
RDKIT_DEPICTOR_EXPORT unsigned int compute2DCoordsMimicDistMat(
    RDKit::ROMol &mol, const DOUBLE_SMART_PTR *dmat = nullptr,
    bool canonOrient = true, bool clearConfs = true, double weightDistMat = 0.5,
    unsigned int nFlipsPerSample = 3, unsigned int nSamples = 100,
    int sampleSeed = 25, bool permuteDeg4Nodes = true, bool forceRDKit = false);

struct RDKIT_DEPICTOR_EXPORT ConstrainedDepictionParams {
  //! if false (default), a DepictException is thrown if the molecule
  /// does not have a substructure match to the reference;
  /// if true, an unconstrained depiction will be generated
  bool acceptFailure = false;
  //! if true, use RDKit to generate coordinates even if preferCoordGen
  /// is set to true; defaults to false
  bool forceRDKit = false;
  //! if true, terminal dummy atoms in the reference are ignored
  /// if they match an implicit hydrogen in the molecule or if they are
  /// attached to a query atom; defaults to false
  bool allowRGroups = false;
  //! if false (default), a part of the molecule is hard-constrained
  /// to have the same coordinates as the reference, and the rest of
  // the molecule is built around it; if true, coordinates
  /// from conformation existingConfId are preserved (if they exist)
  /// or generated without constraints (if they do not exist), then
  /// the conformation is rigid-body aligned to the reference
  bool alignOnly = false;
  //! if true (default), existing wedging information will be updated
  /// or cleared as required; if false (default), existing molblock
  /// wedging information will always be preserved
  bool adjustMolBlockWedging = true;
  //! conformation id whose 2D coordinates should be
  /// * rigid-body aligned to the reference (if alignOnly is true)
  /// * used to determine whether existing molblock wedging information
  ///   can be preserved following the constrained depiction (if
  ///   adjustMolBlockWedging is true)
  int existingConfId = -1;
};

//! \brief Compute 2D coordinates where a piece of the molecule is
///  constrained to have the same coordinates as a reference.
///  Correspondences between reference and molecule atom indices
///  are determined by refMatchVect.
/*!
  This function generates a depiction for a molecule where a piece of the
  molecule is constrained to have the same coordinates as a reference.

  This is useful for, for example, generating depictions of SAR data
  sets so that the cores of the molecules are all oriented the same way.
  This overload allow to specify the (referenceAtom, molAtom) index pairs
  which should be matched as MatchVectType. Please note that the
  vector can be shorter than the number of atoms in the reference.

  ARGUMENTS:

  \param mol -    the molecule to be aligned, this will come back
                  with a single conformer.
  \param reference -    a molecule with the reference atoms to align to;
                        this should have a depiction.
  \param refMatchVect -  a MatchVectType that will be used to
                         generate the atom mapping between the molecule
                         and the reference.
  \param confId - (optional) the id of the reference conformation to use
  \param params - (optional) an instance of ConstrainedDepictionParams
*/
RDKIT_DEPICTOR_EXPORT void generateDepictionMatching2DStructure(
    RDKit::ROMol &mol, const RDKit::ROMol &reference,
    const RDKit::MatchVectType &refMatchVect, int confId = -1,
    const ConstrainedDepictionParams &params = ConstrainedDepictionParams());

//! \brief Overload
/*!
  ARGUMENTS:

  \param mol -    the molecule to be aligned, this will come back
                  with a single conformer.
  \param reference -    a molecule with the reference atoms to align to;
                        this should have a depiction.
  \param refMatchVect -  a MatchVectType that will be used to
                         generate the atom mapping between the molecule
                         and the reference.
  \param confId -       the id of the reference conformation to use
  \param forceRDKit - use RDKit to generate coordinates even if
                      preferCoordGen is set to true
*/
RDKIT_DEPICTOR_EXPORT void generateDepictionMatching2DStructure(
    RDKit::ROMol &mol, const RDKit::ROMol &reference,
    const RDKit::MatchVectType &refMatchVect, int confId, bool forceRDKit);

//! \brief Compute 2D coordinates constrained to a reference;
///  the constraint can be hard (default) or soft.
/*!
  Hard (default, ConstrainedDepictionParams::alignOnly = false):
  Existing molecule coordinates, if present, are discarded;
  new coordinates are generated constraining a piece of the molecule
  to have the same coordinates as the reference, while the rest of
  the molecule is built around it.
  If ConstrainedDepictionParams::adjustMolBlockWedging is false
  (default), existing molblock wedging information is always preserved.
  If ConstrainedDepictionParams::adjustMolBlockWedging is true,
  existing molblock wedging information is preserved in case it
  only involves the invariant core and the core conformation has not
  changed, while it is cleared in case the wedging is also outside
  the invariant core, or core coordinates were changed.
  If ConstrainedDepictionParams::acceptFailure is set to true and no
  substructure match is found, coordinates will be recomputed from
  scratch, hence molblock wedging information will be cleared.

  Soft (ConstrainedDepictionParams::alignOnly = true):
  Existing coordinates in the conformation identified by
  ConstrainedDepictionParams::existingConfId are preserved if present,
  otherwise unconstrained new coordinates are generated.
  Subsequently, coodinates undergo a rigid-body alignment to the reference.
  If ConstrainedDepictionParams::adjustMolBlockWedging is false
  (default), existing molblock wedging information is always preserved.
  If ConstrainedDepictionParams::adjustMolBlockWedging is true,
  existing molblock wedging information is inverted in case the rigid-body
  alignment involved a flip around the Z axis.

  This is useful, for example, for generating depictions
  of SAR data sets such that the cores of the molecules are all oriented
  the same way.

  ARGUMENTS:

  \param mol -    the molecule to be aligned, this will come back
                  with a single conformer.
  \param reference -    a molecule with the reference atoms to align to;
                        this should have a depiction.
  \param confId -       (optional) the id of the reference conformation to use
  \param referencePattern -  (optional) a query molecule to be used to
                             generate the atom mapping between the molecule
                             and the reference.
  \param params -       (optional) a ConstrainedDepictionParams instance
  RETURNS:

  \return MatchVectType with (queryAtomidx, molAtomIdx) pairs used for
          the constrained depiction
*/
RDKIT_DEPICTOR_EXPORT RDKit::MatchVectType generateDepictionMatching2DStructure(
    RDKit::ROMol &mol, const RDKit::ROMol &reference, int confId = -1,
    const RDKit::ROMol *referencePattern =
        static_cast<const RDKit::ROMol *>(nullptr),
    const ConstrainedDepictionParams &params = ConstrainedDepictionParams());

//! \brief Compute 2D coordinates where a piece of the molecule is
///  constrained to have the same coordinates as a reference.
/*!
  This function generates a depiction for a molecule where a piece of the
  molecule is constrained to have the same coordinates as a reference.

  This is useful, for example, for generating depictions
  of SAR data sets such that the cores of the molecules are all oriented
  the same way.

  ARGUMENTS:

  \param mol -    the molecule to be aligned, this will come back
                  with a single conformer.
  \param reference -    a molecule with the reference atoms to align to;
                        this should have a depiction.
  \param confId -       the id of the reference conformation to use
  \param referencePattern -  a query molecule to be used to
                             generate the atom mapping between the molecule
                             and the reference.
  \param acceptFailure - if true, standard depictions will be
                         generated for molecules that don't have a substructure
                         match to the reference; if false, throws a
                         DepictException.
  \param forceRDKit - (optional) use RDKit to generate coordinates even if
                      preferCoordGen is set to true
  \param allowOptionalAttachments -  (optional) if true, terminal dummy atoms in
                         the reference are ignored if they match an implicit
                         hydrogen in the molecule, and a constrained
                         depiction is still attempted
  RETURNS:

  \return MatchVectType with (queryAtomidx, molAtomIdx) pairs used for
          the constrained depiction
*/
RDKIT_DEPICTOR_EXPORT RDKit::MatchVectType generateDepictionMatching2DStructure(
    RDKit::ROMol &mol, const RDKit::ROMol &reference, int confId,
    const RDKit::ROMol *referencePattern, bool acceptFailure,
    bool forceRDKit = false, bool allowOptionalAttachments = false);

//! \brief Generate a 2D depiction for a molecule where all or part of
///  it mimics the coordinates of a 3D reference structure.
/*!
  Generates a depiction for a molecule where a piece of the molecule
  is constrained to have coordinates similar to those of a 3D reference
  structure.

  ARGUMENTS:
  \param mol - the molecule to be aligned, this will come back
               with a single conformer containing 2D coordinates
  \param reference - a molecule with the reference atoms to align to.
                     By default this should be the same as mol, but with
                     3D coordinates
  \param confId - (optional) the id of the reference conformation to use
  \param refPattern - (optional) a query molecule to map a subset of
                      the reference onto the mol, so that only some of the
                      atoms are aligned.
  \param acceptFailure - (optional) if true, standard depictions will be
                         generated
                         for molecules that don't match the reference or the
                         referencePattern; if false, throws a DepictException.
  \param forceRDKit - (optional) use RDKit to generate coordinates even if
                      preferCoordGen is set to true
*/
RDKIT_DEPICTOR_EXPORT void generateDepictionMatching3DStructure(
    RDKit::ROMol &mol, const RDKit::ROMol &reference, int confId = -1,
    RDKit::ROMol *referencePattern = nullptr, bool acceptFailure = false,
    bool forceRDKit = false);

//! \brief Rotate the 2D depiction such that the majority of bonds have an
//! angle with the X axis which is a multiple of 30 degrees.
/*!

  ARGUMENTS:
  \param mol - the molecule to be rotated
  \param confId - (optional) the id of the reference conformation to use
  \param minimizeRotation - (optional) if false (the default), the molecule
  is rotated such that the majority of bonds have an angle with the
  X axis of 30 or 90 degrees. If true, the minimum rotation is applied
  such that the majority of bonds have an angle with the X axis of
  0, 30, 60, or 90 degrees, with the goal of altering the initial
  orientation as little as possible .
*/

RDKIT_DEPICTOR_EXPORT void straightenDepiction(RDKit::ROMol &mol,
                                               int confId = -1,
                                               bool minimizeRotation = false);

//! \brief Normalizes the 2D depiction.
/*!
  If canonicalize is != 0, the depiction is subjected to a canonical
  transformation such that its main axis is aligned along the X axis
  (canonicalize >0, the default) or the Y axis (canonicalize <0).
  If canonicalize is 0, no canonicalization takes place.
  If scaleFactor is <0.0 (the default) the depiction is scaled such
  that bond lengths conform to RDKit standards. The applied scaling
  factor is returned.

  ARGUMENTS:
  \param mol          - the molecule to be normalized
  \param confId       - (optional) the id of the reference conformation to use
  \param canonicalize - (optional) if != 0, a canonical transformation is
                        applied: if >0 (the default), the main molecule axis is
                        aligned to the X axis, if <0 to the Y axis.
                        If 0, no canonical transformation is applied.
  \param scaleFactor  - (optional) if >0.0, the scaling factor to apply. The
                        default (-1.0) means that the depiction is automatically
                        scaled such that bond lengths are the standard RDKit
                        ones.
  RETURNS:

  \return the applied scaling factor.
*/

RDKIT_DEPICTOR_EXPORT double normalizeDepiction(RDKit::ROMol &mol,
                                                int confId = -1,
                                                int canonicalize = 1,
                                                double scaleFactor = -1.0);
};  // namespace RDDepict

#endif