File: SubstanceGroup.h

package info (click to toggle)
rdkit 202503.1-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 220,160 kB
  • sloc: cpp: 399,240; python: 77,453; ansic: 25,517; java: 8,173; javascript: 4,005; sql: 2,389; yacc: 1,565; lex: 1,263; cs: 1,081; makefile: 580; xml: 229; fortran: 183; sh: 105
file content (302 lines) | stat: -rw-r--r-- 10,199 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
//
//
//  Copyright (C) 2018-2020 Greg Landrum and T5 Informatics GmbH
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
/*! \file SubstanceGroup.h

  \brief Defines the SubstanceGroup class

*/
#include <RDGeneral/export.h>
#ifndef _RD_SGROUP_H
#define _RD_SGROUP_H

#include <iostream>
#include <utility>
#include <unordered_map>

#include <Geometry/point.h>
#include <RDGeneral/types.h>
#include <RDGeneral/RDProps.h>
#include <boost/smart_ptr.hpp>

namespace RDKit {
class ROMol;
class RWMol;
class Bond;
class Atom;

//! exception type used to report incorrect sgroup access
/*!
  Derives from std::runtime_error; the message handed to either constructor
  is forwarded unchanged to the base class.
*/
class RDKIT_GRAPHMOL_EXPORT SubstanceGroupException
    : public std::runtime_error {
 public:
  //! construct with an error message held in a std::string
  SubstanceGroupException(const std::string &msg) : std::runtime_error{msg} {}
  //! construct with an error message given as a C string
  SubstanceGroupException(const char *msg) : std::runtime_error{msg} {}
};

//! The class for representing SubstanceGroups
/*!
  <b>Notes:</b>
  - These are inspired by the SGroups in the MDL formats
  - Implementation is based on 2010 MDL SD specification:
    http://infochim.u-strasbg.fr/recherche/Download/Fragmentor/MDL_SDF.pdf
  - See SGroups.md for further, more comprehensive notes.

*/

class RDKIT_GRAPHMOL_EXPORT SubstanceGroup : public RDProps {
 public:
  //! Bond type (see V3000 spec)
  enum class BondType {
    XBOND,  // External/Crossing bond
    CBOND,  // Internal/Contained bond
  };

  //! A bracket is stored as a fixed-size array of three 3D points
  typedef std::array<RDGeom::Point3D, 3> Bracket;

  //! Data structure for SAP lines (see V3000 spec)
  //! lvIdx may not be set; this is signaled with value -1
  struct AttachPoint {
    unsigned int aIdx;
    int lvIdx;
    std::string id;
    //! attachment points compare equal when all three fields match
    bool operator==(const AttachPoint &other) const {
      return aIdx == other.aIdx && lvIdx == other.lvIdx && id == other.id;
    }
  };

  //! See specification for V3000 CSTATE
  //! vector may or may not be considered, depending on TYPE
  struct CState {
    unsigned int bondIdx;
    RDGeom::Point3D vector;
    //! CStates compare equal when they refer to the same bond index
    bool operator==(const CState &other) const {
      // note that we ignore coordinates for this
      return bondIdx == other.bondIdx;
    }
  };

//! No default constructor
#ifndef SWIG
  // Unfortunately, SWIG generated wrapper code uses temporary variables that
  // require a default ctor not be deleted.
  SubstanceGroup() = delete;
#endif  // !SWIG

  //! Main Constructor. Ownership is only set on this side of the relationship:
  //! mol->addSubstanceGroup(sgroup) still needs to be called to get ownership
  //! on the other side.
  SubstanceGroup(ROMol *owning_mol, const std::string &type);

  //! Copy construction and copy assignment are member-wise (defaulted), so
  //! the owning-molecule pointer and the validity flag are copied too.
  SubstanceGroup(const SubstanceGroup &other) = default;
  SubstanceGroup &operator=(const SubstanceGroup &other) = default;

  //! Move constructor
  /*!
    Takes over the contents of \c other and detaches \c other from its owning
    molecule (its owner pointer is reset to nullptr).
    The validity flag is carried over, so that moving a group that was marked
    invalid does not silently turn it into a valid one.
  */
  SubstanceGroup(SubstanceGroup &&other) noexcept
      : RDProps(std::move(other)),
        // only the RDProps base has been moved from at this point; the
        // members of other that are read below are still intact
        dp_mol(std::exchange(other.dp_mol, nullptr)),
        d_isValid(other.d_isValid),
        d_atoms(std::move(other.d_atoms)),
        d_patoms(std::move(other.d_patoms)),
        d_bonds(std::move(other.d_bonds)),
        d_brackets(std::move(other.d_brackets)),
        d_cstates(std::move(other.d_cstates)),
        d_saps(std::move(other.d_saps)) {}

  //! Move assignment
  /*!
    Self-assignment is a no-op. Otherwise behaves like the move constructor:
    the contents of \c other (including the validity flag) are taken over and
    \c other is detached from its owning molecule.
  */
  SubstanceGroup &operator=(SubstanceGroup &&other) noexcept {
    if (this == &other) {
      return *this;
    }
    RDProps::operator=(std::move(other));
    dp_mol = std::exchange(other.dp_mol, nullptr);
    // keep the validity flag in sync with the data that is being taken over
    d_isValid = other.d_isValid;
    d_atoms = std::move(other.d_atoms);
    d_patoms = std::move(other.d_patoms);
    d_bonds = std::move(other.d_bonds);
    d_brackets = std::move(other.d_brackets);
    d_cstates = std::move(other.d_cstates);
    d_saps = std::move(other.d_saps);
    return *this;
  }

  //! Destructor
  ~SubstanceGroup() = default;

  //! returns whether or not this belongs to a molecule
  bool hasOwningMol() const { return dp_mol != nullptr; }

  //! Get the molecule that owns this instance
  //! (it is a precondition that an owning molecule has been set)
  ROMol &getOwningMol() const {
    PRECONDITION(dp_mol, "no owner");
    return *dp_mol;
  }

  //! returns whether or not this group is valid; invalid groups must be
  //! ignored.
  bool getIsValid() const { return d_isValid; }

  //! set whether or not this group is valid; invalid groups must be ignored.
  void setIsValid(bool isValid) { d_isValid = isValid; }

  //! get the index of this sgroup in dp_mol's sgroups vector
  //! (do not mistake this for the ID!)
  unsigned int getIndexInMol() const;

  /* Atom and Bond methods */
  void addAtomWithIdx(unsigned int idx);
  void addParentAtomWithIdx(unsigned int idx);
  void addBondWithIdx(unsigned int idx);
  void addAtomWithBookmark(int mark);
  void addParentAtomWithBookmark(int mark);
  void addBondWithBookmark(int mark);

  // These methods should be handled with care, since they can leave
  // Attachment points and CStates in an invalid state!
  void removeAtomWithIdx(unsigned int idx);
  void removeParentAtomWithIdx(unsigned int idx);
  void removeBondWithIdx(unsigned int idx);

  void addBracket(const Bracket &bracket);
  void addCState(unsigned int bondIdx, const RDGeom::Point3D &vector);
  void addAttachPoint(unsigned int aIdx, int lvIdx, const std::string &idStr);

  BondType getBondType(unsigned int bondIdx) const;

  //! read-only access to the stored atom, parent atom and bond indices
  const std::vector<unsigned int> &getAtoms() const { return d_atoms; }
  const std::vector<unsigned int> &getParentAtoms() const { return d_patoms; }
  const std::vector<unsigned int> &getBonds() const { return d_bonds; }

  // bulk setters; the index vectors are taken by value
  void setAtoms(std::vector<unsigned int> atoms);
  void setParentAtoms(std::vector<unsigned int> patoms);
  void setBonds(std::vector<unsigned int> bonds);

  //! read-only access to the stored brackets, CStates and attachment points
  const std::vector<Bracket> &getBrackets() const { return d_brackets; }
  const std::vector<CState> &getCStates() const { return d_cstates; }
  const std::vector<AttachPoint> &getAttachPoints() const { return d_saps; }

  //! mutable access to the stored brackets, CStates and attachment points
  std::vector<Bracket> &getBrackets() { return d_brackets; }
  std::vector<CState> &getCStates() { return d_cstates; }
  std::vector<AttachPoint> &getAttachPoints() { return d_saps; }

  //! remove all stored brackets / CStates / attachment points
  void clearBrackets() { d_brackets.clear(); }
  void clearCStates() { d_cstates.clear(); }
  void clearAttachPoints() { d_saps.clear(); }

  //! adjusts our atom IDs to reflect that an atom has been removed from the
  //! parent molecule
  //!   decrements all atom IDs that are higher than \c atomIdx
  //!   raises a \c SubstanceGroupException if \c atomIdx is actually part of
  //!   this substance group
  //! \returns whether or not anything was changed
  bool adjustToRemovedAtom(unsigned int atomIdx);

  //! \returns whether or not the specified atom is part of the
  //! definition of this substance group
  bool includesAtom(unsigned int atomIdx) const;

  //! adjusts our bond IDs to reflect that a bond has been removed from the
  //! parent molecule
  //!   decrements all bond IDs that are higher than \c bondIdx
  //!   raises a \c SubstanceGroupException if \c bondIdx is actually part of
  //!   this substance group
  //! \returns whether or not anything was changed
  bool adjustToRemovedBond(unsigned int bondIdx);

  //! \returns whether or not the specified bond is part of the
  //! definition of this substance group
  bool includesBond(unsigned int bondIdx) const;

  //! Set owning molecule
  //! This only updates atoms and bonds; parent sgroup has to be updated
  //! independently, since parent might not exist at the time this is
  //! called.
  void setOwningMol(ROMol *mol);

  //! Equality comparison
  /*!
    Two groups are equal when they have the same owning molecule pointer and
    the same atom, parent atom, bond and attachment point lists.
    Brackets, CStates, the validity flag and the properties held by the
    RDProps base are not part of the comparison.
  */
  bool operator==(const SubstanceGroup &other) const {
    // we ignore brackets and cstates, which involve coordinates
    return dp_mol == other.dp_mol && d_atoms == other.d_atoms &&
           d_patoms == other.d_patoms && d_bonds == other.d_bonds &&
           d_saps == other.d_saps;
  }

 private:
  ROMol *dp_mol = nullptr;  // owning molecule

  bool d_isValid = true;  // see getIsValid() / setIsValid()

  std::vector<unsigned int> d_atoms;   // atom indices
  std::vector<unsigned int> d_patoms;  // parent atom indices
  std::vector<unsigned int> d_bonds;   // bond indices

  std::vector<Bracket> d_brackets;
  std::vector<CState> d_cstates;
  std::vector<AttachPoint> d_saps;
};  // class SubstanceGroup

//! String tables and checks for SubstanceGroup fields.
//! Only the declarations of the checks are visible in this header; their
//! exact rules live in the implementation file.
namespace SubstanceGroupChecks {

//! SubstanceGroup type codes
//! NOTE(review): presumably the set accepted by isValidType() - confirm
//! against the implementation
const std::vector<std::string> sGroupTypes = {
    // polymer sgroups:
    "SRU", "MON", "COP", "CRO", "GRA", "MOD", "MER", "ANY",
    // formulations/mixtures:
    "COM", "MIX", "FOR",
    // other
    "SUP", "MUL", "DAT", "GEN"};

//! SubstanceGroup subtype codes
//! NOTE(review): presumably the set accepted by isValidSubType() - confirm
const std::vector<std::string> sGroupSubtypes = {"ALT", "RAN", "BLO"};
//! SubstanceGroup connect-type codes
//! NOTE(review): presumably the set accepted by isValidConnectType() - confirm
const std::vector<std::string> sGroupConnectTypes = {"HH", "HT", "EU"};

//! \returns whether or not \c type is a valid SubstanceGroup type string
RDKIT_GRAPHMOL_EXPORT bool isValidType(const std::string &type);

//! \returns whether or not \c type is a valid SubstanceGroup subtype string
RDKIT_GRAPHMOL_EXPORT bool isValidSubType(const std::string &type);

//! \returns whether or not \c type is a valid SubstanceGroup connect type
//! string
RDKIT_GRAPHMOL_EXPORT bool isValidConnectType(const std::string &type);

//! \returns whether or not \c id is free in \c mol
//! NOTE(review): presumably "free" means no SubstanceGroup of \c mol already
//! uses this ID - confirm against the implementation
RDKIT_GRAPHMOL_EXPORT bool isSubstanceGroupIdFree(const ROMol &mol,
                                                  unsigned int id);

}  // namespace SubstanceGroupChecks

//! \name SubstanceGroups and molecules
//! @{

//! \returns a reference to the vector of SubstanceGroups associated with
//! \c mol; the overload taking a non-const molecule gives mutable access,
//! the overload taking a const molecule gives read-only access
RDKIT_GRAPHMOL_EXPORT std::vector<SubstanceGroup> &getSubstanceGroups(
    ROMol &mol);
RDKIT_GRAPHMOL_EXPORT const std::vector<SubstanceGroup> &getSubstanceGroups(
    const ROMol &mol);

//! Add a new SubstanceGroup. A copy is added, so we can be sure that no other
//! references to the SubstanceGroup exist.
/*!
  \param mol - molecule the SubstanceGroup is added to.
  \param sgroup - SubstanceGroup to be added to the molecule (passed by
                  value).

  NOTE(review): the meaning of the returned unsigned int is not visible in
  this header; presumably it is the index of the newly added group - confirm
  against the implementation.
*/
RDKIT_GRAPHMOL_EXPORT unsigned int addSubstanceGroup(ROMol &mol,
                                                     SubstanceGroup sgroup);

//! Removes SubstanceGroups which reference a particular atom index
/*!
  \param mol - molecule to be edited.
  \param idx - index of the atom; SubstanceGroups referencing it are removed.
*/
RDKIT_GRAPHMOL_EXPORT void removeSubstanceGroupsReferencingAtom(
    RWMol &mol, unsigned int idx);
//! Removes SubstanceGroups which reference a particular bond index
/*!
  \param mol - molecule to be edited.
  \param idx - index of the bond; SubstanceGroups referencing it are removed.
*/
RDKIT_GRAPHMOL_EXPORT void removeSubstanceGroupsReferencingBond(
    RWMol &mol, unsigned int idx);
//! @}

}  // namespace RDKit

//! allows SubstanceGroup objects to be dumped to streams
/*!
  \param target - stream to write to.
  \param sg - SubstanceGroup to be written.

  NOTE(review): presumably returns \c target to allow chaining - confirm
  against the implementation.
*/
RDKIT_GRAPHMOL_EXPORT std::ostream &operator<<(std::ostream &target,
                                               const RDKit::SubstanceGroup &sg);
#endif