1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302
|
//
//
// Copyright (C) 2018-2020 Greg Landrum and T5 Informatics GmbH
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
/*! \file SubstanceGroup.h
\brief Defines the SubstanceGroup class
*/
#include <RDGeneral/export.h>
#ifndef _RD_SGROUP_H
#define _RD_SGROUP_H
#include <array>
#include <iostream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include <Geometry/point.h>
#include <RDGeneral/types.h>
#include <RDGeneral/RDProps.h>
#include <boost/smart_ptr.hpp>
namespace RDKit {
class ROMol;
class RWMol;
class Bond;
class Atom;
//! Exception type used to report incorrect sgroup access
/*!
  Derives from std::runtime_error; the message handed to either constructor
  is forwarded unchanged to the base class.
*/
class RDKIT_GRAPHMOL_EXPORT SubstanceGroupException
    : public std::runtime_error {
 public:
  //! build the exception from a std::string message
  SubstanceGroupException(const std::string &msg) : std::runtime_error{msg} {}

  //! build the exception from a C-string message
  SubstanceGroupException(const char *msg) : std::runtime_error{msg} {}
};
//! The class for representing SubstanceGroups
/*!
<b>Notes:</b>
- These are inspired by the SGroups in the MDL formats
- Implementation is based on 2010 MDL SD specification:
http://infochim.u-strasbg.fr/recherche/Download/Fragmentor/MDL_SDF.pdf
- See SGroups.md for further, more comprehensive notes.
*/
class RDKIT_GRAPHMOL_EXPORT SubstanceGroup : public RDProps {
 public:
  //! Bond type (see V3000 spec)
  enum class BondType {
    XBOND,  // External/Crossing bond
    CBOND,  // Internal/Contained bond
  };

  //! A bracket is stored as an array of three 3D points
  typedef std::array<RDGeom::Point3D, 3> Bracket;

  //! Data structure for SAP lines (see V3000 spec)
  //! lvIdx may not be set; this is signaled with the value -1
  struct AttachPoint {
    unsigned int aIdx;
    int lvIdx;
    std::string id;
    //! two attachment points are equal when all three fields match
    bool operator==(const AttachPoint &other) const {
      return aIdx == other.aIdx && lvIdx == other.lvIdx && id == other.id;
    }
  };

  //! See specification for V3000 CSTATE
  //! vector may or may not be considered, depending on TYPE
  struct CState {
    unsigned int bondIdx;
    RDGeom::Point3D vector;
    //! only the bond index takes part in the comparison
    bool operator==(const CState &other) const {
      // note that we ignore coordinates for this
      return bondIdx == other.bondIdx;
    }
  };

  //! No default constructor
#ifndef SWIG
  // Unfortunately, SWIG generated wrapper code uses temporary variables that
  // require a default ctor not be deleted.
  SubstanceGroup() = delete;
#endif  // !SWIG

  //! Main Constructor. Ownership is only set on this side of the relationship:
  //! mol->addSubstanceGroup(sgroup) still needs to be called to get ownership
  //! on the other side.
  SubstanceGroup(ROMol *owning_mol, const std::string &type);

  SubstanceGroup(const SubstanceGroup &other) = default;
  SubstanceGroup &operator=(const SubstanceGroup &other) = default;

  //! Move constructor
  /*!
    Takes over the properties, owning-molecule pointer, validity flag and all
    index/bracket/cstate/attach-point vectors of \c other. \c other is left
    without an owning molecule.
  */
  SubstanceGroup(SubstanceGroup &&other) noexcept
      : RDProps(std::move(other)),
        dp_mol(std::exchange(other.dp_mol, nullptr)),
        // the validity flag has to travel with the data, exactly as it does
        // in the defaulted copy operations
        d_isValid(other.d_isValid),
        d_atoms(std::move(other.d_atoms)),
        d_patoms(std::move(other.d_patoms)),
        d_bonds(std::move(other.d_bonds)),
        d_brackets(std::move(other.d_brackets)),
        d_cstates(std::move(other.d_cstates)),
        d_saps(std::move(other.d_saps)) {}

  //! Move assignment
  /*!
    Self-assignment is a no-op. Otherwise behaves like the move constructor:
    everything, including the validity flag, is taken from \c other, which is
    left without an owning molecule.
  */
  SubstanceGroup &operator=(SubstanceGroup &&other) noexcept {
    if (this == &other) {
      return *this;
    }
    RDProps::operator=(std::move(other));
    dp_mol = std::exchange(other.dp_mol, nullptr);
    // keep the validity flag in sync with the data we are taking over
    d_isValid = other.d_isValid;
    d_atoms = std::move(other.d_atoms);
    d_patoms = std::move(other.d_patoms);
    d_bonds = std::move(other.d_bonds);
    d_brackets = std::move(other.d_brackets);
    d_cstates = std::move(other.d_cstates);
    d_saps = std::move(other.d_saps);
    return *this;
  }

  //! Destructor
  ~SubstanceGroup() = default;

  //! returns whether or not this belongs to a molecule
  bool hasOwningMol() const { return dp_mol != nullptr; }

  //! Get the molecule that owns this instance
  //! (an owner must have been set; this is checked with PRECONDITION)
  ROMol &getOwningMol() const {
    PRECONDITION(dp_mol, "no owner");
    return *dp_mol;
  }

  //! returns whether or not this group is valid; invalid groups must be
  //! ignored.
  bool getIsValid() const { return d_isValid; }

  //! set whether or not this group is valid; invalid groups must be ignored.
  void setIsValid(bool isValid) { d_isValid = isValid; }

  //! get the index of this sgroup in dp_mol's sgroups vector
  //! (not to be confused with the ID!)
  unsigned int getIndexInMol() const;

  /* Atom and Bond methods */
  void addAtomWithIdx(unsigned int idx);
  void addParentAtomWithIdx(unsigned int idx);
  void addBondWithIdx(unsigned int idx);
  void addAtomWithBookmark(int mark);
  void addParentAtomWithBookmark(int mark);
  void addBondWithBookmark(int mark);

  // These methods should be handled with care, since they can leave
  // Attachment points and CStates in an invalid state!
  void removeAtomWithIdx(unsigned int idx);
  void removeParentAtomWithIdx(unsigned int idx);
  void removeBondWithIdx(unsigned int idx);

  void addBracket(const Bracket &bracket);
  void addCState(unsigned int bondIdx, const RDGeom::Point3D &vector);
  void addAttachPoint(unsigned int aIdx, int lvIdx, const std::string &idStr);

  BondType getBondType(unsigned int bondIdx) const;

  //! read-only access to the stored atom, parent atom and bond indices
  const std::vector<unsigned int> &getAtoms() const { return d_atoms; }
  const std::vector<unsigned int> &getParentAtoms() const { return d_patoms; }
  const std::vector<unsigned int> &getBonds() const { return d_bonds; }

  void setAtoms(std::vector<unsigned int> atoms);
  void setParentAtoms(std::vector<unsigned int> patoms);
  void setBonds(std::vector<unsigned int> bonds);

  //! read-only access to the stored brackets, cstates and attachment points
  const std::vector<Bracket> &getBrackets() const { return d_brackets; }
  const std::vector<CState> &getCStates() const { return d_cstates; }
  const std::vector<AttachPoint> &getAttachPoints() const { return d_saps; }

  //! mutable access to the stored brackets, cstates and attachment points
  std::vector<Bracket> &getBrackets() { return d_brackets; }
  std::vector<CState> &getCStates() { return d_cstates; }
  std::vector<AttachPoint> &getAttachPoints() { return d_saps; }

  //! remove all stored brackets / cstates / attachment points
  void clearBrackets() { d_brackets.clear(); }
  void clearCStates() { d_cstates.clear(); }
  void clearAttachPoints() { d_saps.clear(); }

  //! adjusts our atom IDs to reflect that an atom has been removed from the
  //! parent molecule
  //!   decrements all atom IDs that are higher than \c atomIdx
  //!   raises a \c SubstanceGroupException if \c atomIdx is actually part of
  //!   this substance group
  //! \returns whether or not anything was changed
  bool adjustToRemovedAtom(unsigned int atomIdx);

  //! \returns whether or not the specified atom is part of the
  //! definition of this substance group
  bool includesAtom(unsigned int atomIdx) const;

  //! adjusts our bond IDs to reflect that a bond has been removed from the
  //! parent molecule
  //!   decrements all bond IDs that are higher than \c bondIdx
  //!   raises a \c SubstanceGroupException if \c bondIdx is actually part of
  //!   this substance group
  //! \returns whether or not anything was changed
  bool adjustToRemovedBond(unsigned int bondIdx);

  //! \returns whether or not the specified bond is part of the
  //! definition of this substance group
  bool includesBond(unsigned int bondIdx) const;

  //! Set owning molecule
  //! This only updates atoms and bonds; parent sgroup has to be updated
  //! independently, since parent might not exist at the time this is
  //! called.
  void setOwningMol(ROMol *mol);

  //! Equality compares the owning molecule pointer, the atom, parent atom and
  //! bond indices, and the attachment points. Properties and the validity
  //! flag are not compared.
  bool operator==(const SubstanceGroup &other) const {
    // we ignore brackets and cstates, which involve coordinates
    return dp_mol == other.dp_mol && d_atoms == other.d_atoms &&
           d_patoms == other.d_patoms && d_bonds == other.d_bonds &&
           d_saps == other.d_saps;
  }

 private:
  ROMol *dp_mol = nullptr;  // owning molecule

  bool d_isValid = true;  // see getIsValid() / setIsValid()

  std::vector<unsigned int> d_atoms;   // see getAtoms()
  std::vector<unsigned int> d_patoms;  // see getParentAtoms()
  std::vector<unsigned int> d_bonds;   // see getBonds()

  std::vector<Bracket> d_brackets;  // see getBrackets()
  std::vector<CState> d_cstates;    // see getCStates()
  std::vector<AttachPoint> d_saps;  // see getAttachPoints()
};  // class SubstanceGroup
namespace SubstanceGroupChecks {

//! SGroup type codes, grouped by category
// NOTE(review): presumably the set consulted by isValidType() - confirm in
// the implementation file.
const std::vector<std::string> sGroupTypes{
    // polymer sgroups:
    "SRU",
    "MON",
    "COP",
    "CRO",
    "GRA",
    "MOD",
    "MER",
    "ANY",
    // formulations/mixtures:
    "COM",
    "MIX",
    "FOR",
    // other
    "SUP",
    "MUL",
    "DAT",
    "GEN",
};

//! SGroup subtype codes
// NOTE(review): presumably the set consulted by isValidSubType() - confirm.
const std::vector<std::string> sGroupSubtypes{
    "ALT",
    "RAN",
    "BLO",
};

//! SGroup connection type codes
// NOTE(review): presumably the set consulted by isValidConnectType() -
// confirm.
const std::vector<std::string> sGroupConnectTypes{
    "HH",
    "HT",
    "EU",
};

//! validity check for an sgroup type string (defined outside this header)
RDKIT_GRAPHMOL_EXPORT
bool isValidType(const std::string &type);

//! validity check for an sgroup subtype string (defined outside this header)
RDKIT_GRAPHMOL_EXPORT
bool isValidSubType(const std::string &type);

//! validity check for an sgroup connection type string (defined outside this
//! header)
RDKIT_GRAPHMOL_EXPORT
bool isValidConnectType(const std::string &type);

//! checks whether \c id is free as a SubstanceGroup id in \c mol (defined
//! outside this header)
RDKIT_GRAPHMOL_EXPORT
bool isSubstanceGroupIdFree(const ROMol &mol, unsigned int id);

}  // namespace SubstanceGroupChecks
//! \name SubstanceGroups and molecules
//! @{
//! access the SubstanceGroups of a molecule (read-only overload)
RDKIT_GRAPHMOL_EXPORT
const std::vector<SubstanceGroup> &getSubstanceGroups(const ROMol &mol);

//! access the SubstanceGroups of a molecule (mutable overload)
RDKIT_GRAPHMOL_EXPORT
std::vector<SubstanceGroup> &getSubstanceGroups(ROMol &mol);
//! Add a new SubstanceGroup to a molecule.
/*!
  The SubstanceGroup is passed by value, so the molecule receives its own
  copy and no other references to the added SubstanceGroup exist.

  \param mol    - molecule the SubstanceGroup is added to.
  \param sgroup - SubstanceGroup to be added to the molecule.

  NOTE(review): the meaning of the returned unsigned int is not visible in
  this header (presumably the index of the new group) - confirm in the
  implementation.
*/
RDKIT_GRAPHMOL_EXPORT
unsigned int addSubstanceGroup(ROMol &mol, SubstanceGroup sgroup);
//! Drops every SubstanceGroup that references the given atom index
/*!
  \param mol - molecule to be edited.
  \param idx - index of the atom
*/
RDKIT_GRAPHMOL_EXPORT
void removeSubstanceGroupsReferencingAtom(RWMol &mol, unsigned int idx);
//! Drops every SubstanceGroup that references the given bond index
/*!
  \param mol - molecule to be edited.
  \param idx - index of the bond
*/
RDKIT_GRAPHMOL_EXPORT
void removeSubstanceGroupsReferencingBond(RWMol &mol, unsigned int idx);
//! @}
} // namespace RDKit
//! stream insertion operator, so that SubstanceGroup objects can be dumped
//! to streams
RDKIT_GRAPHMOL_EXPORT
std::ostream &operator<<(std::ostream &target, const RDKit::SubstanceGroup &sg);
#endif
|