//
// Copyright (C) 2020 Greg Landrum and T5 Informatics GmbH
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include <RDGeneral/export.h>
#ifndef RD_ABBREVIATIONS_H
#define RD_ABBREVIATIONS_H
#include <GraphMol/Substruct/SubstructMatch.h>
#include <utility>
#include <vector>
#include <string>
#include <memory>
namespace RDKit {
class ROMol;
class RWMol;
namespace Abbreviations {
//! Describes one abbreviation: its label, the strings used to display it and
//! the SMARTS pattern that defines it.
struct RDKIT_ABBREVIATIONS_EXPORT AbbreviationDefinition {
  std::string label;          //!< label used for the abbreviation
  std::string displayLabel;   //!< label used in drawings
  std::string displayLabelW;  //!< display label if a bond comes in from the
                              //!< right
  std::string smarts;         //!< SMARTS definition of the abbreviation
  std::shared_ptr<ROMol> mol;                  //!< optional
  std::vector<unsigned int> extraAttachAtoms;  //!< optional

  //! Two definitions compare equal when their four string fields match.
  //! Note that \c mol and \c extraAttachAtoms take no part in the comparison.
  bool operator==(const AbbreviationDefinition &other) const {
    if (!(label == other.label)) {
      return false;
    }
    if (!(displayLabel == other.displayLabel)) {
      return false;
    }
    if (!(displayLabelW == other.displayLabelW)) {
      return false;
    }
    return smarts == other.smarts;
  }

  //! Exact negation of operator==
  bool operator!=(const AbbreviationDefinition &other) const {
    return !operator==(other);
  }
};
//! Bundles a substructure match together with the abbreviation definition
//! it was found for.
struct RDKIT_ABBREVIATIONS_EXPORT AbbreviationMatch {
  MatchVectType match;            //!< the match itself
  AbbreviationDefinition abbrev;  //!< the definition associated with the match

  //! Creates an empty match paired with a default-constructed definition.
  AbbreviationMatch() {}

  //! Takes both arguments by value and moves them into the members.
  AbbreviationMatch(std::vector<std::pair<int, int>> matchArg,
                    AbbreviationDefinition abbrevArg)
      : match(std::move(matchArg)), abbrev(std::move(abbrevArg)) {}

  //! Equal when the definitions compare equal and the matches are identical.
  //! The definition is compared first.
  bool operator==(const AbbreviationMatch &other) const {
    if (!(abbrev == other.abbrev)) {
      return false;
    }
    return match == other.match;
  }

  //! Exact negation of operator==
  bool operator!=(const AbbreviationMatch &other) const {
    return !operator==(other);
  }
};
//! String constants that are declared here and defined elsewhere.
//! NOTE(review): judging by the names these are property keys recording the
//! number of dummies and the original atom/bond mappings - confirm against
//! the implementation.
namespace common_properties {
RDKIT_ABBREVIATIONS_EXPORT extern const std::string numDummies;
RDKIT_ABBREVIATIONS_EXPORT extern const std::string origAtomMapping;
RDKIT_ABBREVIATIONS_EXPORT extern const std::string origBondMapping;
} // namespace common_properties
namespace Utils {
//! returns the default set of abbreviation definitions
/*!
  \return the definitions as a vector of AbbreviationDefinition, by value
*/
RDKIT_ABBREVIATIONS_EXPORT std::vector<AbbreviationDefinition>
getDefaultAbbreviations();
//! returns the default set of linker definitions
/*!
  \return the definitions as a vector of AbbreviationDefinition, by value
*/
RDKIT_ABBREVIATIONS_EXPORT std::vector<AbbreviationDefinition>
getDefaultLinkers();
//! parses a string describing abbreviation matches and returns the result
/*!
  \param text the data to be parsed, see below for the format
  \param removeExtraDummies controls whether or not dummy atoms beyond atom 0
         are removed. Set this to true to create abbreviations for linkers
  \param allowConnectionToDummies allows abbreviations to directly connect to
         abbreviations. Set this to true for linkers

  \return the parsed definitions as a vector of AbbreviationDefinition

  Format of the text data:
    A series of lines, each of which contains:

      label SMARTS displayLabel displayLabelW

    where
      - label is the label used for the abbreviation,
      - SMARTS is the SMARTS definition of the abbreviation,
      - displayLabel is used in drawings to render the abbreviations,
      - displayLabelW is the display label if a bond comes in from the right.

    The "displayLabel" and "displayLabelW" fields are optional.

    Use dummies to indicate attachment points. The assumption is that the
    first atom is a dummy (one will be added if this is not true) and that the
    second atom is the surrogate for the rest of the group.
*/
RDKIT_ABBREVIATIONS_EXPORT std::vector<AbbreviationDefinition>
parseAbbreviations(const std::string &text, bool removeExtraDummies = false,
bool allowConnectionToDummies = false);
//! \brief equivalent to calling \c parseAbbreviations(text,true,true)
/*!
  \param text the linker definitions to parse; passed unchanged to
         parseAbbreviations()

  \return whatever parseAbbreviations() returns for \c text when both
          \c removeExtraDummies and \c allowConnectionToDummies are true
*/
inline std::vector<AbbreviationDefinition> parseLinkers(
    const std::string &text) {
  // both flags are switched on, which is what parseAbbreviations() documents
  // as the setting for linkers
  return parseAbbreviations(text, true, true);
}
} // namespace Utils
//! returns all matches for the abbreviations across the molecule
/*!
  \param mol the molecule to search; it is taken by const reference
  \param abbrevs the abbreviations to look for. This list is used in order.
  \param maxCoverage any abbreviation that covers more than this fraction
         of the molecule's atoms (not counting dummies) will not be returned.

  \return the matches as a vector of AbbreviationMatch
*/
RDKIT_ABBREVIATIONS_EXPORT std::vector<AbbreviationMatch>
findApplicableAbbreviationMatches(
const ROMol &mol, const std::vector<AbbreviationDefinition> &abbrevs,
double maxCoverage = 0.4);
//! applies the abbreviation matches to a molecule, modifying it in place.
//! the modified molecule is not sanitized
/*!
  \param mol the molecule to modify
  \param matches the abbreviation matches to apply
*/
RDKIT_ABBREVIATIONS_EXPORT void applyMatches(
RWMol &mol, const std::vector<AbbreviationMatch> &matches);
//! creates "SUP" SubstanceGroups on the molecule describing the abbreviation
/*!
  \param mol the molecule the SubstanceGroups are created on
  \param matches the abbreviation matches to describe
*/
RDKIT_ABBREVIATIONS_EXPORT void labelMatches(
RWMol &mol, const std::vector<AbbreviationMatch> &matches);
//! convenience function for finding and applying abbreviations
/*!
  \param mol the molecule to modify
  \param abbrevs the abbreviations to look for
  \param maxCoverage coverage threshold, defaults to 0.4.
         NOTE(review): presumably has the same meaning as in
         findApplicableAbbreviationMatches() - confirm.
  \param sanitize defaults to true.
         NOTE(review): presumably controls whether the condensed molecule is
         sanitized before returning - confirm against the implementation.
*/
RDKIT_ABBREVIATIONS_EXPORT void condenseMolAbbreviations(
RWMol &mol, const std::vector<AbbreviationDefinition> &abbrevs,
double maxCoverage = 0.4, bool sanitize = true);
//! convenience function for finding and labeling abbreviations as SUP
//! SubstanceGroups
/*!
  \param mol the molecule to label
  \param abbrevs the abbreviations to look for
  \param maxCoverage coverage threshold, defaults to 0.4.
         NOTE(review): presumably has the same meaning as in
         findApplicableAbbreviationMatches() - confirm.
*/
RDKIT_ABBREVIATIONS_EXPORT void labelMolAbbreviations(
RWMol &mol, const std::vector<AbbreviationDefinition> &abbrevs,
double maxCoverage = 0.4);
//! collapses abbreviation (i.e. "SUP") substance groups
//! the modified molecule is not sanitized
/*!
  \param mol the molecule whose abbreviation substance groups are collapsed
*/
RDKIT_ABBREVIATIONS_EXPORT void condenseAbbreviationSubstanceGroups(RWMol &mol);
} // namespace Abbreviations
} // namespace RDKit
#endif
// RD_ABBREVIATIONS_H