File: Abbreviations.h

package info (click to toggle)
rdkit 202503.1-4
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 220,160 kB
  • sloc: cpp: 399,240; python: 77,453; ansic: 25,517; java: 8,173; javascript: 4,005; sql: 2,389; yacc: 1,565; lex: 1,263; cs: 1,081; makefile: 578; xml: 229; fortran: 183; sh: 105
file content (135 lines) | stat: -rw-r--r-- 5,462 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
//
//  Copyright (C) 2020 Greg Landrum and T5 Informatics GmbH
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
#include <RDGeneral/export.h>
#ifndef RD_ABBREVIATIONS_H
#define RD_ABBREVIATIONS_H
#include <GraphMol/Substruct/SubstructMatch.h>
#include <utility>
#include <vector>
#include <string>
#include <memory>

namespace RDKit {
class ROMol;
class RWMol;

namespace Abbreviations {
//! Describes a single abbreviation: its label, the SMARTS that defines it and
//! the labels used when it is drawn.
struct RDKIT_ABBREVIATIONS_EXPORT AbbreviationDefinition {
  std::string label;          //!< label used for the abbreviation
  std::string displayLabel;   //!< used in drawings to render the abbreviation
  std::string displayLabelW;  //!< display label if a bond comes in from the
                              //!< right
  std::string smarts;         //!< SMARTS definition of the abbreviation
  std::shared_ptr<ROMol> mol;                  //!< optional
  std::vector<unsigned int> extraAttachAtoms;  //!< optional

  //! Two definitions compare equal when their four string fields (label,
  //! displayLabel, displayLabelW and smarts) are equal. \c mol and
  //! \c extraAttachAtoms do not take part in the comparison.
  bool operator==(const AbbreviationDefinition &other) const {
    if (label != other.label || displayLabel != other.displayLabel) {
      return false;
    }
    return displayLabelW == other.displayLabelW && smarts == other.smarts;
  }
  //! Logical negation of operator==.
  bool operator!=(const AbbreviationDefinition &other) const {
    return !operator==(other);
  }
};
//! Pairs a match (stored as a MatchVectType) with the abbreviation definition
//! it belongs to.
struct RDKIT_ABBREVIATIONS_EXPORT AbbreviationMatch {
  MatchVectType match;            //!< the match itself
  AbbreviationDefinition abbrev;  //!< the abbreviation associated with the match

  //! Creates an empty match with a default-constructed definition.
  AbbreviationMatch() : match{}, abbrev{} {}
  //! Both arguments are taken by value and moved into the members.
  AbbreviationMatch(std::vector<std::pair<int, int>> matchArg,
                    AbbreviationDefinition abbrevArg)
      : match(std::move(matchArg)), abbrev(std::move(abbrevArg)) {}

  //! Equal when the definitions compare equal and the matches are identical.
  //! The definition is checked first.
  bool operator==(const AbbreviationMatch &other) const {
    if (abbrev != other.abbrev) {
      return false;
    }
    return match == other.match;
  }
  //! Logical negation of operator==.
  bool operator!=(const AbbreviationMatch &other) const {
    return !operator==(other);
  }
};
//! String constants used by the abbreviations code. They are only declared
//! here (\c extern); their values are defined outside this header.
// NOTE(review): judging by the namespace and variable names these are
// presumably property names attached to molecules/atoms/bonds while
// abbreviations are processed - confirm against the implementation.
namespace common_properties {
RDKIT_ABBREVIATIONS_EXPORT extern const std::string numDummies;
RDKIT_ABBREVIATIONS_EXPORT extern const std::string origAtomMapping;
RDKIT_ABBREVIATIONS_EXPORT extern const std::string origBondMapping;
}  // namespace common_properties
namespace Utils {
//! returns the default set of abbreviation definitions
//! (the vector is returned by value)
RDKIT_ABBREVIATIONS_EXPORT std::vector<AbbreviationDefinition>
getDefaultAbbreviations();
//! returns the default set of linker definitions
//! (the vector is returned by value)
RDKIT_ABBREVIATIONS_EXPORT std::vector<AbbreviationDefinition>
getDefaultLinkers();

//! parses a string describing abbreviation matches and returns the result
/*!

\param text the data to be parsed, see below for the format
\param removeExtraDummies controls whether or not dummy atoms beyond atom 0 are
       removed. Set this to true to create abbreviations for linkers.
       Defaults to false.
\param allowConnectionToDummies allows abbreviations to directly connect to
       abbreviations. Set this to true for linkers. Defaults to false.

\return the parsed abbreviation definitions

Format of the text data:
  A series of lines, each of which contains:

    label SMARTS displayLabel displayLabelW

  where:
    - label is the label used for the abbreviation
    - SMARTS is the SMARTS definition of the abbreviation
    - displayLabel (optional) is used in drawings to render the abbreviation
    - displayLabelW (optional) is the display label if a bond comes in from
      the right

  Use dummies to indicate attachment points. The assumption is that the first
  atom is a dummy (one will be added if this is not true) and that the second
  atom is the surrogate for the rest of the group.

*/
RDKIT_ABBREVIATIONS_EXPORT std::vector<AbbreviationDefinition>
parseAbbreviations(const std::string &text, bool removeExtraDummies = false,
                   bool allowConnectionToDummies = false);
//! \brief equivalent to calling \c parseAbbreviations(text,true,true)
/*!
    \param text the data to be parsed; it is forwarded unchanged to
           \c parseAbbreviations()

    \return the result of \c parseAbbreviations(text, true, true)
*/
inline std::vector<AbbreviationDefinition> parseLinkers(
    const std::string &text) {
  return parseAbbreviations(text, true, true);
}
}  // namespace Utils

//! returns all matches for the abbreviations across the molecule
/*!

    \param mol the molecule to search for abbreviations
    \param abbrevs the abbreviations to look for. This list is used in order.
    \param maxCoverage any abbreviation that covers more than this fraction
        of the molecule's atoms (not counting dummies) will not be returned.
        Defaults to 0.4.

    \return the matches that were found; each entry pairs a match with the
        abbreviation definition it belongs to
*/
RDKIT_ABBREVIATIONS_EXPORT std::vector<AbbreviationMatch>
findApplicableAbbreviationMatches(
    const ROMol &mol, const std::vector<AbbreviationDefinition> &abbrevs,
    double maxCoverage = 0.4);
//! applies the abbreviation matches to a molecule, modifying it in place.
//! the modified molecule is not sanitized
/*!
    \param mol the molecule to modify
    \param matches the abbreviation matches to apply, e.g. as returned by
        findApplicableAbbreviationMatches()
*/
RDKIT_ABBREVIATIONS_EXPORT void applyMatches(
    RWMol &mol, const std::vector<AbbreviationMatch> &matches);
//! creates "SUP" SubstanceGroups on the molecule describing the abbreviation
/*!
    \param mol the molecule the SubstanceGroups are created on
    \param matches the abbreviation matches to describe, e.g. as returned by
        findApplicableAbbreviationMatches()
*/
RDKIT_ABBREVIATIONS_EXPORT void labelMatches(
    RWMol &mol, const std::vector<AbbreviationMatch> &matches);
//! convenience function for finding and applying abbreviations
//! the modified molecule is not sanitized
/*!
    \param mol the molecule to modify
    \param abbrevs the abbreviations to look for
    \param maxCoverage coverage threshold for an abbreviation; see
        findApplicableAbbreviationMatches(). Defaults to 0.4.
    \param sanitize defaults to true
*/
// NOTE(review): the "not sanitized" statement above sits next to a
// `sanitize` parameter that defaults to true. What the flag actually triggers
// is not visible from this header - confirm against the implementation and
// reconcile the comment.
RDKIT_ABBREVIATIONS_EXPORT void condenseMolAbbreviations(
    RWMol &mol, const std::vector<AbbreviationDefinition> &abbrevs,
    double maxCoverage = 0.4, bool sanitize = true);
//! convenience function for finding and labeling abbreviations as SUP
//! SubstanceGroups
/*!
    \param mol the molecule to label
    \param abbrevs the abbreviations to look for
    \param maxCoverage coverage threshold for an abbreviation; see
        findApplicableAbbreviationMatches(). Defaults to 0.4.
*/
RDKIT_ABBREVIATIONS_EXPORT void labelMolAbbreviations(
    RWMol &mol, const std::vector<AbbreviationDefinition> &abbrevs,
    double maxCoverage = 0.4);
//! collapses abbreviation (i.e. "SUP") substance groups
//! the modified molecule is not sanitized
/*!
    \param mol the molecule to modify
*/
RDKIT_ABBREVIATIONS_EXPORT void condenseAbbreviationSubstanceGroups(RWMol &mol);

}  // namespace Abbreviations
}  // namespace RDKit
#endif