File: ChemTransforms.h

package info (click to toggle)
rdkit 201809.1%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 123,688 kB
  • sloc: cpp: 230,509; python: 70,501; java: 6,329; ansic: 5,427; sql: 1,899; yacc: 1,739; lex: 1,243; makefile: 445; xml: 229; fortran: 183; sh: 123; cs: 93
file content (272 lines) | stat: -rw-r--r-- 11,502 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
//
//  Copyright (C) 2006-2012 Greg Landrum
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
#include <RDGeneral/export.h>
#ifndef _RD_CHEMTRANSFORMS_H__
#define _RD_CHEMTRANSFORMS_H__

#include <boost/smart_ptr.hpp>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

#include <GraphMol/Substruct/SubstructMatch.h>
#include "MolFragmenter.h"

namespace RDKit {
// Forward declaration: the functions below only use ROMol through
// references, pointers and smart pointers.
class ROMol;
//! Shared-ownership smart pointer to an ROMol.
typedef boost::shared_ptr<ROMol> ROMOL_SPTR;

//! \brief Returns a copy of an ROMol with the atoms and bonds that
//!      match a pattern removed.
/*!
    \param mol          the ROMol of interest
    \param query        the query ROMol
    \param onlyFrags    if this is set, atoms will only be removed if
                        the entire fragment in which they are found is
                        matched by the query (default: false).
    \param useChirality if set, match the query using chirality
                        (default: false).

    \return a copy of \c mol with the matching atoms and bonds (if any)
            removed.

    NOTE(review): the result is a raw pointer; presumably the client is
    responsible for deleting it, as is documented for the other
    ROMol*-returning functions in this header - confirm against the
    implementation.
*/
RDKIT_CHEMTRANSFORMS_EXPORT ROMol *deleteSubstructs(const ROMol &mol, const ROMol &query,
                        bool onlyFrags = false, bool useChirality = false);

//! \brief Returns a list of copies of an ROMol with the atoms and bonds that
//!      match a pattern replaced with the atoms contained in another molecule.
/*!
   Bonds are created between the joining atom in the existing molecule
   and the atoms in the new molecule. So, using SMILES instead of molecules:
          replaceSubstructs('OC(=O)NCCNC(=O)O','C(=O)O','[X]') ->
                ['[X]NCCNC(=O)O','OC(=O)NCCN[X]']
          replaceSubstructs('OC(=O)NCCNC(=O)O','C(=O)O','[X]',true) ->
                ['[X]NCCN[X]']
   Chains should be handled "correctly":
          replaceSubstructs('CC(=O)C','C(=O)','[X]') ->
                ['C[X]C']
   As should rings:
          replaceSubstructs('C1C(=O)C1','C(=O)','[X]') ->
                ['C1[X]C1']
   And higher order branches:
          replaceSubstructs('CC(=O)(C)C','C(=O)','[X]') ->
                ['C[X](C)C']
   Note that the client is responsible for making sure that the
     resulting molecule actually makes sense - this function does not
     perform sanitization.

    \param mol         the ROMol of interest
    \param query       the query ROMol
    \param replacement the ROMol to be inserted
    \param replaceAll  if this is true, only a single result, with all
                       occurrences of the substructure replaced, will be
                       returned (default: false).
    \param replacementConnectionPoint   index of the atom in the replacement
                                        that the bond should be made to
                                        (default: 0).
    \param useChirality if set, match the query using chirality
                        (default: false).

    \return a vector of pointers to copies of \c mol with the matching atoms
        and bonds (if any) replaced

*/
RDKIT_CHEMTRANSFORMS_EXPORT std::vector<ROMOL_SPTR> replaceSubstructs(
    const ROMol &mol, const ROMol &query, const ROMol &replacement,
    bool replaceAll = false, unsigned int replacementConnectionPoint = 0,
    bool useChirality = false);

//! \brief Returns a copy of an ROMol with the atoms and bonds that
//!      don't fall within a substructure match removed.
//!
//!   dummy atoms are left to indicate attachment points.
//!
/*!
    \param mol          the ROMol of interest
    \param coreQuery    a query ROMol to be used to match the core
    \param useChirality if set, match the coreQuery using chirality
                        (default: false).

    \return a copy of \c mol with the non-matching atoms and bonds (if any)
            removed and dummies at the connection points.

    NOTE(review): the result is a raw pointer; presumably the client is
    responsible for deleting it, as is documented for replaceCore() in this
    header - confirm against the implementation.
*/


RDKIT_CHEMTRANSFORMS_EXPORT ROMol *replaceSidechains(const ROMol &mol, const ROMol &coreQuery,
                         bool useChirality = false);

//! \brief Returns a copy of an ROMol with the atoms and bonds that
//!      are referenced by the MatchVector removed.
//!      MatchVector must be defined between mol and the specified core.
//!
//!   dummy atoms are left to indicate attachment points.
//!    These dummy atoms can be labeled either by the matching index
//!     in the query or by an arbitrary "first match" found.
//!    Additional matching options are given below.
//!
/*!
    Note that this is essentially identical to the replaceSidechains function,
    except we invert the query and replace the atoms that *do* match the
    query.

    This overload takes an already-computed match and therefore has no
    \c useChirality parameter.

    \param mol            - the ROMol of interest
    \param core           - the core being matched against
    \param matchVect      - a matchVect of the type returned by Substructure
                            Matching
    \param replaceDummies - if set, atoms matching dummies in the core will
                            also be replaced (default: true)
    \param labelByIndex   - if set, the dummy atoms at attachment points are
                            labelled with the index+1 of the corresponding
                            atom in the core (default: false)
    \param requireDummyMatch - if set, only side chains that are connected to
                               atoms in the core that have attached dummies
                               will be considered.
                               Molecules that have sidechains that are attached
                               at other points will be rejected (NULL returned).
                               (default: false)

    \return a copy of \c mol with the atoms and bonds referenced by
            \c matchVect (if any) removed and dummies at the connection
            points. The client is responsible for deleting this molecule.
            If the core query is not matched, NULL is returned.
*/
RDKIT_CHEMTRANSFORMS_EXPORT ROMol *replaceCore(const ROMol &mol, const ROMol &core,
                   const MatchVectType &matchVect,
                   bool replaceDummies = true,
                   bool labelByIndex = false,
                   bool requireDummyMatch = false);

//! \brief Returns a copy of an ROMol with the atoms and bonds that
//!      do fall within a substructure match removed.
//!
//!   dummy atoms are left to indicate attachment points.
//!
/*!
    Note that this is essentially identical to the replaceSidechains function,
    except we invert the query and replace the atoms that *do* match the
    query.

    \param mol            - the ROMol of interest
    \param coreQuery      - a query ROMol to be used to match the core
    \param replaceDummies - if set, atoms matching dummies in the core will
                            also be replaced (default: true)
    \param labelByIndex   - if set, the dummy atoms at attachment points are
                            labelled with the index+1 of the corresponding
                            atom in the core (default: false)
    \param requireDummyMatch - if set, only side chains that are connected to
                               atoms in the core that have attached dummies
                               will be considered.
                               Molecules that have sidechains that are attached
                               at other points will be rejected (NULL returned).
                               (default: false)
    \param useChirality   - if set, match the coreQuery using chirality
                            (default: false)

    \return a copy of \c mol with the matching atoms and bonds (if any)
            removed and dummies at the connection points. The client is
            responsible for deleting this molecule. If the core query is not
            matched, NULL is returned.
*/
RDKIT_CHEMTRANSFORMS_EXPORT ROMol *replaceCore(const ROMol &mol, const ROMol &coreQuery,
                   bool replaceDummies = true, bool labelByIndex = false,
                   bool requireDummyMatch = false, bool useChirality = false);

//! \brief Carries out a Murcko decomposition on the molecule provided
//!
/*!

    \param mol    - the ROMol of interest (taken by const reference)

    \return a new ROMol with the Murcko scaffold.
            The client is responsible for deleting this molecule.
*/
RDKIT_CHEMTRANSFORMS_EXPORT ROMol *MurckoDecompose(const ROMol &mol);

//! \brief Combines two molecules to create a new one
//!
/*!

    \param mol1           - the first ROMol to be combined
    \param mol2           - the second ROMol to be combined
    \param offset         - a constant offset to be added to every
                            atom position in mol2
                            (default: the zero vector, Point3D(0, 0, 0))

    \return a new ROMol with the two molecules combined.
            The new molecule has not been sanitized.
            The client is responsible for deleting this molecule.
*/
RDKIT_CHEMTRANSFORMS_EXPORT ROMol *combineMols(const ROMol &mol1, const ROMol &mol2,
                   RDGeom::Point3D offset = RDGeom::Point3D(0, 0, 0));

//! \brief Adds named recursive queries to a molecule's atoms based on atom
//! labels
//!
/*!

    \param mol            - the molecule to be modified
    \param queries        - the dictionary of named queries to add
    \param propName       - the atom property to use to get query names
    \param reactantLabels - optional (defaults to NULL): to store pairs of
                            (atom index, query string)


    NOTES:
      - existing query information, if present, will be supplemented (AND logic)
      - non-query atoms will be replaced with query atoms using only the query
        logic
      - query names can be present as comma separated lists, they will then
        be combined using OR logic.
      - throws a KeyErrorException if a particular query name is not present
        in \c queries

*/
RDKIT_CHEMTRANSFORMS_EXPORT void addRecursiveQueries(
    ROMol &mol, const std::map<std::string, ROMOL_SPTR> &queries,
    const std::string &propName,
    std::vector<std::pair<unsigned int, std::string> > *reactantLabels = NULL);

//! \brief parses a query definition file and sets up a set of definitions
//!  suitable for use by addRecursiveQueries()
/*!

    \param filename         - the name of the file to be read
    \param queryDefs        - the dictionary of named queries (return value)
    \param standardize      - if true, query names will be converted to lower
                              case (default: true)
    \param delimiter        - the line delimiter in the file (default: a tab)
    \param comment          - text used to recognize comment lines
                              (default: "//")
    \param nameColumn       - column with the names of queries (default: 0)
    \param smartsColumn     - column with the SMARTS definitions of the queries
                              (default: 1)

    NOTE(review): given the tab default and the column-index parameters,
    \c delimiter presumably separates the columns within a line rather than
    the lines themselves, and the column indices are presumably zero-based -
    confirm against the implementation.

*/
RDKIT_CHEMTRANSFORMS_EXPORT void parseQueryDefFile(const std::string &filename,
                       std::map<std::string, ROMOL_SPTR> &queryDefs,
                       bool standardize = true,
                       const std::string &delimiter = "\t",
                       const std::string &comment = "//",
                       unsigned int nameColumn = 0,
                       unsigned int smartsColumn = 1);
//! \overload
//! This version takes a pointer to an input stream (\c inStream) in place of
//! a file name; the remaining parameters and their defaults are the same.
RDKIT_CHEMTRANSFORMS_EXPORT void parseQueryDefFile(std::istream *inStream,
                       std::map<std::string, ROMOL_SPTR> &queryDefs,
                       bool standardize = true,
                       const std::string &delimiter = "\t",
                       const std::string &comment = "//",
                       unsigned int nameColumn = 0,
                       unsigned int smartsColumn = 1);
//! \brief equivalent to parseQueryDefFile() but the query definitions are
//! explicitly passed in
/*!
    \param queryDefText     - the text containing the query definitions
    \param queryDefs        - the dictionary of named queries (return value)

    The remaining parameters have the same names and defaults as those of
    parseQueryDefFile().
*/
RDKIT_CHEMTRANSFORMS_EXPORT void parseQueryDefText(const std::string &queryDefText,
                       std::map<std::string, ROMOL_SPTR> &queryDefs,
                       bool standardize = true,
                       const std::string &delimiter = "\t",
                       const std::string &comment = "//",
                       unsigned int nameColumn = 0,
                       unsigned int smartsColumn = 1);
}

#endif