1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272
|
//
// Copyright (C) 2006-2012 Greg Landrum
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include <RDGeneral/export.h>
#ifndef _RD_CHEMTRANSFORMS_H__
#define _RD_CHEMTRANSFORMS_H__
#include <boost/smart_ptr.hpp>
#include <vector>
#include <iostream>
#include <GraphMol/Substruct/SubstructMatch.h>
#include "MolFragmenter.h"
namespace RDKit {
//! Forward declaration; the functions below only use ROMol by reference,
//! by pointer, or through ROMOL_SPTR.
class ROMol;
//! Reference-counted (boost::shared_ptr) handle to an ROMol.
typedef boost::shared_ptr<ROMol> ROMOL_SPTR;
//! \brief Returns a copy of an ROMol with the atoms and bonds that
//!      match a pattern removed.
/*!
    \param mol           the ROMol of interest
    \param query         the query ROMol
    \param onlyFrags     if this is set, atoms will only be removed if
                         the entire fragment in which they are found is
                         matched by the query.
    \param useChirality  if set, match the query using chirality

    \return a copy of \c mol with the matching atoms and bonds (if any)
            removed.

    NOTE(review): a raw pointer is returned; presumably the client is
    responsible for deleting it, as documented for replaceCore() - confirm.
*/
RDKIT_CHEMTRANSFORMS_EXPORT ROMol *deleteSubstructs(const ROMol &mol, const ROMol &query,
bool onlyFrags = false, bool useChirality = false);
//! \brief Returns a list of copies of an ROMol with the atoms and bonds that
//!      match a pattern replaced with the atoms contained in another molecule.
/*!
   Bonds are created between the joining atom in the existing molecule
   and the atoms in the new molecule. So, using SMILES instead of molecules:

        replaceSubstructs('OC(=O)NCCNC(=O)O','C(=O)O','[X]') ->
              ['[X]NCCNC(=O)O','OC(=O)NCCN[X]']
        replaceSubstructs('OC(=O)NCCNC(=O)O','C(=O)O','[X]',true) ->
              ['[X]NCCN[X]']

   Chains should be handled "correctly":

        replaceSubstructs('CC(=O)C','C(=O)','[X]') ->
              ['C[X]C']

   As should rings:

        replaceSubstructs('C1C(=O)C1','C(=O)','[X]') ->
              ['C1[X]C1']

   And higher order branches:

        replaceSubstructs('CC(=O)(C)C','C(=O)','[X]') ->
              ['C[X](C)C']

   Note that the client is responsible for making sure that the
   resulting molecule actually makes sense - this function does not
   perform sanitization.

    \param mol          the ROMol of interest
    \param query        the query ROMol
    \param replacement  the ROMol to be inserted
    \param replaceAll   if this is true, only a single result, with all
                        occurrences of the substructure replaced, will be
                        returned.
    \param replacementConnectionPoint  index of the atom in the replacement
                        that the bond should be made to
    \param useChirality if set, match the query using chirality

    \return a vector of pointers to copies of \c mol with the matching atoms
        and bonds (if any) replaced
*/
RDKIT_CHEMTRANSFORMS_EXPORT std::vector<ROMOL_SPTR> replaceSubstructs(
const ROMol &mol, const ROMol &query, const ROMol &replacement,
bool replaceAll = false, unsigned int replacementConnectionPoint = 0,
bool useChirality = false);
//! \brief Returns a copy of an ROMol with the atoms and bonds that
//!      don't fall within a substructure match removed.
//!
//!   Dummy atoms are left to indicate attachment points.
//!
/*!
    \param mol           the ROMol of interest
    \param coreQuery     a query ROMol to be used to match the core
    \param useChirality  if set, match the coreQuery using chirality

    \return a copy of \c mol with the non-matching atoms and bonds (if any)
            removed and dummies at the connection points.

    NOTE(review): a raw pointer is returned; presumably the client is
    responsible for deleting it, as documented for replaceCore() - confirm.
*/
RDKIT_CHEMTRANSFORMS_EXPORT ROMol *replaceSidechains(const ROMol &mol, const ROMol &coreQuery,
bool useChirality = false);
//! \brief Returns a copy of an ROMol with the atoms and bonds that
//!      are referenced by the MatchVector removed.
//!      MatchVector must be defined between mol and the specified core.
//!
//!   Dummy atoms are left to indicate attachment points.
//!   These dummy atoms can be labeled either by the matching index
//!   in the query or by an arbitrary "first match" found.
//!   Additional matching options are given below.
//!
/*!
    Note that this is essentially identical to the replaceSidechains function,
    except we invert the query and replace the atoms that *do* match the query.

    \param mol               - the ROMol of interest
    \param core              - the core being matched against
    \param matchVect         - a matchVect of the type returned by
                               Substructure Matching
    \param replaceDummies    - if set, atoms matching dummies in the core will
                               also be replaced
    \param labelByIndex      - if set, the dummy atoms at attachment points are
                               labelled with the index+1 of the corresponding
                               atom in the core
    \param requireDummyMatch - if set, only side chains that are connected to
                               atoms in the core that have attached dummies
                               will be considered.
                               Molecules that have sidechains that are attached
                               at other points will be rejected (NULL returned).

    \return a copy of \c mol with the matching atoms and bonds (if any)
            removed and dummies at the connection points. The client is
            responsible for deleting this molecule. If the core query is not
            matched, NULL is returned.
*/
RDKIT_CHEMTRANSFORMS_EXPORT ROMol *replaceCore(const ROMol &mol, const ROMol &core,
const MatchVectType &matchVect,
bool replaceDummies = true,
bool labelByIndex = false,
bool requireDummyMatch = false);
//! \brief Returns a copy of an ROMol with the atoms and bonds that
//!      do fall within a substructure match removed.
//!
//!   Dummy atoms are left to indicate attachment points.
//!
/*!
    Note that this is essentially identical to the replaceSidechains function,
    except we invert the query and replace the atoms that *do* match the query.

    \param mol               - the ROMol of interest
    \param coreQuery         - a query ROMol to be used to match the core
    \param replaceDummies    - if set, atoms matching dummies in the core will
                               also be replaced
    \param labelByIndex      - if set, the dummy atoms at attachment points are
                               labelled with the index+1 of the corresponding
                               atom in the core
    \param requireDummyMatch - if set, only side chains that are connected to
                               atoms in the core that have attached dummies
                               will be considered.
                               Molecules that have sidechains that are attached
                               at other points will be rejected (NULL returned).
    \param useChirality      - if set, match the coreQuery using chirality

    \return a copy of \c mol with the matching atoms and bonds (if any)
            removed and dummies at the connection points. The client is
            responsible for deleting this molecule. If the core query is not
            matched, NULL is returned.
*/
RDKIT_CHEMTRANSFORMS_EXPORT ROMol *replaceCore(const ROMol &mol, const ROMol &coreQuery,
bool replaceDummies = true, bool labelByIndex = false,
bool requireDummyMatch = false, bool useChirality = false);
//! \brief Carries out a Murcko decomposition on the molecule provided
//!
/*!
    \param mol - the ROMol of interest

    \return a new ROMol with the Murcko scaffold.
            The client is responsible for deleting this molecule.
*/
RDKIT_CHEMTRANSFORMS_EXPORT ROMol *MurckoDecompose(const ROMol &mol);
//! \brief Combines two molecules to create a new one
//!
/*!
    \param mol1   - the first ROMol to be combined
    \param mol2   - the second ROMol to be combined
    \param offset - a constant offset to be added to every
                    atom position in mol2 (defaults to (0, 0, 0))

    \return a new ROMol with the two molecules combined.
            The new molecule has not been sanitized.
            The client is responsible for deleting this molecule.
*/
RDKIT_CHEMTRANSFORMS_EXPORT ROMol *combineMols(const ROMol &mol1, const ROMol &mol2,
RDGeom::Point3D offset = RDGeom::Point3D(0, 0, 0));
//! \brief Adds named recursive queries to a molecule's atoms based on atom
//!      labels
//!
/*!
    \param mol            - the molecule to be modified
    \param queries        - the dictionary of named queries to add
    \param propName       - the atom property to use to get query names
    \param reactantLabels - optional (defaults to NULL); used to store pairs
                            of (atom index, query string)

    NOTES:
      - existing query information, if present, will be supplemented
        (AND logic)
      - non-query atoms will be replaced with query atoms using only the
        query logic
      - query names can be present as comma separated lists, they will then
        be combined using OR logic.
      - throws a KeyErrorException if a particular query name is not present
        in \c queries
*/
RDKIT_CHEMTRANSFORMS_EXPORT void addRecursiveQueries(
ROMol &mol, const std::map<std::string, ROMOL_SPTR> &queries,
const std::string &propName,
std::vector<std::pair<unsigned int, std::string> > *reactantLabels = NULL);
//! \brief Parses a query definition file and sets up a set of definitions
//!      suitable for use by addRecursiveQueries()
/*!
    \param filename     - the name of the file to be read
    \param queryDefs    - the dictionary of named queries (return value)
    \param standardize  - if true (the default), query names will be
                          converted to lower case
    \param delimiter    - the line delimiter in the file (default: tab)
    \param comment      - text used to recognize comment lines
                          (default: "//")
    \param nameColumn   - column with the names of queries (default: 0)
    \param smartsColumn - column with the SMARTS definitions of the queries
                          (default: 1)
*/
RDKIT_CHEMTRANSFORMS_EXPORT void parseQueryDefFile(const std::string &filename,
std::map<std::string, ROMOL_SPTR> &queryDefs,
bool standardize = true,
const std::string &delimiter = "\t",
const std::string &comment = "//",
unsigned int nameColumn = 0,
unsigned int smartsColumn = 1);
//! \overload
//! This version takes a pointer to an input stream (\c inStream) in place
//! of a file name; the remaining parameters and their defaults are the same
//! as in the file-name version.
RDKIT_CHEMTRANSFORMS_EXPORT void parseQueryDefFile(std::istream *inStream,
std::map<std::string, ROMOL_SPTR> &queryDefs,
bool standardize = true,
const std::string &delimiter = "\t",
const std::string &comment = "//",
unsigned int nameColumn = 0,
unsigned int smartsColumn = 1);
//! \brief Equivalent to parseQueryDefFile() but the query definitions are
//!      explicitly passed in as text (\c queryDefText)
//!
//! The remaining parameters and their defaults match those declared for
//! parseQueryDefFile().
RDKIT_CHEMTRANSFORMS_EXPORT void parseQueryDefText(const std::string &queryDefText,
std::map<std::string, ROMOL_SPTR> &queryDefs,
bool standardize = true,
const std::string &delimiter = "\t",
const std::string &comment = "//",
unsigned int nameColumn = 0,
unsigned int smartsColumn = 1);
}
#endif
|