//
// Copyright (C) 2019 Greg Landrum and T5 Informatics GmbH
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include <RDGeneral/export.h>
#ifndef RD_SCAFFOLDNETWORK_H
#define RD_SCAFFOLDNETWORK_H
#include <vector>
#include <map>
#include <string>
#include <sstream>
#include <memory>
#include <iostream>
#ifdef RDK_USE_BOOST_SERIALIZATION
#include <RDGeneral/Invariant.h>
#include <RDGeneral/BoostStartInclude.h>
#include <boost/archive/text_oarchive.hpp>
#include <boost/archive/text_iarchive.hpp>
#include <boost/serialization/vector.hpp>
#include <boost/serialization/shared_ptr.hpp>
#include <boost/serialization/version.hpp>
#include <RDGeneral/BoostEndInclude.h>
#endif
namespace RDKit {
class ROMol;
class ChemicalReaction;
namespace ScaffoldNetwork {
//! Options controlling how a scaffold network is generated.
/*!
  Each flag has an in-class default; the fragmentation itself is driven by the
  reactions stored in \c bondBreakersRxns.
*/
struct RDKIT_SCAFFOLDNETWORK_EXPORT ScaffoldNetworkParams {
bool includeGenericScaffolds =
true; ///< include scaffolds with all atoms replaced by dummies
bool includeGenericBondScaffolds =
false; ///< include scaffolds with all bonds replaced by single bonds
bool includeScaffoldsWithoutAttachments =
true; ///< remove attachment points from scaffolds and include the result
bool includeScaffoldsWithAttachments =
true; ///< include the version of the scaffold with attachment points
bool includeNames =
false; ///< include the names of the input molecules
bool keepOnlyFirstFragment =
true; ///< keep only the first fragment from the bond breaking rule
bool pruneBeforeFragmenting =
true; ///< do a pruning/flattening step before starting fragmenting
bool flattenIsotopes = true; ///< remove isotopes when flattening
bool flattenChirality =
true; ///< remove chirality and bond stereo when flattening
bool flattenKeepLargest =
true; ///< keep only the largest fragment when doing flattening
bool collectMolCounts = true; ///< keep track of the number of molecules each
///< scaffold was reached from
std::vector<std::shared_ptr<ChemicalReaction>>
bondBreakersRxns; ///< the reaction(s) used to fragment. Should expect a
///< single reactant and produce two products
//! Default construction: delegates to the SMARTS-based constructor with a
//! single bond-breaking rule. Reading the SMARTS, the rule matches a
//! non-ring single bond between a non-dummy ring atom and another non-dummy
//! atom and caps both resulting pieces with a dummy atom.
ScaffoldNetworkParams()
: ScaffoldNetworkParams{{"[!#0;R:1]-!@[!#0:2]>>[*:1]-[#0].[#0]-[*:2]"}} {}
//! Construct from a list of bond-breaking definitions (declared here only).
//! NOTE(review): presumably each string is a reaction SMARTS that is parsed
//! into an entry of bondBreakersRxns - confirm against the implementation.
//! The constructor is not `explicit`, so a vector of strings converts
//! implicitly to ScaffoldNetworkParams.
ScaffoldNetworkParams(const std::vector<std::string> &bondBreakersSmarts);
};
//! The kind of relationship an edge of the network represents.
/*!
  Every edge points from a molecule to something derived from it; the
  enumerator says which derivation was applied. The numeric values are
  spelled out explicitly and are what NetworkEdge::serialize() passes to the
  archive, so they should not be renumbered.
*/
enum class EdgeType {
Fragment = 1, ///< molecule -> fragment
Generic = 2, ///< molecule -> generic molecule (all atoms are dummies)
GenericBond = 3, ///< molecule -> generic bond molecule (all bonds single)
RemoveAttachment = 4, ///< molecule -> molecule with no attachment points
Initialize = 5 ///< molecule -> flattened molecule
};
//! A typed, directed connection between two entries of a scaffold network.
/*!
  NOTE(review): beginIdx/endIdx are presumably positions in
  ScaffoldNetwork::nodes - confirm against the code that builds the edges.
*/
struct RDKIT_SCAFFOLDNETWORK_EXPORT NetworkEdge {
  size_t beginIdx;  ///< index the edge starts from
  size_t endIdx;    ///< index the edge points to
  EdgeType type;    ///< relationship represented by the edge

  //! Default edge: 0 -> 0 with type EdgeType::Initialize.
  NetworkEdge() : NetworkEdge(0, 0, EdgeType::Initialize) {}

  //! Edge from \c bi to \c ei of kind \c typ.
  NetworkEdge(size_t bi, size_t ei, EdgeType typ)
      : beginIdx(bi), endIdx(ei), type(typ) {}

  //! Two edges are equal when both indices and the type match.
  bool operator==(const RDKit::ScaffoldNetwork::NetworkEdge &o) const {
    if (beginIdx != o.beginIdx) {
      return false;
    }
    if (endIdx != o.endIdx) {
      return false;
    }
    return type == o.type;
  }

  //! Exact negation of operator==.
  bool operator!=(const RDKit::ScaffoldNetwork::NetworkEdge &o) const {
    return !(*this == o);
  }
#ifdef RDK_USE_BOOST_SERIALIZATION
 private:
  friend class boost::serialization::access;

  //! Boost.Serialization hook: archives the three members in declaration
  //! order. The class version is not consulted.
  template <class Archive>
  void serialize(Archive &ar, const unsigned int version) {
    RDUNUSED_PARAM(version);
    ar & beginIdx & endIdx & type;
  }
#endif
};
//! Container for a scaffold network: the nodes, their counts and the edges.
struct RDKIT_SCAFFOLDNETWORK_EXPORT ScaffoldNetwork {
  std::vector<std::string> nodes;  ///< SMILES for the scaffolds
  std::vector<unsigned>
      counts;  ///< number of times each scaffold was encountered
  std::vector<unsigned>
      molCounts;  ///< number of molecules each scaffold was found in
  std::vector<NetworkEdge> edges;  ///< edges in the network

  //! Creates an empty network.
  ScaffoldNetwork() {}
#ifdef RDK_USE_BOOST_SERIALIZATION
  //! Restores a network from the contents of a Boost text archive.
  /*!
    \param pkl  the archive text; it is read with boost::archive::text_iarchive
  */
  ScaffoldNetwork(const std::string &pkl) {
    std::stringstream iss(pkl);
    boost::archive::text_iarchive ia(iss);
    ia >> *this;
  }

 private:
  friend class boost::serialization::access;

  //! Boost.Serialization hook shared by saving and loading.
  /*!
    \param ar       the archive being written to or read from
    \param version  class version recorded in the archive; molCounts is only
                    transferred when it is greater than 0, so for a version-0
                    archive molCounts is left untouched
  */
  template <class Archive>
  void serialize(Archive &ar, const unsigned int version) {
    // `version` is read below, so it must not be flagged as an unused
    // parameter here.
    ar & nodes;
    ar & counts;
    if (version > 0) {
      ar & molCounts;
    }
    ar & edges;
  }
#endif
};
//! update an existing ScaffoldNetwork using a set of molecules
/*!
  Declaration only; the definition is not part of this header.

  \param mols     the molecules to process (the container type is the
                  template parameter)
  \param network  the network to update; it is taken by non-const reference
  \param params   the options to use
*/
template <typename T>
void updateScaffoldNetwork(const T &mols, ScaffoldNetwork &network,
                           const ScaffoldNetworkParams &params);
//! create a new ScaffoldNetwork for a set of molecules
/*!
  Starts from a default-constructed network and fills it with a single call
  to updateScaffoldNetwork().

  \param mols    the molecules to process
  \param params  the options forwarded to updateScaffoldNetwork()

  \return the newly built network
*/
template <typename T>
ScaffoldNetwork createScaffoldNetwork(const T &mols,
                                      const ScaffoldNetworkParams &params) {
  ScaffoldNetwork res;
  updateScaffoldNetwork(mols, res, params);
  return res;
}
//! allows edge types to output nicely as strings
/*!
  Writes the enumerator's name, or "UNKNOWN" for a value that matches none of
  the enumerators, and returns the stream.
*/
inline std::ostream &operator<<(std::ostream &ostr,
                                const RDKit::ScaffoldNetwork::EdgeType &e) {
  using RDKit::ScaffoldNetwork::EdgeType;
  // start from the fallback and overwrite it when the value is recognized
  const char *label = "UNKNOWN";
  switch (e) {
    case EdgeType::Fragment:
      label = "Fragment";
      break;
    case EdgeType::Generic:
      label = "Generic";
      break;
    case EdgeType::GenericBond:
      label = "GenericBond";
      break;
    case EdgeType::RemoveAttachment:
      label = "RemoveAttachment";
      break;
    case EdgeType::Initialize:
      label = "Initialize";
      break;
    default:
      break;
  }
  return ostr << label;
}
//! allows edges to output nicely as strings
/*!
  The output has the form "NetworkEdge( <beginIdx>-><endIdx>, type:<type> )",
  with the type rendered by the EdgeType stream operator.
*/
inline std::ostream &operator<<(std::ostream &ostr,
                                const RDKit::ScaffoldNetwork::NetworkEdge &e) {
  ostr << "NetworkEdge( ";
  ostr << e.beginIdx << "->" << e.endIdx;
  ostr << ", type:" << e.type;
  return ostr << " )";
}
//! returns parameters for constructing scaffold networks using BRICS
//! fragmentation
/*!
  Declaration only; the definition is not part of this header. The result is
  returned by value.
*/
RDKIT_SCAFFOLDNETWORK_EXPORT ScaffoldNetworkParams getBRICSNetworkParams();
} // namespace ScaffoldNetwork
} // namespace RDKit
#ifdef RDK_USE_BOOST_SERIALIZATION
// Sets the Boost.Serialization class version of ScaffoldNetwork to 1.
// ScaffoldNetwork::serialize() only transfers molCounts when the version it
// is handed is greater than 0.
namespace boost {
namespace serialization {
template <>
struct version<RDKit::ScaffoldNetwork::ScaffoldNetwork> {
BOOST_STATIC_CONSTANT(int, value = 1);
};
} // namespace serialization
} // namespace boost
#endif
#endif