File: ScaffoldNetwork.h

package info (click to toggle)
rdkit 202503.1-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 220,160 kB
  • sloc: cpp: 399,240; python: 77,453; ansic: 25,517; java: 8,173; javascript: 4,005; sql: 2,389; yacc: 1,565; lex: 1,263; cs: 1,081; makefile: 580; xml: 229; fortran: 183; sh: 105
file content (197 lines) | stat: -rw-r--r-- 6,670 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
//
//  Copyright (C) 2019 Greg Landrum and T5 Informatics GmbH
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
#include <RDGeneral/export.h>
#ifndef RD_SCAFFOLDNETWORK_H
#define RD_SCAFFOLDNETWORK_H

#include <vector>
#include <map>
#include <string>
#include <sstream>
#include <memory>
#include <iostream>

#ifdef RDK_USE_BOOST_SERIALIZATION
#include <RDGeneral/Invariant.h>
#include <RDGeneral/BoostStartInclude.h>
#include <boost/archive/text_oarchive.hpp>
#include <boost/archive/text_iarchive.hpp>
#include <boost/serialization/vector.hpp>
#include <boost/serialization/shared_ptr.hpp>
#include <boost/serialization/version.hpp>
#include <RDGeneral/BoostEndInclude.h>
#endif

namespace RDKit {
class ROMol;
class ChemicalReaction;

namespace ScaffoldNetwork {

//! Options controlling how a scaffold network is generated.
/*!
  Every flag carries an in-class default, so a default-constructed instance is
  fully initialised. The default constructor forwards a single built-in
  bond-breaking reaction SMARTS to the SMARTS-list constructor.
*/
struct RDKIT_SCAFFOLDNETWORK_EXPORT ScaffoldNetworkParams {
  /// include scaffolds with all atoms replaced by dummies
  bool includeGenericScaffolds{true};
  /// include scaffolds with all bonds replaced by single bonds
  bool includeGenericBondScaffolds{false};
  /// remove attachment points from scaffolds and include the result
  bool includeScaffoldsWithoutAttachments{true};
  /// include the version of the scaffold with attachment points
  bool includeScaffoldsWithAttachments{true};
  /// include the names of the input molecules
  bool includeNames{false};
  /// keep only the first fragment from the bond breaking rule
  bool keepOnlyFirstFragment{true};
  /// do a pruning/flattening step before starting fragmenting
  bool pruneBeforeFragmenting{true};
  /// remove isotopes when flattening
  bool flattenIsotopes{true};
  /// remove chirality and bond stereo when flattening
  bool flattenChirality{true};
  /// keep only the largest fragment when doing flattening
  bool flattenKeepLargest{true};
  /// keep track of the number of molecules each scaffold was reached from
  bool collectMolCounts{true};

  /// the reaction(s) used to fragment. Should expect a single reactant and
  /// produce two products
  std::vector<std::shared_ptr<ChemicalReaction>> bondBreakersRxns;

  //! Uses one default bond-breaking reaction SMARTS.
  ScaffoldNetworkParams()
      : ScaffoldNetworkParams(std::vector<std::string>{
            "[!#0;R:1]-!@[!#0:2]>>[*:1]-[#0].[#0]-[*:2]"}) {}

  //! Builds the parameters from a list of bond-breaking reaction SMARTS.
  /*!
    Defined out of line; presumably fills bondBreakersRxns from the SMARTS -
    confirm against the implementation file.
  */
  ScaffoldNetworkParams(const std::vector<std::string> &bondBreakersSmarts);
};

//! Kind of transformation an edge of the network represents.
/*!
  The enumerator values are spelled out explicitly; NetworkEdge archives its
  `type` member when Boost serialization is enabled, so the numbers should be
  kept stable.
*/
enum class EdgeType {
  /// molecule -> fragment
  Fragment = 1,
  /// molecule -> generic molecule (all atoms are dummies)
  Generic = 2,
  /// molecule -> generic bond molecule (all bonds single)
  GenericBond = 3,
  /// molecule -> molecule with no attachment points
  RemoveAttachment = 4,
  /// molecule -> flattened molecule
  Initialize = 5
};

//! A typed, directed link between two entries of a scaffold network.
/*!
  beginIdx and endIdx are stored as plain indices; presumably they refer to
  positions in ScaffoldNetwork::nodes - confirm against the implementation.
*/
struct RDKIT_SCAFFOLDNETWORK_EXPORT NetworkEdge {
  size_t beginIdx;  ///< index the edge starts from
  size_t endIdx;    ///< index the edge points to
  EdgeType type;    ///< kind of transformation the edge represents

  //! Default edge: 0 -> 0 with type EdgeType::Initialize.
  NetworkEdge() : NetworkEdge(0, 0, EdgeType::Initialize) {}

  //! Edge from index \c bi to index \c ei of kind \c typ.
  NetworkEdge(size_t bi, size_t ei, EdgeType typ)
      : beginIdx(bi), endIdx(ei), type(typ) {}

  //! Edges compare equal when both indices and the type all match.
  bool operator==(const RDKit::ScaffoldNetwork::NetworkEdge &o) const {
    if (beginIdx != o.beginIdx) {
      return false;
    }
    if (endIdx != o.endIdx) {
      return false;
    }
    return type == o.type;
  }

  //! Exact negation of operator==.
  bool operator!=(const RDKit::ScaffoldNetwork::NetworkEdge &o) const {
    return !(*this == o);
  }
#ifdef RDK_USE_BOOST_SERIALIZATION
 private:
  friend class boost::serialization::access;
  //! Boost.Serialization hook: archives beginIdx, endIdx and type, in that
  //! order. The class version is not consulted.
  template <class Archive>
  void serialize(Archive &ar, const unsigned int version) {
    RDUNUSED_PARAM(version);
    ar & beginIdx;
    ar & endIdx;
    ar & type;
  }
#endif
};

//! The scaffold network itself: node SMILES, per-node counters and the edges.
/*!
  When RDK_USE_BOOST_SERIALIZATION is defined the struct can additionally be
  rebuilt from a Boost text archive and takes part in Boost.Serialization.
*/
struct RDKIT_SCAFFOLDNETWORK_EXPORT ScaffoldNetwork {
  std::vector<std::string> nodes;  ///< SMILES for the scaffolds
  std::vector<unsigned>
      counts;  ///< number of times each scaffold was encountered
  std::vector<unsigned>
      molCounts;  ///< number of molecules each scaffold was found in
  std::vector<NetworkEdge> edges;  ///< edges in the network
  ScaffoldNetwork() {}
#ifdef RDK_USE_BOOST_SERIALIZATION
  //! Rebuilds a network from the contents of a Boost text archive.
  /*!
    \param pkl  the text-archive contents to read the network from

    Nothing is caught here, so any exception raised while reading the archive
    propagates to the caller.
  */
  ScaffoldNetwork(const std::string &pkl) {
    // the archive is only ever read from, so an input-only stream is enough
    std::istringstream iss(pkl);
    boost::archive::text_iarchive ia(iss);
    ia >> *this;
  }

 private:
  friend class boost::serialization::access;
  //! Boost.Serialization hook shared by saving and loading.
  /*!
    \param ar       the archive being written to or read from
    \param version  the class version associated with the archive

    Field order is nodes, counts, [molCounts], edges and must not change.
  */
  template <class Archive>
  void serialize(Archive &ar, const unsigned int version) {
    ar & nodes;
    ar & counts;
    // molCounts is only present for class versions above 0; for a version-0
    // archive it is skipped and the member is left as it was.
    if (version > 0) {
      ar & molCounts;
    }
    ar & edges;
  }
#endif
};

//! update an existing ScaffoldNetwork using a set of molecules
/*!
  \param mols     the set of molecules to add; the concrete requirements on
                  \c T are set by the definition, which is not in this header
                  (presumably an iterable of molecule pointers - confirm)
  \param network  the network to update; passed by non-const reference so the
                  caller's object is modified
  \param params   the options controlling the update
*/
template <typename T>
void updateScaffoldNetwork(const T &mols, ScaffoldNetwork &network,
                           const ScaffoldNetworkParams &params);

//! create a new ScaffoldNetwork for a set of molecules
/*!
  Convenience wrapper: default-constructs a network, hands it to
  updateScaffoldNetwork() together with the arguments, and returns it.

  \param mols    the set of molecules, forwarded unchanged
  \param params  the options, forwarded unchanged
  \return the newly populated network, by value
*/
template <typename T>
ScaffoldNetwork createScaffoldNetwork(const T &mols,
                                      const ScaffoldNetworkParams &params) {
  ScaffoldNetwork network;
  updateScaffoldNetwork(mols, network, params);
  return network;
}
//! allows edge types to output nicely as strings
/*!
  Writes the enumerator's name, or "UNKNOWN" for a value that matches no
  enumerator, and returns the stream.
*/
inline std::ostream &operator<<(std::ostream &ostr,
                                const RDKit::ScaffoldNetwork::EdgeType &e) {
  // pick the label first so the stream is written to exactly once
  const char *label = "UNKNOWN";
  switch (e) {
    case EdgeType::Fragment:
      label = "Fragment";
      break;
    case EdgeType::Generic:
      label = "Generic";
      break;
    case EdgeType::GenericBond:
      label = "GenericBond";
      break;
    case EdgeType::RemoveAttachment:
      label = "RemoveAttachment";
      break;
    case EdgeType::Initialize:
      label = "Initialize";
      break;
    default:
      break;
  }
  return ostr << label;
}
//! allows edges to output nicely as strings
/*!
  Output has the form "NetworkEdge( <beginIdx>-><endIdx>, type:<type> )",
  where the type is written with the EdgeType stream operator.
*/
inline std::ostream &operator<<(std::ostream &ostr,
                                const RDKit::ScaffoldNetwork::NetworkEdge &e) {
  ostr << "NetworkEdge( ";
  ostr << e.beginIdx << "->" << e.endIdx;
  ostr << ", type:" << e.type;
  ostr << " )";
  return ostr;
}

//! returns parameters for constructing scaffold networks using BRICS
//! fragmentation
/*!
  \return a ScaffoldNetworkParams object, by value. The function is defined
          out of line, so the exact settings it chooses are not visible in
          this header.
*/
RDKIT_SCAFFOLDNETWORK_EXPORT ScaffoldNetworkParams getBRICSNetworkParams();

}  // namespace ScaffoldNetwork
}  // namespace RDKit

#ifdef RDK_USE_BOOST_SERIALIZATION
// Declares the Boost.Serialization class version of ScaffoldNetwork as 1.
// ScaffoldNetwork::serialize() only archives molCounts when the version it is
// given is greater than 0, so this value determines whether that member is
// part of the archive.
namespace boost {
namespace serialization {
template <>
struct version<RDKit::ScaffoldNetwork::ScaffoldNetwork> {
  BOOST_STATIC_CONSTANT(int, value = 1);
};
}  // namespace serialization
}  // namespace boost
#endif

#endif