File: SmilesParse.h

package info (click to toggle)
rdkit 202503.1-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 220,160 kB
  • sloc: cpp: 399,240; python: 77,453; ansic: 25,517; java: 8,173; javascript: 4,005; sql: 2,389; yacc: 1,565; lex: 1,263; cs: 1,081; makefile: 580; xml: 229; fortran: 183; sh: 105
file content (254 lines) | stat: -rw-r--r-- 9,142 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
//
//  Copyright (C) 2001-2021 Greg Landrum and other RDKit contributors
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
#include <RDGeneral/export.h>
#ifndef RD_SMILESPARSE_H
#define RD_SMILESPARSE_H

#include <GraphMol/SanitException.h>
#include <exception>
#include <map>
#include <memory>
#include <string>
#include <utility>

namespace RDKit {
class RWMol;
class Atom;
class Bond;

namespace SmilesParse {
//! Exception type of the SmilesParse namespace.
/*!
  The exception owns a copy of its message; what() returns that message.
 */
class RDKIT_SMILESPARSE_EXPORT SmilesParseException : public std::exception {
 public:
  //! Construct from a C string.
  /*!
    \param msg  the message text; a null pointer is treated as an empty
                message, because constructing a std::string from a null
                pointer is undefined behavior.
   */
  SmilesParseException(const char *msg) : _msg(msg ? msg : "") {}
  //! Construct from a std::string.
  /*!
    \param msg  the message text. It is taken by value and moved into the
                exception, so an rvalue argument is not copied and an lvalue
                argument is copied exactly once.
   */
  SmilesParseException(std::string msg) : _msg(std::move(msg)) {}
  //! \return the stored message as a null-terminated string owned by this
  //! object
  const char *what() const noexcept override { return _msg.c_str(); }
  ~SmilesParseException() noexcept override = default;

 private:
  std::string _msg;  //!< the message returned from what()
};

}  // namespace SmilesParse

namespace v2 {
namespace SmilesParse {
using RDKit::SmilesParse::SmilesParseException;

//! Options accepted by v2::SmilesParse::MolFromSmiles()
struct RDKIT_SMILESPARSE_EXPORT SmilesParserParams {
  //! sanitize the molecule after building it
  bool sanitize{true};
  //! recognize and parse CXSMILES
  bool allowCXSMILES{true};
  //! throw an exception if the CXSMILES parsing fails
  bool strictCXSMILES{true};
  //! parse (and set) the molecule name as well
  bool parseName{true};
  //! remove Hs after constructing the molecule
  bool removeHs{true};
  //! skip the final cleanup stage
  bool skipCleanup{false};
  //! enable debugging in the SMILES parser
  bool debugParse{false};
  //! allows SMILES "macros"
  std::map<std::string, std::string> replacements;
};

//! Options accepted by v2::SmilesParse::MolFromSmarts()
struct RDKIT_SMILESPARSE_EXPORT SmartsParserParams {
  //! recognize and parse CXSMILES extensions
  bool allowCXSMILES{true};
  //! throw an exception if the CXSMILES parsing fails
  bool strictCXSMILES{true};
  //! parse (and set) the molecule name as well
  bool parseName{true};
  //! toggles merging H atoms in the SMARTS into neighboring atoms
  bool mergeHs{false};
  //! skip the final cleanup stage
  bool skipCleanup{false};
  //! enable debugging in the SMARTS parser
  bool debugParse{false};
  //! allows SMARTS "macros"
  std::map<std::string, std::string> replacements;
};

//! Construct a molecule from a SMILES string (v2 API).
/*!
  \param smi     the SMILES text to parse
  \param params  parser options; a default-constructed SmilesParserParams is
                 used when omitted

  \return the new molecule, owned by the returned std::unique_ptr.

  NOTE(review): the definition is not part of this header. The _smiles
  literal operator in this file catches MolSanitizeException around this
  call, so at least that exception type can presumably escape; confirm the
  full error behavior against the implementation.
 */
RDKIT_SMILESPARSE_EXPORT std::unique_ptr<RDKit::RWMol> MolFromSmiles(
    const std::string &smi,
    const SmilesParserParams &params = SmilesParserParams());
//! Construct a molecule from a SMARTS string (v2 API).
/*!
  \param sma     the SMARTS text to parse
  \param params  parser options; a default-constructed SmartsParserParams is
                 used when omitted

  \return the new molecule, owned by the returned std::unique_ptr.
  NOTE(review): error behavior is defined by the implementation, which is not
  visible here.
 */
RDKIT_SMILESPARSE_EXPORT std::unique_ptr<RDKit::RWMol> MolFromSmarts(
    const std::string &sma,
    const SmartsParserParams &params = SmartsParserParams());

//! Construct an Atom from SMILES text; the result is owned by the returned
//! std::unique_ptr. NOTE(review): accepted syntax and error behavior are
//! defined by the implementation, which is not visible here.
RDKIT_SMILESPARSE_EXPORT std::unique_ptr<RDKit::Atom> AtomFromSmiles(
    const std::string &smi);
//! Construct a Bond from SMILES text; the result is owned by the returned
//! std::unique_ptr. NOTE(review): accepted syntax and error behavior are
//! defined by the implementation, which is not visible here.
RDKIT_SMILESPARSE_EXPORT std::unique_ptr<RDKit::Bond> BondFromSmiles(
    const std::string &smi);

//! Construct an Atom from SMARTS text; the result is owned by the returned
//! std::unique_ptr. NOTE(review): accepted syntax and error behavior are
//! defined by the implementation, which is not visible here.
RDKIT_SMILESPARSE_EXPORT std::unique_ptr<RDKit::Atom> AtomFromSmarts(
    const std::string &sma);
//! Construct a Bond from SMARTS text; the result is owned by the returned
//! std::unique_ptr. NOTE(review): accepted syntax and error behavior are
//! defined by the implementation, which is not visible here.
RDKIT_SMILESPARSE_EXPORT std::unique_ptr<RDKit::Bond> BondFromSmarts(
    const std::string &sma);

}  // namespace SmilesParse
}  // namespace v2

inline namespace v1 {
using RDKit::SmilesParse::SmilesParseException;

//! Options for the v1 (legacy) SMILES entry point SmilesToMol()
struct RDKIT_SMILESPARSE_EXPORT SmilesParserParams {
  //! enable debugging in the SMILES parser
  int debugParse{0};
  //! sanitize the molecule after building it
  bool sanitize{true};
  //! allows SMILES "macros"
  std::map<std::string, std::string> *replacements{nullptr};
  //! recognize and parse CXSMILES
  bool allowCXSMILES{true};
  //! throw an exception if the CXSMILES parsing fails
  bool strictCXSMILES{true};
  //! parse (and set) the molecule name as well
  bool parseName{true};
  //! remove Hs after constructing the molecule
  bool removeHs{true};
  //! skip the final cleanup stage
  bool skipCleanup{false};
};

//! Options for the v1 (legacy) SMARTS entry point SmartsToMol()
struct RDKIT_SMILESPARSE_EXPORT SmartsParserParams {
  //! enable debugging in the SMARTS parser
  int debugParse{0};
  //! allows SMARTS "macros"
  std::map<std::string, std::string> *replacements{nullptr};
  //! recognize and parse CXSMILES extensions
  bool allowCXSMILES{true};
  //! throw an exception if the CXSMILES parsing fails
  bool strictCXSMILES{true};
  //! parse (and set) the molecule name as well
  bool parseName{true};
  //! toggles merging H atoms in the SMARTS into neighboring atoms
  bool mergeHs{false};
  //! skip the final cleanup stage
  bool skipCleanup{false};
};

//! Construct a molecule from a SMILES string using v1 parser options
/*!
 \param smi  the SMILES to convert
 \param ps   the v1 options; each field is copied into the equivalent v2
             options structure before the v2 parser is called

 \return a pointer to the new molecule, released from the std::unique_ptr
 returned by the v2 parser; the caller is responsible for deleting it.
 */
inline RDKit::RWMol *SmilesToMol(const std::string &smi,
                                 const SmilesParserParams &ps) {
  RDKit::v2::SmilesParse::SmilesParserParams converted;

  // plain flags map one-to-one onto the v2 structure
  converted.sanitize = ps.sanitize;
  converted.removeHs = ps.removeHs;
  converted.allowCXSMILES = ps.allowCXSMILES;
  converted.strictCXSMILES = ps.strictCXSMILES;
  converted.parseName = ps.parseName;
  converted.skipCleanup = ps.skipCleanup;
  // v1 stores the debug switch as an int, v2 as a bool
  converted.debugParse = (ps.debugParse != 0);

  // v1 holds an optional pointer to the replacement map, v2 holds the map
  // itself, so copy it when one was supplied
  if (ps.replacements != nullptr) {
    converted.replacements = *ps.replacements;
  }

  RDKit::RWMol *mol =
      RDKit::v2::SmilesParse::MolFromSmiles(smi, converted).release();
  return mol;
}

//! Construct an Atom from SMILES text by forwarding to the v2 AtomFromSmiles()
/*!
 \return the pointer released from the std::unique_ptr returned by the v2
 function; the caller is responsible for deleting it.
 */
inline Atom *SmilesToAtom(const std::string &smi) {
  return RDKit::v2::SmilesParse::AtomFromSmiles(smi).release();
}

//! Construct a Bond from SMILES text by forwarding to the v2 BondFromSmiles()
/*!
 \return the pointer released from the std::unique_ptr returned by the v2
 function; the caller is responsible for deleting it.
 */
inline Bond *SmilesToBond(const std::string &smi) {
  Bond *bond = RDKit::v2::SmilesParse::BondFromSmiles(smi).release();
  return bond;
}

//! Construct a molecule from a SMILES string
/*!
 \param smi           the SMILES to convert
 \param debugParse    toggles verbose debugging information from the parser
 \param sanitize      toggles H removal and sanitization of the molecule
 \param replacements  a string->string map of replacement strings. See below
                      for more information about replacements. The map is
                      copied into the parser options when provided.

 \return a pointer to the new molecule; the caller is responsible for freeing
 this.

 The optional replacements map can be used to do string substitution of
 abbreviations in the input SMILES. The set of substitutions is repeatedly
 looped through until the string no longer changes. It is the responsibility
 of the caller to make sure that substitutions result in legal and sensible
 SMILES.

 Examples of substitutions:
 \code
   CC{Q}C with {"{Q}":"OCCO"} -> CCOCCOC
   C{A}C{Q}C with {"{Q}":"OCCO", "{A}":"C1(CC1)"} -> CC1(CC1)COCCOC
   C{A}C{Q}C with {"{Q}":"{X}CC{X}", "{A}":"C1CC1", "{X}":"N"} -> CC1CC1CNCCNC
 \endcode

 */
inline RWMol *SmilesToMol(
    const std::string &smi, int debugParse = 0, bool sanitize = true,
    std::map<std::string, std::string> *replacements = nullptr) {
  RDKit::v2::SmilesParse::SmilesParserParams params;
  // the legacy API carries the debug switch as an int, v2 as a bool
  params.debugParse = (debugParse != 0);
  // in this overload a single flag drives both sanitization and H removal
  params.sanitize = sanitize;
  params.removeHs = sanitize;
  if (replacements) {
    params.replacements = *replacements;
  }
  return RDKit::v2::SmilesParse::MolFromSmiles(smi, params).release();
}

//! Construct a molecule from a SMARTS string using v1 parser options
/*!
 \param sma  the SMARTS to convert
 \param ps   the v1 options; each field is copied into the equivalent v2
             options structure before the v2 parser is called

 \return a pointer to the new molecule, released from the std::unique_ptr
 returned by the v2 parser; the caller is responsible for deleting it.
 */
inline RWMol *SmartsToMol(const std::string &sma,
                          const SmartsParserParams &ps) {
  RDKit::v2::SmilesParse::SmartsParserParams converted;

  // plain flags map one-to-one onto the v2 structure
  converted.mergeHs = ps.mergeHs;
  converted.allowCXSMILES = ps.allowCXSMILES;
  converted.strictCXSMILES = ps.strictCXSMILES;
  converted.parseName = ps.parseName;
  converted.skipCleanup = ps.skipCleanup;
  // v1 stores the debug switch as an int, v2 as a bool
  converted.debugParse = (ps.debugParse != 0);

  // v1 holds an optional pointer to the replacement map, v2 holds the map
  // itself, so copy it when one was supplied
  if (ps.replacements != nullptr) {
    converted.replacements = *ps.replacements;
  }

  RWMol *mol = RDKit::v2::SmilesParse::MolFromSmarts(sma, converted).release();
  return mol;
}

//! Construct a molecule from a SMARTS string
/*!
 \param sma           the SMARTS to convert
 \param debugParse    toggles verbose debugging information from the parser
 \param mergeHs       toggles merging H atoms in the SMARTS into neighboring
                      atoms
 \param replacements  a string->string map of replacement strings. The map is
                      copied into the parser options when provided.
                      \see SmilesToMol for more information about replacements

 \return a pointer to the new molecule; the caller is responsible for freeing
 this.
 */
inline RWMol *SmartsToMol(
    const std::string &sma, int debugParse = 0, bool mergeHs = false,
    std::map<std::string, std::string> *replacements = nullptr) {
  RDKit::v2::SmilesParse::SmartsParserParams ps;
  // the legacy API carries the debug switch as an int, v2 as a bool
  ps.debugParse = (debugParse != 0);
  ps.mergeHs = mergeHs;
  if (replacements) {
    ps.replacements = *replacements;
  }
  return RDKit::v2::SmilesParse::MolFromSmarts(sma, ps).release();
}

//! Construct an Atom from SMARTS text by forwarding to the v2 AtomFromSmarts()
/*!
 \return the pointer released from the std::unique_ptr returned by the v2
 function; the caller is responsible for deleting it.
 */
inline Atom *SmartsToAtom(const std::string &sma) {
  Atom *atom = RDKit::v2::SmilesParse::AtomFromSmarts(sma).release();
  return atom;
}
//! Construct a Bond from SMARTS text by forwarding to the v2 BondFromSmarts()
/*!
 \return the pointer released from the std::unique_ptr returned by the v2
 function; the caller is responsible for deleting it.
 */
inline Bond *SmartsToBond(const std::string &sma) {
  Bond *bond = RDKit::v2::SmilesParse::BondFromSmarts(sma).release();
  return bond;
}
}  // namespace v1

//! User-defined string literal: "..."_smiles builds a molecule from SMILES
/*!
 The text is parsed with v2::SmilesParse::MolFromSmiles() using default
 parser options.

 \param text  start of the literal's characters
 \param len   number of characters in the literal (embedded nulls are kept)

 \return the parsed molecule, or a null pointer if the parser throws a
 MolSanitizeException. Any other exception propagates to the caller.

 The operator is spelled without whitespace between "" and the suffix; the
 whitespace-separated spelling is deprecated.
 */
inline std::unique_ptr<RDKit::RWMol> operator""_smiles(const char *text,
                                                       size_t len) {
  const std::string smi(text, len);
  try {
    return v2::SmilesParse::MolFromSmiles(smi);
  } catch (const RDKit::MolSanitizeException &) {
    // a literal that parses but cannot be sanitized yields "no molecule"
    // rather than an exception
    return nullptr;
  }
}
//! User-defined string literal: "..."_smarts builds a molecule from SMARTS
/*!
 The text is parsed with v2::SmilesParse::MolFromSmarts() using default
 parser options.

 \param text  start of the literal's characters
 \param len   number of characters in the literal (embedded nulls are kept)

 \return whatever MolFromSmarts() returns; exceptions thrown by the parser are
 not caught here.

 The operator is spelled without whitespace between "" and the suffix; the
 whitespace-separated spelling is deprecated.
 */
inline std::unique_ptr<RDKit::RWMol> operator""_smarts(const char *text,
                                                       size_t len) {
  const std::string sma(text, len);
  return v2::SmilesParse::MolFromSmarts(sma);
}

}  // namespace RDKit

#endif