File: CDXMLParser.h

package info (click to toggle)
rdkit 202503.6-3
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 222,000 kB
  • sloc: cpp: 411,111; python: 78,482; ansic: 26,181; java: 8,285; javascript: 4,404; sql: 2,393; yacc: 1,626; lex: 1,267; cs: 1,090; makefile: 581; xml: 229; fortran: 183; sh: 121
file content (163 lines) | stat: -rw-r--r-- 6,676 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
//
//  Copyright (c) 2022 Brian P Kelley
//  All rights reserved.
//
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
#include <RDGeneral/export.h>
#ifndef RD_CDXML_FILEPARSERS_H
#define RD_CDXML_FILEPARSERS_H

#include <RDGeneral/types.h>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

namespace RDKit {
class RWMol;

namespace v2 {
namespace CDXMLParser {

//! \brief Selects how the parser should interpret its input, see
//!  CDXMLParserParams::format
enum class CDXMLFormat {
  CDXML = 0,  //!< treat the input as CDXML
  CDX = 1,    //!< treat the input as CDX; per the parser documentation CDX
              //!< can be read when the ChemDraw parser is available
  Auto = 2,   //!< let the parser decide whether the input is CDXML or CDX
};

//! \brief Returns true if the RDKit was built with ChemDraw CDX support
RDKIT_FILEPARSERS_EXPORT bool hasChemDrawCDXSupport();

//! \brief Options controlling the parsing and post-processing done by the
//!  CDXML parser functions
struct RDKIT_FILEPARSERS_EXPORT CDXMLParserParams {
  //! toggles sanitization and stereochemistry perception of the molecule
  bool sanitize{true};
  //! toggles removal of Hs from the molecule. H removal is only done if the
  //! molecule is sanitized
  bool removeHs{true};
  //! input format to expect; Auto asks the parser to detect it
  CDXMLFormat format{CDXMLFormat::Auto};

  //! Uses the in-class defaults: sanitize, remove Hs, auto-detect the format
  CDXMLParserParams() = default;

  //! Sets every option explicitly
  CDXMLParserParams(bool sanitize, bool removeHs, CDXMLFormat format)
      : sanitize{sanitize}, removeHs{removeHs}, format{format} {}
};

//! \brief construct molecules from a stream of CDXML data
//! The RDKit is optionally built with the Revvity ChemDraw parser.
//! If this is available, CDX and CDXML can be read, see CDXMLParserParams.
//!   Note that the CDXML format is large and complex; the RDKit doesn't
//!   support the full functionality, just the basics required for molecule
//!   and reaction parsing.
//! Note: If the ChemDraw extensions are available, this auto detects between
//!  CDXML and CDX
/*!
 *   \param inStream - input stream to read the CDXML data from
 *   \param params - parameters controlling the parsing and post-processing
 *   \return the molecules that were read, each owned by a std::unique_ptr
 */
RDKIT_FILEPARSERS_EXPORT std::vector<std::unique_ptr<RWMol>>
MolsFromCDXMLDataStream(std::istream &inStream,
                        const CDXMLParserParams &params = CDXMLParserParams());
//! \brief construct molecules from a CDXML file
//! The RDKit is optionally built with the Revvity ChemDraw parser.
//! If this is available, CDX and CDXML can be read, see CDXMLParserParams.
//!   Note that the CDXML format is large and complex; the RDKit doesn't
//!   support the full functionality, just the basics required for molecule
//!   and reaction parsing.
/*!
 *   \param filename - name of the CDXML file to read
 *   \param params - parameters controlling the parsing and post-processing;
 *                   when omitted a default-constructed CDXMLParserParams is
 *                   used, so the defaults live in one place (the struct)
 *   \return the molecules that were read, each owned by a std::unique_ptr
 */
RDKIT_FILEPARSERS_EXPORT std::vector<std::unique_ptr<RWMol>> MolsFromCDXMLFile(
    const std::string &filename,
    const CDXMLParserParams &params = CDXMLParserParams());

//! \brief construct molecules from a CDXML block
//! The RDKit is optionally built with the Revvity ChemDraw parser.
//! If this is available, CDX and CDXML can be read, see CDXMLParserParams.
//!   Note that the CDXML format is large and complex; the RDKit doesn't
//!   support the full functionality, just the basics required for molecule
//!   and reaction parsing.
//! Note: If the ChemDraw extensions are available,
//!   CDXMLFormat::Auto attempts to see if the input string is CDXML or CDX
/*!
 *   \param cdxml - string containing the CDXML data
 *   \param params - parameters controlling the parsing and post-processing;
 *                   when omitted a default-constructed CDXMLParserParams is
 *                   used, so the defaults live in one place (the struct)
 *   \return the molecules that were read, each owned by a std::unique_ptr
 */
RDKIT_FILEPARSERS_EXPORT std::vector<std::unique_ptr<RWMol>> MolsFromCDXML(
    const std::string &cdxml,
    const CDXMLParserParams &params = CDXMLParserParams());
}  // namespace CDXMLParser
}  // namespace v2

inline namespace v1 {

//! \brief construct molecules from a CDXML file
//! Note that the CDXML format is large and complex, the RDKit doesn't support
//!  full functionality, just the base ones required for molecule and
//!  reaction parsing.
//! Note: If the ChemDraw extensions are available, this auto detects between
//!  CDXML and CDX
/*!
 *   \param inStream - string containing the mol block
 *   \param sanitize - toggles sanitization and stereochemistry
 *                     perception of the molecule
 *   \param removeHs - toggles removal of Hs from the molecule. H removal
 *                     is only done if the molecule is sanitized
 * correctness of the contents.
 */
inline std::vector<std::unique_ptr<RWMol>> CDXMLDataStreamToMols(
    std::istream &inStream, bool sanitize = true, bool removeHs = true) {
  v2::CDXMLParser::CDXMLParserParams params(
      sanitize, removeHs, v2::CDXMLParser::CDXMLFormat::Auto);
  return v2::CDXMLParser::MolsFromCDXMLDataStream(inStream, params);
}

//! \brief construct molecules from a CDXML file
//! Note that the CDXML format is large and complex, the RDKit doesn't support
//!  full functionality, just the base ones required for molecule and
//!  reaction parsing.
//! Note: If the ChemDraw extensions are available,
//!   This function uses the file extension to determine the file type, .cdx or .cdxml
//!   If not, it defaults to CDXML
/*!
 *   \param fileName - cdxml fileName
 *   \param sanitize - toggles sanitization and stereochemistry
 *                     perception of the molecule
 *   \param removeHs - toggles removal of Hs from the molecule. H removal
 *                     is only done if the molecule is sanitized
 * correctness of the contents.
 */
inline std::vector<std::unique_ptr<RWMol>> CDXMLFileToMols(
    const std::string &filename, bool sanitize = true, bool removeHs = true) {
  v2::CDXMLParser::CDXMLParserParams params;
  params.sanitize = sanitize;
  params.removeHs = removeHs;
  params.format = v2::CDXMLParser::CDXMLFormat::Auto;
  return v2::CDXMLParser::MolsFromCDXMLFile(filename, params);
}

//! \brief construct molecules from a CDXML block
//! Note that the CDXML format is large and complex, the RDKit doesn't support
//!  full functionality, just the base ones required for molecule and
//!  reaction parsing.
//! Note: to parse CDX files see the CDXParserParams variant of this function
/*!
 *   \param cdxml - string containing the mol block
 *   \param sanitize - toggles sanitization and stereochemistry
 *                     perception of the molecule
 *   \param removeHs - toggles removal of Hs from the molecule. H removal
 *                     is only done if the molecule is sanitized
 * correctness of the contents.
 */
inline std::vector<std::unique_ptr<RWMol>> CDXMLToMols(const std::string &cdxml,
                                                       bool sanitize = true,
                                                       bool removeHs = true) {
  v2::CDXMLParser::CDXMLParserParams params;
  params.sanitize = sanitize;
  params.removeHs = removeHs;
  params.format = v2::CDXMLParser::CDXMLFormat::Auto;  
  return v2::CDXMLParser::MolsFromCDXML(cdxml, params);
}
}  // namespace v1

}  // namespace RDKit
#endif  // RD_CDXML_FILEPARSERS_H