//
// Copyright (c) 2022 Brian P Kelley
// All rights reserved.
//
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include <RDGeneral/export.h>
#ifndef RD_CDXML_FILEPARSERS_H
#define RD_CDXML_FILEPARSERS_H
#include <RDGeneral/types.h>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
namespace RDKit {
class RWMol;
namespace v2 {
namespace CDXMLParser {
//! \brief Input formats that can be requested from the CDXML parser functions.
//! CDX input can only be read when the RDKit was built with the ChemDraw
//! parser.
enum class CDXMLFormat {
  CDXML = 0,  //!< the input is CDXML
  CDX = 1,    //!< the input is CDX
  Auto = 2,   //!< detect whether the input is CDXML or CDX
};
//! \brief Returns true if the RDKit was built with ChemDraw CDX support
RDKIT_FILEPARSERS_EXPORT bool hasChemDrawCDXSupport();
//! \brief Options controlling the parsing and post-processing of CDXML
//! (and, when ChemDraw support is available, CDX) input.
struct RDKIT_FILEPARSERS_EXPORT CDXMLParserParams {
  //! toggles sanitization and stereochemistry perception of the molecule
  bool sanitize = true;
  //! toggles removal of Hs from the molecule; H removal is only done if the
  //! molecule is sanitized
  bool removeHs = true;
  //! input format to expect
  CDXMLFormat format = CDXMLFormat::Auto;

  //! Keeps the in-class default for every option.
  CDXMLParserParams() = default;

  //! Sets all three options explicitly.
  CDXMLParserParams(bool sanitize, bool removeHs, CDXMLFormat format)
      : sanitize{sanitize}, removeHs{removeHs}, format{format} {}
};
//! \brief construct molecules from a stream of CDXML data
//!  The RDKit is optionally built with the Revvity ChemDraw parser.
//!  If this is available, CDX and CDXML can be read, see CDXMLParserParams.
//!  Note that the CDXML format is large and complex, the RDKit doesn't
//!  support full functionality, just the base ones required for molecule and
//!  reaction parsing.
//!  Note: If the ChemDraw extensions are available, this auto detects between
//!  CDXML and CDX
/*!
 *   \param inStream - stream containing the CDXML data
 *   \param params - parameters controlling the parsing and post-processing
 *   \return the molecules constructed from the stream
 */
RDKIT_FILEPARSERS_EXPORT std::vector<std::unique_ptr<RWMol>>
MolsFromCDXMLDataStream(std::istream &inStream,
                        const CDXMLParserParams &params = CDXMLParserParams());
//! \brief construct molecules from a CDXML file
//!  The RDKit is optionally built with the Revvity ChemDraw parser.
//!  If this is available, CDX and CDXML can be read, see CDXMLParserParams.
//!  Note that the CDXML format is large and complex, the RDKit doesn't
//!  support full functionality, just the base ones required for molecule and
//!  reaction parsing.
/*!
 *   \param filename - name of the CDXML file
 *   \param params - parameters controlling the parsing and post-processing
 *   \return the molecules constructed from the file
 */
RDKIT_FILEPARSERS_EXPORT std::vector<std::unique_ptr<RWMol>> MolsFromCDXMLFile(
    const std::string &filename,
    const CDXMLParserParams &params = CDXMLParserParams(true, true,
                                                        CDXMLFormat::Auto));
//! \brief construct molecules from a CDXML block
//!  The RDKit is optionally built with the Revvity ChemDraw parser.
//!  If this is available, CDX and CDXML can be read, see CDXMLParserParams.
//!  Note that the CDXML format is large and complex, the RDKit doesn't
//!  support full functionality, just the base ones required for molecule and
//!  reaction parsing.
//!  Note: If the ChemDraw extensions are available,
//!  CDXMLFormat::Auto attempts to see if the input string is CDXML or CDX
/*!
 *   \param cdxml - string containing the CDXML block
 *   \param params - parameters controlling the parsing and post-processing
 *   \return the molecules constructed from the string
 */
RDKIT_FILEPARSERS_EXPORT std::vector<std::unique_ptr<RWMol>> MolsFromCDXML(
    const std::string &cdxml,
    const CDXMLParserParams &params = CDXMLParserParams(
        true, true, v2::CDXMLParser::CDXMLFormat::Auto));
} // namespace CDXMLParser
} // namespace v2
inline namespace v1 {
//! \brief construct molecules from a CDXML file
//! Note that the CDXML format is large and complex, the RDKit doesn't support
//! full functionality, just the base ones required for molecule and
//! reaction parsing.
//! Note: If the ChemDraw extensions are available, this auto detects between
//! CDXML and CDX
/*!
* \param inStream - string containing the mol block
* \param sanitize - toggles sanitization and stereochemistry
* perception of the molecule
* \param removeHs - toggles removal of Hs from the molecule. H removal
* is only done if the molecule is sanitized
* correctness of the contents.
*/
inline std::vector<std::unique_ptr<RWMol>> CDXMLDataStreamToMols(
std::istream &inStream, bool sanitize = true, bool removeHs = true) {
v2::CDXMLParser::CDXMLParserParams params(
sanitize, removeHs, v2::CDXMLParser::CDXMLFormat::Auto);
return v2::CDXMLParser::MolsFromCDXMLDataStream(inStream, params);
}
//! \brief construct molecules from a CDXML file
//! Note that the CDXML format is large and complex, the RDKit doesn't support
//! full functionality, just the base ones required for molecule and
//! reaction parsing.
//! Note: If the ChemDraw extensions are available,
//! This function uses the file extension to determine the file type, .cdx or .cdxml
//! If not, it defaults to CDXML
/*!
* \param fileName - cdxml fileName
* \param sanitize - toggles sanitization and stereochemistry
* perception of the molecule
* \param removeHs - toggles removal of Hs from the molecule. H removal
* is only done if the molecule is sanitized
* correctness of the contents.
*/
inline std::vector<std::unique_ptr<RWMol>> CDXMLFileToMols(
const std::string &filename, bool sanitize = true, bool removeHs = true) {
v2::CDXMLParser::CDXMLParserParams params;
params.sanitize = sanitize;
params.removeHs = removeHs;
params.format = v2::CDXMLParser::CDXMLFormat::Auto;
return v2::CDXMLParser::MolsFromCDXMLFile(filename, params);
}
//! \brief construct molecules from a CDXML block
//! Note that the CDXML format is large and complex, the RDKit doesn't support
//! full functionality, just the base ones required for molecule and
//! reaction parsing.
//! Note: to parse CDX files see the CDXParserParams variant of this function
/*!
* \param cdxml - string containing the mol block
* \param sanitize - toggles sanitization and stereochemistry
* perception of the molecule
* \param removeHs - toggles removal of Hs from the molecule. H removal
* is only done if the molecule is sanitized
* correctness of the contents.
*/
inline std::vector<std::unique_ptr<RWMol>> CDXMLToMols(const std::string &cdxml,
bool sanitize = true,
bool removeHs = true) {
v2::CDXMLParser::CDXMLParserParams params;
params.sanitize = sanitize;
params.removeHs = removeHs;
params.format = v2::CDXMLParser::CDXMLFormat::Auto;
return v2::CDXMLParser::MolsFromCDXML(cdxml, params);
}
} // namespace v1
} // namespace RDKit
#endif  // RD_CDXML_FILEPARSERS_H