File: CompressedSDMolSupplier.cpp

package info (click to toggle)
rdkit 202503.1-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 220,160 kB
  • sloc: cpp: 399,240; python: 77,453; ansic: 25,517; java: 8,173; javascript: 4,005; sql: 2,389; yacc: 1,565; lex: 1,263; cs: 1,081; makefile: 580; xml: 229; fortran: 183; sh: 105
file content (130 lines) | stat: -rw-r--r-- 4,283 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
// $Id: SDMolSupplier.cpp 585 2008-03-30 13:36:56Z glandrum $
//
//  Copyright (C) 2009 Greg Landrum
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//

#define NO_IMPORT_ARRAY
#include <RDBoost/python.h>
#include <ios>
#include <memory>
#include <string>

#include <boost/iostreams/device/file.hpp>
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/iostreams/filter/bzip2.hpp>
#include <boost/algorithm/string.hpp>

namespace io = boost::iostreams;

// ours
#include <GraphMol/FileParsers/MolSupplier.h>
#include <GraphMol/Wrap/ContextManagers.h>
#include <GraphMol/RDKitBase.h>
#include <RDBoost/Wrap.h>

#include "MolSupplier.h"

namespace python = boost::python;

namespace RDKit {

// Explicit specialization of the MolSupplIter template for
// ForwardSDMolSupplier; it is bound as the Python __iter__ method in
// compressedsdmolsup_wrap::wrap() below.
//
// ForwardSDMolSupplier cannot (yet?) be reset, so we have to override
// the template that was defined in MolSupplier.h (presumably the generic
// version rewinds the supplier before iterating -- confirm in that header).
// This specialization does nothing except hand back the supplier it was
// given.
//
// Note that this returns a pointer to the supplier itself, so be careful
// that it doesn't get deleted by python! (The binding uses
// return_internal_reference<1> for this function.)
template <>
ForwardSDMolSupplier *MolSupplIter(ForwardSDMolSupplier *suppl) {
  return suppl;
}

// Fetches the next molecule from a forward-only SD supplier.
//
// - If the supplier is not at its end, next() is called; any exception it
//   throws is swallowed and treated as "no molecule".
// - If a molecule was obtained it is returned to the caller.
// - If no molecule was obtained and the supplier is now at its end, a Python
//   StopIteration is set and boost::python::error_already_set is thrown.
// - Otherwise (no molecule, but more input remains) a null pointer is
//   returned.
//
// NOTE(review): the Python binding further down names
// MolSupplNext<ForwardSDMolSupplier>, i.e. a template instantiation rather
// than this non-template overload -- confirm whether this overload is still
// reachable.
ROMol *MolSupplNext(ForwardSDMolSupplier *suppl) {
  ROMol *mol = nullptr;
  if (!suppl->atEnd()) {
    try {
      mol = suppl->next();
    } catch (...) {
      // Best effort: a record that fails to parse yields no molecule.
      mol = nullptr;
    }
  }

  if (mol) {
    return mol;
  }

  if (suppl->atEnd()) {
    PyErr_SetString(PyExc_StopIteration, "End of supplier hit");
    throw boost::python::error_already_set();
  }
  return nullptr;
}

// Builds a ForwardSDMolSupplier that reads from a plain or compressed SD file.
//
// The decompression filter is chosen from the text after the last '.' in
// `filename` (the whole name is used when it contains no '.'):
//   "sdf" -> no filter
//   "gz"  -> gzip decompressor  (error if built with RDK_NOGZIP)
//   "bz2" -> bzip2 decompressor (error if built with RDK_NOBZIP2)
// Any other value is reported through throw_value_error(), as is a file that
// cannot be opened.
//
// \param filename  path of the file to read
// \param sanitize  forwarded unchanged to the ForwardSDMolSupplier constructor
// \param removeHs  forwarded unchanged to the ForwardSDMolSupplier constructor
// \return a newly allocated supplier; the Python binding registers this
//         function with manage_new_object, so Python owns the result.
ForwardSDMolSupplier *createForwardSupplier(std::string filename, bool sanitize,
                                            bool removeHs) {
  // Same result as splitting on '.' and taking the last piece, without
  // building a temporary vector of every piece.
  const std::string::size_type dotPos = filename.rfind('.');
  const std::string extension =
      dotPos == std::string::npos ? filename : filename.substr(dotPos + 1);

  std::unique_ptr<io::filtering_istream> strm(new io::filtering_istream());

  // Compressed input is binary data: it must not go through text-mode
  // translation (newline conversion, Ctrl-Z as EOF) on platforms that
  // distinguish text and binary streams. Plain .sdf keeps the default mode.
  std::ios_base::openmode openMode = std::ios_base::in;
  if (extension == "gz") {
#ifndef RDK_NOGZIP
    strm->push(io::gzip_decompressor());
    openMode |= std::ios_base::binary;
#else
    throw_value_error("gzip support not enabled");
#endif
  } else if (extension == "bz2") {
#ifndef RDK_NOBZIP2
    strm->push(io::bzip2_decompressor());
    openMode |= std::ios_base::binary;
#else
    throw_value_error("bzip2 support not enabled");
#endif
  } else if (extension != "sdf") {
    std::string errorTxt = "Unrecognized extension: " + extension;
    throw_value_error(errorTxt);
  }

  io::file_source fileSource(filename, openMode);
  if (!fileSource.is_open()) {
    std::string errorTxt = "could not open file: " + filename;
    throw_value_error(errorTxt);
  }
  // The source device goes on last; this completes the filter chain.
  strm->push(fileSource);

  // The stream is released to the supplier; the `true` argument presumably
  // tells it to take ownership of the stream -- confirm against the
  // ForwardSDMolSupplier constructor.
  return new ForwardSDMolSupplier(strm.release(), true, sanitize, removeHs);
}

// Python docstring attached to the _CompressedSDMolSupplier class.
// The usage example shows the CompressedSDMolSupplier factory function and a
// compressed SD file, which is what this module actually exposes.
std::string csdMolSupplierClassDoc =
    "A class which supplies molecules from an SD file, optionally compressed\n"
    "with gzip (.gz) or bzip2 (.bz2).\n"
    "\n"
    "  Usage examples:\n"
    "\n"
    "    1) Lazy evaluation: the molecules are not constructed until we ask "
    "for them:\n\n"
    "       >>> suppl = CompressedSDMolSupplier('in.sdf.gz')\n"
    "       >>> for mol in suppl:\n"
    "       ...    mol.GetNumAtoms()\n"
    "\n"
    "  Properties in the SD file are used to set properties on each molecule.\n"
    "  The properties are accessible using the mol.GetProp(propName) method.\n"
    "\n";
struct compressedsdmolsup_wrap {
  static void wrap() {
    python::class_<ForwardSDMolSupplier, boost::noncopyable>(
        "_CompressedSDMolSupplier", csdMolSupplierClassDoc.c_str(),
        python::no_init)
        .def("__iter__", &MolSupplIter<ForwardSDMolSupplier>,
             python::return_internal_reference<1>())
        .def("__enter__", &MolIOEnter<ForwardSDMolSupplier>,
             python::return_internal_reference<>())
        .def("__exit__", &MolIOExit<ForwardSDMolSupplier>)
        .def("__next__", &MolSupplNext<ForwardSDMolSupplier>,
             "Returns the next molecule in the file.  Raises _StopIteration_ "
             "on EOF.\n",
             python::return_value_policy<python::manage_new_object>());
    python::def("CompressedSDMolSupplier", createForwardSupplier,
                (python::arg("fileName"), python::arg("sanitize") = true,
                 python::arg("removeHs") = true),
                python::return_value_policy<python::manage_new_object>());
  };
};
}  // namespace RDKit

// Free-function hook that installs the compressed SD supplier bindings by
// delegating to RDKit::compressedsdmolsup_wrap::wrap(). Presumably called
// from the Python module's initialization code elsewhere -- confirm.
void wrap_compressedsdsupplier() {
  RDKit::compressedsdmolsup_wrap::wrap();
}