File: MaeMolSupplier.cpp

package info (click to toggle)
rdkit 201809.1%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 123,688 kB
  • sloc: cpp: 230,509; python: 70,501; java: 6,329; ansic: 5,427; sql: 1,899; yacc: 1,739; lex: 1,243; makefile: 445; xml: 229; fortran: 183; sh: 123; cs: 93
file content (147 lines) | stat: -rw-r--r-- 4,699 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
//
//  Copyright (C) 2018 Pat Lorton
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <sstream>
#include <RDGeneral/BadFileException.h>
#include <RDGeneral/FileParseException.h>
#include <GraphMol/MolInterchange/details.h>
#include <GraphMol/MolOps.h>
#include <GraphMol/RWMol.h>
#include <GraphMol/FileParsers/MolSupplier.h>
#include <maeparser/Reader.hpp>


namespace RDKit {

using RDKit::MolInterchange::bolookup;

// Builds a supplier that reads Maestro structures from an already-open stream.
//
//   inStream      - stream to parse; must be non-null (PRECONDITION)
//   takeOwnership - recorded in df_owner
//   sanitize      - recorded in df_sanitize; consulted by next()
//   removeHs      - recorded in df_removeHs; consulted by next()
//
// The first "f_m_ct" block is fetched right away, so atEnd() gives a
// meaningful answer before next() has ever been called.
MaeMolSupplier::MaeMolSupplier(std::istream *inStream, bool takeOwnership,
                               bool sanitize, bool removeHs) {
  PRECONDITION(inStream, "bad stream");

  // Processing options used later by next().
  df_sanitize = sanitize;
  df_removeHs = removeHs;

  // Stream bookkeeping.
  df_owner = takeOwnership;
  dp_inStream = inStream;

  // Attach the Maestro reader and prefetch the first structure block.
  d_reader.reset(new schrodinger::mae::Reader(*inStream));
  d_next_struct = d_reader->next("f_m_ct");
}

// Builds a supplier that opens `fileName` (in binary mode) and reads Maestro
// structures from it.
//
//   fileName - path of the Maestro file to open
//   sanitize - recorded in df_sanitize; consulted by next()
//   removeHs - recorded in df_removeHs; consulted by next()
//
// Throws BadFileException if the file cannot be opened. The supplier marks
// itself as owner (df_owner = true) of the stream it creates. The first
// "f_m_ct" block is fetched right away, so atEnd() gives a meaningful answer
// before next() has ever been called.
MaeMolSupplier::MaeMolSupplier(const std::string &fileName, bool sanitize,
                               bool removeHs) {
  df_owner = true;
  auto *ifs = new std::ifstream(fileName.c_str(), std::ios_base::binary);
  // operator! reports both failbit and badbit, so a separate bad() test is
  // redundant; `new` throws rather than returning null, so no null test either.
  if (!(*ifs)) {
    // The stream has not been stored in dp_inStream yet, so nothing else would
    // free it once we throw: release it here to avoid leaking it.
    delete ifs;
    std::ostringstream errout;
    errout << "Bad input file " << fileName;
    throw BadFileException(errout.str());
  }
  dp_inStream = static_cast<std::istream *>(ifs);
  df_sanitize = sanitize;
  df_removeHs = removeHs;

  d_reader.reset(new schrodinger::mae::Reader(*ifs));
  d_next_struct = d_reader->next("f_m_ct");
}

// Intentionally empty: the constructors in this file create the reader and
// prefetch the first structure themselves, so there is nothing left to do.
void MaeMolSupplier::init() {}
// No-op in this implementation: it neither rewinds the stream nor re-creates
// the reader, so calling it does not restart iteration.
void MaeMolSupplier::reset() {}

// Converts the prefetched "f_m_ct" block into a molecule and advances the
// supplier to the following block.
//
// The molecule gets:
//   - its _Name property from the block's "s_m_title" string property,
//   - one atom per row of the "m_atom" indexed block (atomic number, optional
//     formal charge) plus a single 3D conformer (id 0) with the x/y/z coords,
//   - one bond per "m_bond" row whose 1-based `from` index is not greater
//     than its `to` index (the reverse-direction duplicates are skipped).
//
// Post-processing: when df_sanitize is set the molecule is either passed
// through MolOps::removeHs (df_removeHs set) or MolOps::sanitizeMol;
// otherwise only the property cache is updated (non-strict). Chirality and
// bond stereo are then derived from the 3D coordinates.
//
// Returns a newly allocated molecule. The supplier keeps no pointer to it, so
// the caller is responsible for deleting it.
//
// Throws FileParseException when no structure is left or when a bond carries
// an order that is not present in `bolookup`. Exceptions raised by the reader
// or by the MolOps calls propagate unchanged; the partially built molecule is
// released in that case.
ROMol *MaeMolSupplier::next() {
  if (d_next_struct == nullptr) {
    throw FileParseException("All structures read from Maestro file");
  }
  // Advance first: even if building this molecule throws, the supplier is
  // already positioned on the following structure.
  auto current_struct = d_next_struct;
  d_next_struct = d_reader->next("f_m_ct");

  // Owned by a smart pointer until the very end so that any exception below
  // does not leak the half-built molecule.
  std::unique_ptr<RWMol> mol(new RWMol());
  auto mol_title = current_struct->getStringProperty("s_m_title");
  mol->setProp(common_properties::_Name, mol_title);

  // Atom data is in the m_atom indexed block
  {
    const auto atom_data = current_struct->getIndexedBlock("m_atom");
    // All atoms are guaranteed to have these four field names:
    const auto atomic_numbers = atom_data->getIntProperty("i_m_atomic_number");
    const auto xs = atom_data->getRealProperty("r_m_x_coord");
    const auto ys = atom_data->getRealProperty("r_m_y_coord");
    const auto zs = atom_data->getRealProperty("r_m_z_coord");
    const auto size = atomic_numbers->size();

    // Formal charges are optional: a missing column leaves the pointer empty
    // and every atom keeps its default charge.
    std::shared_ptr<schrodinger::mae::IndexedIntProperty> atomic_charges;
    try {
      atomic_charges = atom_data->getIntProperty("i_m_formal_charge");
    } catch (const std::out_of_range &) {
      // no formal charge column in this block
    }

    // atomic numbers, and x, y, and z coordinates
    std::unique_ptr<RDKit::Conformer> conf(new RDKit::Conformer(size));
    conf->set3D(true);
    conf->setId(0);
    for (size_t i = 0; i < size; ++i) {
      Atom *atom = new Atom(atomic_numbers->at(i));
      mol->addAtom(atom, true, true);
      if (atomic_charges) {
        atom->setFormalCharge(atomic_charges->at(i));
      }

      RDGeom::Point3D pos;
      pos.x = xs->at(i);
      pos.y = ys->at(i);
      pos.z = zs->at(i);
      conf->setAtomPos(i, pos);
    }
    // Hand the conformer over to the molecule.
    mol->addConformer(conf.release(), false);
  }

  // Bond data is in the m_bond indexed block
  {
    const auto bond_data = current_struct->getIndexedBlock("m_bond");
    // All bonds are guaranteed to have these three field names:
    auto from_atoms = bond_data->getIntProperty("i_m_from");
    auto to_atoms = bond_data->getIntProperty("i_m_to");
    auto orders = bond_data->getIntProperty("i_m_order");
    const auto size = from_atoms->size();

    for (size_t i = 0; i < size; ++i) {
      // Maestro atoms are 1 indexed!
      const auto from_atom = from_atoms->at(i) - 1;
      const auto to_atom = to_atoms->at(i) - 1;
      if (from_atom > to_atom) {
        continue;  // Maestro files double-list bonds
      }

      // Reject unknown bond orders instead of dereferencing end().
      const auto order_it = bolookup.find(orders->at(i));
      if (order_it == bolookup.end()) {
        std::ostringstream errout;
        errout << "Unsupported bond order " << orders->at(i)
               << " in Maestro file";
        throw FileParseException(errout.str());
      }

      std::unique_ptr<Bond> bond(new Bond(order_it->second));
      bond->setOwningMol(mol.get());
      bond->setBeginAtomIdx(from_atom);
      bond->setEndAtomIdx(to_atom);
      // Hand the bond over to the molecule.
      mol->addBond(bond.release(), true);
    }
  }

  if (df_sanitize) {
    if (df_removeHs) {
      MolOps::removeHs(*mol, false, false);
    } else {
      MolOps::sanitizeMol(*mol);
    }
  } else {
    // we need some properties for the chiral setup
    mol->updatePropertyCache(false);
  }

  /* Set tetrahedral chirality from 3D co-ordinates */
  MolOps::assignChiralTypesFrom3D(*mol);
  MolOps::detectBondStereochemistry(*mol);

  // Construction succeeded: hand the molecule to the caller.
  return static_cast<ROMol *>(mol.release());
}

// Reports whether every structure has been consumed, i.e. whether the
// look-ahead slot d_next_struct no longer holds an "f_m_ct" block.
bool MaeMolSupplier::atEnd() {
  const bool exhausted = (d_next_struct == nullptr);
  return exhausted;
}
}