File: StripSmallFragments.cpp

package info (click to toggle)
rdkit 201809.1%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 123,688 kB
  • sloc: cpp: 230,509; python: 70,501; java: 6,329; ansic: 5,427; sql: 1,899; yacc: 1,739; lex: 1,243; makefile: 445; xml: 229; fortran: 183; sh: 123; cs: 93
file content (144 lines) | stat: -rw-r--r-- 4,322 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
//
//  Copyright (C) 2016 Novartis Institutes for BioMedical Research
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
#include <map>

#include "../MolOps.h"
#include "../Descriptors/MolDescriptors.h"
#include "StripSmallFragments.h"
#include "../SmilesParse/SmilesWrite.h"
#include "../FileParsers/MolFileStereochem.h"

// define snprintf for msvc
#if _MSC_VER
#if _MSC_VER < 1900
#define snprintf _snprintf
#endif
#endif

namespace RDKit {
namespace StructureCheck {

// File-local convenience wrapper: hands the molecule to
// Descriptors::calcMolFormula and returns the resulting string unchanged.
static inline std::string getMolecularFormula(const ROMol &mol) {
  std::string formula = RDKit::Descriptors::calcMolFormula(mol);
  return formula;
}

// Stores the molecule's mass and formula as molecule properties.
//
// The value returned by Descriptors::calcExactMW is stored as a double under
// "MW_PRE" or "MW_POST". The string returned by getMolecularFormula is stored
// under "MF_PRE" or "MF_POST", but only when it is not empty.
//
//  mol - molecule to annotate
//  pre - true selects the "*_PRE" property names, false the "*_POST" ones
void AddMWMF(RWMol &mol, bool pre) {
  // The mass is computed before the formula, so a failure in either
  // calculation leaves both properties untouched.
  const double mass = RDKit::Descriptors::calcExactMW(mol);

  const std::string formula = getMolecularFormula(mol);
  if (!formula.empty()) {
    mol.setProp((pre ? "MF_PRE" : "MF_POST"), formula);
  }

  // The mass is stored as a numeric property, so no text formatting is needed.
  mol.setProp((pre ? "MW_PRE" : "MW_POST"), mass);
}

// Reduces mol to its largest fragment.
//
// The molecule is split with MolOps::getMolFrags (fragments are not
// sanitized). If that yields fewer than two fragments, mol is left alone and
// false is returned. Otherwise mol is overwritten with the fragment that has
// the most atoms (the later one when several tie) and true is returned.
//
// If mol carries a _MolFileChiralFlag property, its value is copied onto the
// kept fragment. When that value is non-zero the flag is reset to 0 unless the
// kept fragment shows some evidence of chirality: an atom flagged
// _ChiralityPossible after stereo perception on a sanitized copy, an atom with
// a tetrahedral CW/CCW chiral tag, or a bond drawn as BEGINDASH/BEGINWEDGE.
//
//  mol     - molecule to strip; replaced by its largest fragment
//  verbose - when true, one warning is logged per discarded fragment
bool StripSmallFragments(RWMol &mol, bool verbose) {
  const bool sanitizeFrags = false;
  std::vector<boost::shared_ptr<ROMol>> pieces =
      MolOps::getMolFrags(mol, sanitizeFrags);
  if (pieces.size() < 2) {
    return false;
  }

  // Find the biggest piece; ">=" lets a later piece of equal size win.
  size_t keepIdx = 0;
  unsigned int keepAtoms = pieces[0]->getNumAtoms();
  for (size_t idx = 1; idx < pieces.size(); ++idx) {
    const unsigned int nAtoms = pieces[idx]->getNumAtoms();
    if (nAtoms >= keepAtoms) {
      keepAtoms = nAtoms;
      keepIdx = idx;
    }
  }

  if (verbose) {
    std::string molName = "<no name>";
    mol.getPropIfPresent(common_properties::_Name, molName);
    for (size_t idx = 0; idx < pieces.size(); ++idx) {
      if (idx == keepIdx) {
        continue;
      }
      BOOST_LOG(rdWarningLog) << molName << " removed fragment i=" << idx
                              << " with " << pieces[idx]->getNumAtoms()
                              << " atoms" << std::endl;
    }
  }

  // Carry the mol file chiral flag over to the piece we keep, and remember
  // its value so it can be validated once mol has been replaced.
  ROMol &kept = *pieces[keepIdx];
  unsigned int molFileFlag = 0;
  if (mol.hasProp(RDKit::common_properties::_MolFileChiralFlag)) {
    molFileFlag =
        mol.getProp<unsigned int>(RDKit::common_properties::_MolFileChiralFlag);
    kept.setProp<unsigned int>(RDKit::common_properties::_MolFileChiralFlag,
                               molFileFlag);
  }

  mol = kept;

  // Without a non-zero flag there is nothing to validate.
  if (molFileFlag == 0) {
    return true;
  }

  // Decide whether the flag could have originated from the kept fragment.
  bool chiralEvidence = false;

  // First attempt: perceive stereochemistry on a sanitized scratch copy.
  // Any failure here is ignored on purpose; the raw tags below are the
  // fallback.
  RWMol scratch(mol);
  try {
    MolOps::sanitizeMol(scratch);
    ClearSingleBondDirFlags(scratch);
    MolOps::detectBondStereochemistry(scratch);
    MolOps::assignStereochemistry(scratch, true, true, true);
    for (auto atomIt = scratch.beginAtoms(); atomIt != scratch.endAtoms();
         ++atomIt) {
      if ((*atomIt)->hasProp(common_properties::_ChiralityPossible)) {
        chiralEvidence = true;
        break;
      }
    }
  } catch (...) {
  }

  // Second attempt: look at the chiral tags and wedge/dash marks already
  // present on the fragment itself.
  if (!chiralEvidence) {
    for (auto atomIt = mol.beginAtoms(); atomIt != mol.endAtoms(); ++atomIt) {
      const auto tag = (*atomIt)->getChiralTag();
      if (tag == Atom::CHI_TETRAHEDRAL_CW ||
          tag == Atom::CHI_TETRAHEDRAL_CCW) {
        chiralEvidence = true;
        break;
      }
    }

    for (auto bondIt = mol.beginBonds(); bondIt != mol.endBonds(); ++bondIt) {
      const auto dir = (*bondIt)->getBondDir();
      if (dir == Bond::BEGINDASH || dir == Bond::BEGINWEDGE) {
        chiralEvidence = true;
        break;
      }
    }
  }

  if (!chiralEvidence) {
    mol.setProp<unsigned int>(RDKit::common_properties::_MolFileChiralFlag,
                              0);
  }
  return true;
}

}  // namespace StructureCheck
}  // namespace RDKit