File: RGroupData.h

package info (click to toggle)
rdkit 202009.4-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 129,624 kB
  • sloc: cpp: 288,030; python: 75,571; java: 6,999; ansic: 5,481; sql: 1,968; yacc: 1,842; lex: 1,254; makefile: 572; javascript: 461; xml: 229; fortran: 183; sh: 134; cs: 93
file content (120 lines) | stat: -rw-r--r-- 3,533 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
//
//  Copyright (C) 2017 Novartis Institutes for BioMedical Research
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
#ifndef RGROUP_DATA
#define RGROUP_DATA

#include "../RDKitBase.h"
#include "RGroupUtils.h"
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <GraphMol/Substruct/SubstructMatch.h>
#include <GraphMol/ChemTransforms/ChemTransforms.h>
#include <boost/scoped_ptr.hpp>
#include <boost/shared_ptr.hpp>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>


namespace RDKit
{

//! A single rgroup attached to a given core.  
struct RGroupData {
  boost::shared_ptr<RWMol> combinedMol;
  std::vector<boost::shared_ptr<ROMol>> mols;  // All the mols in the rgroup
  std::set<std::string> smilesSet;             // used for rgroup equivalence
  std::string smiles;                          // smiles for all the mols in the rgroup (with attachments)
  std::set<int> attachments;                   // core attachment points
  bool is_hydrogen = false;
  bool single_fragment = true;
  bool multiple_attachments = false;
  bool is_linker = false;
  bool labelled = false;

 private:
  RGroupData(const RGroupData &rhs);

 public:
  RGroupData() : combinedMol(), mols(), smilesSet(), smiles(), attachments() {}

  void add(boost::shared_ptr<ROMol> newMol,
           const std::vector<int> &rlabel_attachments) {
    // some fragments can be add multiple times if they are cyclic
    for (auto &mol : mols) {
      if (newMol.get() == mol.get()) {
        return;
      }
    }

    labelled = false;
    std::copy(rlabel_attachments.begin(), rlabel_attachments.end(),
              std::inserter(attachments, attachments.end()));

    mols.push_back(newMol);
    std::string smi = MolToSmiles(*newMol, true);
    // REVIEW: we probably shouldn't be using a set here... the merging of
    // duplicates is likely not what we want
    smilesSet.insert(smi);
    if (!combinedMol.get()) {
      combinedMol = boost::shared_ptr<RWMol>(new RWMol(*mols[0].get()));
    } else {
      ROMol *m = combineMols(*combinedMol.get(), *newMol.get());
      single_fragment = false;
      m->updateProps(*combinedMol.get());
      combinedMol.reset(new RWMol(*m));
      delete m;
    }
    smiles = getSmiles();
    combinedMol->setProp(common_properties::internalRgroupSmiles, smiles);
    computeIsHydrogen();
    is_linker = single_fragment && attachments.size() > 1;
  }

  std::map<int, int> getNumBondsToRlabels() const {
    std::map<int, int> rlabelsUsedCount;

    for (ROMol::AtomIterator atIt = combinedMol->beginAtoms();
         atIt != combinedMol->endAtoms(); ++atIt) {
      Atom *atom = *atIt;
      int rlabel;
      if (atom->getPropIfPresent<int>(RLABEL, rlabel)) {
        rlabelsUsedCount[rlabel] += 1;
      }
    }
    return rlabelsUsedCount;
  }

 private:
  void computeIsHydrogen() {  // is the rgroup all Hs
    for (const auto &mol : mols) {
      for (ROMol::AtomIterator atIt = mol->beginAtoms();
           atIt != mol->endAtoms(); ++atIt) {
        if ((*atIt)->getAtomicNum() > 1) {
	  is_hydrogen = false;
	  return;
        }
      }
    }
    is_hydrogen = true;
  }

  //! compute the canonical smiles for the attachments (bug: removes dupes since we are using a set...)
  std::string getSmiles() const {
    std::string s;
    for (const auto &it : smilesSet) {
      if (s.length()) {
        s += ".";
      }
      s += it;
    }
    return s;
  }
};
}

#endif