File: RGroupCore.h

package info (click to toggle)
rdkit 202503.1-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 220,160 kB
  • sloc: cpp: 399,240; python: 77,453; ansic: 25,517; java: 8,173; javascript: 4,005; sql: 2,389; yacc: 1,565; lex: 1,263; cs: 1,081; makefile: 580; xml: 229; fortran: 183; sh: 105
file content (118 lines) | stat: -rw-r--r-- 4,063 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
//
//  Copyright (C) 2020-2021 Novartis Institutes for BioMedical Research and
//  other RDKit contributors
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
#ifndef RGROUP_CORE
#define RGROUP_CORE

#include <GraphMol/SmilesParse/SmartsWrite.h>
#include "../RDKitBase.h"
#include "RGroupUtils.h"
#include "GraphMol/Substruct/SubstructMatch.h"

// #define VERBOSE 1

namespace RDKit {
class TautomerQuery;

//! RCore is the core common to a series of molecules
/*!
  Holds the core molecule together with molecules and lookup tables
  derived from it.  Apart from the small accessors defined inline below,
  every member function is implemented out of line.
*/
struct RCore {
  //! The core itself; the RWMol constructor stores a copy of its argument here
  boost::shared_ptr<RWMol> core;
  //! The core with terminal user R groups stripped, used for matching
  boost::shared_ptr<RWMol> matchingMol;
  //! NOTE(review): presumably the core after R group labels have been
  //! assigned - confirm against the implementation file
  boost::shared_ptr<RWMol> labelledCore;

  //! Bitset indexed by core atom index: a bit is 1 when that atom bears a
  //! user-defined label
  boost::dynamic_bitset<> core_atoms_with_user_labels;
  //! Number of user labelled rgroups in the core (see countUserRGroups())
  size_t numberUserRGroups = 0;

  //! Creates an empty RCore; no molecule is attached and init() is not called
  RCore() = default;

  //! Stores a copy of \c c as the core and then runs init()
  RCore(const RWMol &c) : core(new RWMol(c)) {
    init();
  }

  //! Set-up routine invoked by the RWMol constructor (defined out of line)
  void init();

  //! Returns the bit stored for \c idx in core_atoms_with_user_labels
  /*!
    \c idx is handed to the bitset unchecked, so it has to be a valid
    position within core_atoms_with_user_labels.
  */
  bool isCoreAtomUserLabelled(int idx) const {
    return core_atoms_with_user_labels.test(idx);
  }

  //! Refreshes numberUserRGroups from the number of bits currently set in
  //! core_atoms_with_user_labels
  void countUserRGroups() {
    const auto labelledAtomCount = core_atoms_with_user_labels.count();
    numberUserRGroups = labelledAtomCount;
  }

  //! Finds all the core atoms that carry a user label and sets their
  //! indices to 1 in core_atoms_with_user_labels
  void findIndicesWithRLabel();

  //! Returns a copy of the core in which dummy atoms are replaced by the
  //! respective matching atom in \c mol, while the other atoms have their
  //! aromatic flag and formal charge copied from the respective matching
  //! atom in \c mol
  ROMOL_SPTR replaceCoreAtomsWithMolMatches(const ROMol &mol,
                                            const MatchVectType &match) const;

  //! Builds the final core returned to the user by extracting the core from
  //! the target molecule
  RWMOL_SPTR extractCoreFromMolMatch(
      const ROMol &mol, const MatchVectType &match,
      const RGroupDecompositionParameters &params) const;

  //! NOTE(review): judging by the name, extends \c match (received by value)
  //! with matches for the terminal user R groups in \c target and returns
  //! the resulting mappings - confirm against the implementation file
  std::vector<MatchVectType> matchTerminalUserRGroups(
      const RWMol &target, MatchVectType match,
      const SubstructMatchParameters &sssParams) const;

  //! Returns the TautomerQuery used for matching
  /*!
    NOTE(review): non-const; presumably creates the query on first use and
    caches it in the private members below - confirm.
  */
  std::shared_ptr<TautomerQuery> getMatchingTautomerQuery();

  //! Returns true when \c idx is a key of the terminal R group -> neighbor
  //! map
  bool isTerminalRGroupWithUserLabel(const int idx) const {
    return terminalRGroupAtomToNeighbor.count(idx) != 0;
  }

  //! Deprecated predecessor of checkAllBondsToRGroupPresent()
  /*!
    For when onlyMatchAtRGroups = true.  Checks the query core can satisfy
    all attachment points, including when two user defined attachment
    points can match the same target atom.
  */
  [[deprecated("please use checkAllBondsToRGroupPresent")]]
  bool checkAllBondsToAttachmentPointPresent(
      const ROMol &mol, const int attachmentIdx,
      const MatchVectType &mapping) const;

  //! Attachment point check for onlyMatchAtRGroups = true
  /*!
    Checks the query core can satisfy all attachment points, including
    when two user defined attachment points can match the same target
    atom.
  */
  bool checkAllBondsToRGroupPresent(
      const ROMol &mol, const int attachmentIdx,
      const std::vector<std::vector<int>> &targetToCoreIndices) const;

 private:
  //! Core atom indices of the terminal R groups, with or without user
  //! labels
  std::set<int> terminalRGroupAtoms;
  //! Maps the atom index of a terminal R group to the index of its heavy
  //! atom neighbor
  std::map<int, int> terminalRGroupAtomToNeighbor;
  //! State backing the TautomerQuery used for matching
  bool checkedForTautomerQuery = false;
  std::shared_ptr<TautomerQuery> matchingTautomerQuery = nullptr;

  //! NOTE(review): no description in this header; presumably makes \c atom
  //! (an atom of \c mol) take on the identity of \c other - confirm
  void replaceCoreAtom(RWMol &mol, Atom &atom, const Atom &other) const;

  //! Converts a matching molecule index to a core index
  int matchingIndexToCoreIndex(int matchingIndex) const;

  //! Builds the matching molecule (core minus user R groups)
  void buildMatchingMol();

  //! Adds attachment points to unlabelled R Groups
  void addDummyAtomsToUnlabelledCoreAtoms();
};

}  // namespace RDKit
#endif