File: RascalResult.h

package info (click to toggle)
rdkit 202503.1-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 220,160 kB
  • sloc: cpp: 399,240; python: 77,453; ansic: 25,517; java: 8,173; javascript: 4,005; sql: 2,389; yacc: 1,565; lex: 1,263; cs: 1,081; makefile: 580; xml: 229; fortran: 183; sh: 105
file content (158 lines) | stat: -rw-r--r-- 5,458 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
//
// Copyright (C) David Cosgrove 2023
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.

// A class to hold the results of a RASCAL MCES determination
// between 2 molecules.  Contains the bonds and atoms that
// correspond between the molecules, and also a SMARTS pattern
// defining the MCES.
//
#include <RDGeneral/export.h>

#ifndef RASCALRESULT_H
#define RASCALRESULT_H

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include <GraphMol/ROMol.h>

namespace RDKit {

namespace RascalMCES {

// Holds the outcome of a RASCAL MCES determination between 2 molecules:
// the matching bonds and atoms, a SMARTS pattern for the MCES and the
// tier 1 / tier 2 similarity estimates.  The SMARTS string, the MCES
// molecule and the ranking scores are kept in mutable members, presumably
// so that the const accessors can compute them on demand - confirm in the
// implementation file.
class RDKIT_RASCALMCES_EXPORT RascalResult {
 public:
  // Build a result from the output of an MCES search between mol1 and mol2.
  // adjMatrix1/adjMatrix2, clique and vtx_pairs are the search data
  // structures the match is derived from; the remaining arguments are the
  // search state (timedOut, swapped), the similarity estimates and the
  // options the search was run with.
  // NOTE(review): the exact encoding of clique/vtx_pairs and the meaning of
  // swapped are defined by the caller and the implementation file - confirm
  // there before relying on them.
  RascalResult(const RDKit::ROMol &mol1, const RDKit::ROMol &mol2,
               const std::vector<std::vector<int>> &adjMatrix1,
               const std::vector<std::vector<int>> &adjMatrix2,
               const std::vector<unsigned int> &clique,
               const std::vector<std::pair<int, int>> &vtx_pairs, bool timedOut,
               bool swapped, double tier1Sim, double tier2Sim,
               bool ringMatchesRingOnly, bool singleLargestFrag, int minFragSep,
               bool exactConnectionsMatch = false,
               const std::string &equivalentAtoms = "",
               bool ignoreBondOrders = false);
  // For when the tier[12]Sim didn't hit the threshold, but it
  // might be of interest what the estimates of similarity were.
  RascalResult(double tier1Sim, double tier2Sim);

  // Copying is user-defined (see the implementation file); moving and
  // destruction use the compiler-generated versions.
  RascalResult(const RascalResult &other);

  RascalResult(RascalResult &&other) = default;

  ~RascalResult() = default;

  RascalResult &operator=(const RascalResult &other);

  RascalResult &operator=(RascalResult &&other) = default;

  // Cut the result down to the single largest fragment.  This is
  // irrecoverably destructive.
  void largestFragOnly();
  // Presumably the multi-fragment analogue of largestFragOnly(), keeping the
  // numFrags largest fragments - confirm against the implementation.
  void largestFragsOnly(unsigned int numFrags = 2);
  // Presumably discards fragments smaller than minFragSize - confirm against
  // the implementation, including whether size is counted in atoms or bonds.
  void trimSmallFrags(unsigned int minFragSize = 3);

  // returns a copy of the matching bond pairs held by the result.
  // NOTE(review): presumably (mol1 bond index, mol2 bond index) - confirm.
  std::vector<std::pair<int, int>> getBondMatches() const {
    return d_bondMatches;
  }

  // returns a copy of the matching atom pairs held by the result.
  // NOTE(review): presumably (mol1 atom index, mol2 atom index) - confirm.
  std::vector<std::pair<int, int>> getAtomMatches() const {
    return d_atomMatches;
  }

  // The following 5 functions are used in resultCompare to rank
  // 2 MCES of the same size for the same pair of molecules.
  // returns the number of contiguous fragments in the MCES.
  int getNumFrags() const;

  // returns how many bonds in the clique don't match
  // cyclic/non-cyclic i.e. count as a match in the MCES but
  // are ring bonds in one of the molecules and not in the other.
  int getRingNonRingBondScore() const;

  // returns a score for how well the atoms in the clique from mol1 match the
  // atoms for the clique in mol2.  Currently, the atom scores are the
  // difference in H count for matching atoms, and summed for the molecule.
  // It's so that, for example, an OH in mol1 that could match an OH or OMe
  // matches the OH for preference.
  int getAtomMatchScore() const;

  // returns a score for the maximum difference in through-bond distance for
  // pairs of matching atoms in the 2 molecules.  An MCES where 2 atoms
  // are far apart in one molecule and the corresponding atoms are close
  // together in the other will get a high score by this measure.
  int getMaxDeltaAtomAtomDist() const;

  // returns the number of atoms in the largest contiguous fragment
  // in the MCES.
  unsigned int getLargestFragSize() const;

  // returns the SMARTS pattern defining the MCES.
  std::string getSmarts() const;
  // returns the MCES as a molecule.
  const std::shared_ptr<ROMol> getMcesMol() const;
  // returns the timed-out flag held by the result.
  bool getTimedOut() const { return d_timedOut; }

  // return the tier 1 and tier 2 similarity estimates held by the result.
  double getTier1Sim() const { return d_tier1Sim; }
  double getTier2Sim() const { return d_tier2Sim; }
  // returns the similarity of the 2 molecules; the formula is in the
  // implementation file.
  double getSimilarity() const;

 private:
  // The 2 molecules the result refers to, and the molecule form of the MCES.
  std::shared_ptr<ROMol> d_mol1;
  std::shared_ptr<ROMol> d_mol2;
  mutable std::shared_ptr<ROMol> d_mcesMol;
  // Matching bond and atom pairs, as returned by getBondMatches() and
  // getAtomMatches().
  std::vector<std::pair<int, int>> d_bondMatches;
  std::vector<std::pair<int, int>> d_atomMatches;

  mutable std::string d_smarts;
  bool d_timedOut{false};
  // Initialised in-class, like every other scalar member, so that they are
  // never indeterminate whichever constructor is used.
  double d_tier1Sim{0.0};
  double d_tier2Sim{0.0};
  bool d_ringMatchesRingOnly{false};
  int d_maxFragSep{-1};
  bool d_exactConnectionsMatch{false};
  std::string d_equivalentAtoms{""};
  bool d_ignoreBondOrders{false};

  // These are used for sorting the results.
  // NOTE(review): -1 looks like a "not yet calculated" marker - confirm.
  mutable int d_numFrags{-1};
  mutable int d_ringNonRingBondScore{-1};
  mutable int d_atomMatchScore{-1};
  mutable int d_maxDeltaAtomAtomDist{-1};
  mutable int d_largestFragSize{-1};

  // Assuming the frags are all part of the original MCES, just cut it
  // down to what's in the frags.
  void rebuildFromFrags(const std::vector<boost::shared_ptr<ROMol>> &frags);

  // Builds the SMARTS string for the MCES.
  std::string createSmartsString() const;

  // NOTE(review): presumably derives the atom matches from the clique using
  // mol1's adjacency matrix - confirm in the implementation file.
  void matchCliqueAtoms(const std::vector<std::vector<int>> &mol1_adj_matrix);

  // If the clique involves a fragment that is more than d_maxFragSep from
  // any other frag in either molecule, discard the smaller frag.
  void applyMaxFragSep();

  // Make the fragments for either mol1 or mol2.  If molNum is not 1 or 2,
  // returns nullptr.
  RDKit::ROMol *makeMolFrags(int molNum) const;

  // Calculation helpers, presumably backing getRingNonRingBondScore(),
  // getAtomMatchScore() and getLargestFragSize() respectively.
  int calcRingNonRingScore() const;

  int calcAtomMatchScore() const;

  int calcLargestFragSize() const;

  // If there are multiple fragments, can be helpful as a tie-breaker.  It's the
  // maximum difference between through-bond distances between matching atoms in
  // the 2 molecules.
  int calcMaxDeltaAtomAtomDistScore() const;
};

}  // namespace RascalMCES
}  // namespace RDKit

#endif  // RASCALRESULT_H