File: RGroupFingerprintScore.h

package info (click to toggle)
rdkit 202503.1-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 220,160 kB
  • sloc: cpp: 399,240; python: 77,453; ansic: 25,517; java: 8,173; javascript: 4,005; sql: 2,389; yacc: 1,565; lex: 1,263; cs: 1,081; makefile: 580; xml: 229; fortran: 183; sh: 105
file content (80 lines) | stat: -rw-r--r-- 2,930 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
//
//  Copyright (C) 2020 Gareth Jones, Glysade LLC
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//

#ifndef RDKIT_RGROUPFINGERPRINTSCORE_H
#define RDKIT_RGROUPFINGERPRINTSCORE_H

#include "RGroupMatch.h"
#include <vector>

namespace RDKit {

// Holds the accumulated fingerprint bit counts for a single attachment
// point/rgroup label.
struct VarianceDataForLabel {
  // rgroup label (const, so it is fixed for the lifetime of the object)
  const int label;
  // number of structures attached here
  int numberFingerprints;
  // bitcounts - size fingerprint size, each position is the count of bits set
  // over the fingerprints for all the structures
  std::vector<int> bitCounts;

  // construct from a label together with an existing fingerprint count and
  // bit counts vector
  VarianceDataForLabel(const int &label, int numberFingerprints,
                       std::vector<int> bitCounts);
  // construct from a label only; the initial values of the other members are
  // set in the implementation file (not visible from this header)
  VarianceDataForLabel(const int &label);
  VarianceDataForLabel(const VarianceDataForLabel &other) = default;
  // copy assignment is deleted: the const member `label` cannot be reassigned
  VarianceDataForLabel &operator=(const VarianceDataForLabel &other) = delete;
  // add an rgroup structure to a bit counts array
  void addRgroupData(RGroupData *rgroupData);
  // remove an rgroup structure from a bit counts array
  void removeRgroupData(RGroupData *rgroupData);
  // calculate the mean variance for a bit counts array
  double variance() const;
};

// Running state from which the fingerprint variance score is calculated:
// molecule matches can be added and removed, and the score is computed from
// the per-label bit counts held here.
struct FingerprintVarianceScoreData {
  // NOTE(review): presumably the number of user-defined rgroups missing from
  // the matches added so far - confirm against the implementation
  size_t numberOfMissingUserRGroups = 0;
  // NOTE(review): presumably the number of molecule matches currently
  // included in the bit counts - confirm against the implementation
  size_t numberOfMolecules = 0;
  // bit count data held by shared pointer and keyed by int; the key is
  // presumably the rgroup label (cf. VarianceDataForLabel::label) - confirm
  std::map<int, std::shared_ptr<VarianceDataForLabel>> labelsToVarianceData;

  // calculates fingerprint variance score from rgroup bit counts
  double fingerprintVarianceGroupScore();

  // Adds a molecule match to the rgroup fingerprint bit counts
  // vectors
  // NOTE(review): matchNumber and permutationNumber presumably index into
  // matches as matches[matchNumber][permutationNumber] - confirm
  void addVarianceData(int matchNumber, int permutationNumber,
                       const std::vector<std::vector<RGroupMatch>> &matches,
                       const std::set<int> &labels);

  // Subtracts a molecule match from the rgroup fingerprint bit counts
  // vectors; takes the same arguments as addVarianceData
  void removeVarianceData(int matchNumber, int permutationNumber,
                          const std::vector<std::vector<RGroupMatch>> &matches,
                          const std::set<int> &labels);

  // NOTE(review): presumably resets the accumulated state - the
  // implementation is not visible from this header
  void clear();

 private:
  // Helper taking the same arguments as addVarianceData/removeVarianceData
  // plus an `add` flag; presumably the shared implementation of both, with
  // `add` selecting between adding and subtracting - confirm
  void modifyVarianceData(int matchNumber, int permutationNumber,
                          const std::vector<std::vector<RGroupMatch>> &matches,
                          const std::set<int> &labels, bool add);
};

// The arithmetic mean of the mean fingerprint bit variances for the
// fingerprints at each rgroup position.
//
// permutation - NOTE(review): presumably one entry per molecule, selecting
//               which of that molecule's matches to score - confirm
// matches     - the candidate rgroup matches, as a vector of vectors
// labels      - the set of rgroup labels
// fingerprintVarianceScoreData - optional pointer, defaults to nullptr;
//               NOTE(review): presumably receives the bit count data built
//               while scoring when non-null - confirm
RDKIT_RGROUPDECOMPOSITION_EXPORT double fingerprintVarianceScore(
    const std::vector<size_t> &permutation,
    const std::vector<std::vector<RGroupMatch>> &matches,
    const std::set<int> &labels,
    FingerprintVarianceScoreData *fingerprintVarianceScoreData = nullptr);

}  // namespace RDKit

#endif  // RDKIT_RGROUPFINGERPRINTSCORE_H