File: CorrMatGenerator.h

package info (click to toggle)
rdkit 201809.1%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 123,688 kB
  • sloc: cpp: 230,509; python: 70,501; java: 6,329; ansic: 5,427; sql: 1,899; yacc: 1,739; lex: 1,243; makefile: 445; xml: 229; fortran: 183; sh: 123; cs: 93
file content (115 lines) | stat: -rw-r--r-- 3,187 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
//
//  Copyright (C) 2003-2006 Rational Discovery LLC
//
//  @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
#include <RDGeneral/export.h>
#ifndef _RD_CORRMATGENERATOR_H_
#define _RD_CORRMATGENERATOR_H_

#include <cstddef>

#include <RDGeneral/types.h>
#include <DataStructs/BitVects.h>
#include <boost/dynamic_bitset.hpp>

namespace RDInfoTheory {
// FIX: won't worry about it now, but this class can be templated by the type of
// container for the bit list and type of descriptors (fingerprint vs. real
// valued)
class BitCorrMatGenerator {
  /*! \brief A class to generate a correlation matrix for a bunch of
   *fingerprints
   *
   *  The correlation matrix is done only for the bit IDs that are set by a call
   *to the
   *  function setDescriptorIdList
   *
   *    cr = CorrMatGenerator();
   *    cr.setDescriptorIdList(descList);
   *    for each fingerprint in list of fingerprints {
   *        cr.collectVotes(fingerprint);
   *    }
   *    double *corrMat = cr.getCorrMat()
   *
   *  The resulting correlation matrix is a one dimension matrix with only the
   *lower triangle elements
   *  of the symmetric matrix
   */
 public:
  BitCorrMatGenerator() { this->initGenerator(); };

  ~BitCorrMatGenerator() { delete[] dp_corrMat; }

  void initGenerator() {
    dp_corrMat = 0;
    d_descs.resize(0);
    d_nExamples = 0;
  };

  /*! \brief Set the list bits that we are interested in correlating
   *
   *  \param bitIdList is a list of bit ids that need to be correlated e.g. a
   *list top ranked ensemble
   *  of bits
   */
  void setBitIdList(const RDKit::INT_VECT &bitIdList) {
    d_descs = bitIdList;
    int i, nd = d_descs.size();
    int nelem = nd * (nd - 1) / 2;
    delete[] dp_corrMat;

    dp_corrMat = new double[nd * (nd - 1) / 2];
    for (i = 0; i < nelem; i++) {
      dp_corrMat[i] = 0.0;
    }
  };

  //! \brief get the number of examples we used so far to compute the
  //correlation matrix
  int getNumExamples() const { return d_nExamples; };

  //! \brief Get the list of bits ID that are used to generate the correlation
  //matrix
  RDKit::INT_VECT getCorrBitList() const { return d_descs; };

  //! \brief Gets a pointer to the correlation matrix
  double *getCorrMat() { return dp_corrMat; };

  //! \brief For each pair of on bits (bi, bj) in fp increase the correlation
  //count
  //    for the pair by 1
  void collectVotes(const BitVect &fp) {
    unsigned int nd = d_descs.size();
    // use a temporary bit vector to first mask the fingerprint
    ExplicitBitVect ebv(nd);
    int bi;
    for (unsigned int i = 0; i < nd; i++) {
      bi = d_descs[i];
      if (fp[bi]) {
        ebv.setBit(i);
      }
    }
    for (unsigned i = 1; i < nd; i++) {
      unsigned int itab = i * (i - 1) / 2;
      if (ebv[i]) {
        for (unsigned int j = 0; j < i; j++) {
          if (ebv[j]) {
            dp_corrMat[itab + j] += 1;
          }
        }
      }
    }
    d_nExamples++;
  };

 private:
  RDKit::INT_VECT d_descs;
  double *dp_corrMat;
  int d_nExamples;
};
}

#endif