File: CorrMatGenerator.h

package info (click to toggle)
rdkit 201203-3
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 37,840 kB
  • sloc: cpp: 93,902; python: 51,897; java: 5,192; ansic: 3,497; xml: 2,499; sql: 1,641; yacc: 1,518; lex: 1,076; makefile: 325; fortran: 183; sh: 153; cs: 51
file content (122 lines) | stat: -rw-r--r-- 3,403 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
//
//  Copyright (C) 2003-2006 Rational Discovery LLC
//
//  @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
#ifndef _RD_CORRMATGENERATOR_H_
#define _RD_CORRMATGENERATOR_H_

#include <RDGeneral/types.h>
#include <DataStructs/BitVects.h>
#include <boost/dynamic_bitset.hpp>

namespace RDInfoTheory {
  // FIX: not addressed for now, but this class could be templated on the type of
  // container used for the bit list and on the type of descriptor (fingerprint vs. real-valued)
  class BitCorrMatGenerator {
    /*! \brief A class to generate a correlation matrix for a bunch of fingerprints
     *
     *  The correlation matrix is done only for the bit IDs that are set by a call to the 
     *  function setDescriptorIdList
     *  
     *    cr = CorrMatGenerator();
     *    cr.setDescriptorIdList(descList);
     *    for each fingerprint in list of fingerprints {
     *        cr.collectVotes(fingerprint);
     *    }
     *    double *corrMat = cr.getCorrMat()
     *  
     *  The resulting correlation matrix is a one dimension matrix with only the lower triangle elements
     *  of the symmetric matrix
     */
  public:
    BitCorrMatGenerator() {
      this->initGenerator();
    };

    ~BitCorrMatGenerator() {
      if (dp_corrMat) {
        delete [] dp_corrMat;
      }
    }

    void initGenerator() {
      dp_corrMat = 0;
      d_descs.resize(0);
      d_nExamples = 0;
    };

    /*! \brief Set the list bits that we are interested in correlating
     *
     *  \param bitIdList is a list of bit ids that need to be correlated e.g. a list top ranked ensemble 
     *  of bits 
     */
    void setBitIdList(const RDKit::INT_VECT &bitIdList) {
      d_descs = bitIdList;
      int i, nd = d_descs.size();
      int nelem = nd*(nd-1)/2;
      if (dp_corrMat != 0) {
        delete [] dp_corrMat;
      }
      dp_corrMat = new double[nd*(nd-1)/2];
      for (i = 0; i < nelem; i++) {
        dp_corrMat[i] = 0.0;
      }
    };

    //! \brief get the number of examples we used so far to compute the correlation matrix
    int getNumExamples() const {
      return d_nExamples;
    };

    //! \brief Get the list of bits ID that are used to generate the correlation matrix
    RDKit::INT_VECT getCorrBitList() const {
      return d_descs;
    };

    //! \brief Gets a pointer to the correlation matrix
    double *getCorrMat() {
      return dp_corrMat;
    };
    
    //! \brief For each pair of on bits (bi, bj) in fp increase the correlation count
    //    for the pair by 1
    void collectVotes(const BitVect &fp) {
      unsigned int nd = d_descs.size();
      // use a temporary bit vector to first mask the fingerprint
      ExplicitBitVect ebv(nd);
      int bi;
      for (unsigned int i = 0; i < nd; i++) {
        bi = d_descs[i];
        if (fp[bi]) {
          ebv.setBit(i);
        }
      }
      for (unsigned i = 1; i < nd; i++) {
        unsigned int itab = i*(i-1)/2;
        if (ebv[i]) {
          for (unsigned int j = 0; j < i; j++) {
            if ( ebv[j]) {
              dp_corrMat[itab + j] += 1;
            }
          }
        }
      }
      d_nExamples++;
    };

  private:
    RDKit::INT_VECT d_descs;
    double *dp_corrMat;
    int d_nExamples;
  };

}

#endif