File: DiversityPick.h

package info (click to toggle)
rdkit 201603.5-2
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 72,364 kB
  • ctags: 18,217
  • sloc: cpp: 167,966; python: 58,855; java: 5,318; ansic: 5,239; sql: 1,908; yacc: 1,553; lex: 1,131; makefile: 418; xml: 229; sh: 192; fortran: 183; cs: 93
file content (46 lines) | stat: -rw-r--r-- 1,419 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#include <list>
#include <map>
#include <DataStructs/BitOps.h>
#include <DataStructs/ExplicitBitVect.h>
#include <SimDivPickers/MaxMinPicker.h>
#include <RDGeneral/Exceptions.h>

namespace {
/// Distance functor over a collection of fingerprints.
///
/// Calling the functor with indices (i, j) yields
/// 1 - TanimotoSimilarity(ebvs[i], ebvs[j]). When caching is enabled, every
/// computed value is stored under the ordered index pair (i, j) and reused on
/// later calls with that same pair; (j, i) is stored as a separate entry.
///
/// The functor holds a reference to the fingerprint vector rather than a copy,
/// so the vector has to outlive the functor. Indices are passed straight to
/// the vector's operator[] and are not range-checked here.
class taniFunctor {
 public:
  /// \param ebvs      fingerprints to compare; kept by reference
  /// \param useCache  if true, memoize computed distances per (i, j) pair
  taniFunctor(const std::vector<ExplicitBitVect> &ebvs, bool useCache)
      : df_useCache(useCache), d_ebvs(ebvs) {}

  /// \return 1 - Tanimoto similarity of fingerprints \c i and \c j
  double operator()(unsigned int i, unsigned int j) {
    if (!df_useCache) {
      return 1. - TanimotoSimilarity(d_ebvs[i], d_ebvs[j]);
    }

    const CacheKey key(i, j);
    // A single tree search: lower_bound either lands on the cached entry or
    // yields the position right where a new entry belongs, which is then used
    // as the insertion hint so the map is not searched a second time.
    Cache::iterator pos = d_cache.lower_bound(key);
    if (pos != d_cache.end() && !d_cache.key_comp()(key, pos->first)) {
      return pos->second;
    }

    const double dist = 1. - TanimotoSimilarity(d_ebvs[i], d_ebvs[j]);
    d_cache.insert(pos, Cache::value_type(key, dist));
    return dist;
  }

 private:
  typedef std::pair<unsigned int, unsigned int> CacheKey;
  typedef std::map<CacheKey, double> Cache;

  bool df_useCache;                            // whether d_cache is consulted
  const std::vector<ExplicitBitVect> &d_ebvs;  // not owned
  Cache d_cache;                               // (i, j) -> cached distance
};
}

std::vector<int> pickUsingFingerprints(
    const std::vector<ExplicitBitVect> &ebvs, unsigned int nToPick,
    int seed = -1, std::vector<int> firstPicks = std::vector<int>(), bool useCache=true) {
  if (nToPick >= ebvs.size())
    throw ValueErrorException("nToPick is larger than the vector size");
  std::vector<int> res;

  RDPickers::MaxMinPicker picker;
  taniFunctor ftor(ebvs,useCache);
  res = picker.lazyPick(ftor, ebvs.size(), nToPick, firstPicks, seed);
  return res;
}