File: DiversityPick.h

package info (click to toggle)
rdkit 201809.1%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 123,688 kB
  • sloc: cpp: 230,509; python: 70,501; java: 6,329; ansic: 5,427; sql: 1,899; yacc: 1,739; lex: 1,243; makefile: 445; xml: 229; fortran: 183; sh: 123; cs: 93
file content (47 lines) | stat: -rw-r--r-- 1,449 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#include <RDGeneral/export.h>
#include <list>
#include <map>
#include <DataStructs/BitOps.h>
#include <DataStructs/ExplicitBitVect.h>
#include <SimDivPickers/MaxMinPicker.h>
#include <RDGeneral/Exceptions.h>

namespace {
/// Distance functor over a set of fingerprints.
///
/// Calling the functor with two indices returns
/// `1 - TanimotoSimilarity(ebvs[i], ebvs[j])`. When caching is enabled,
/// every computed distance is memoised so repeated queries for the same
/// pair of indices are answered from the cache.
///
/// The functor only stores a reference to the fingerprint vector; the
/// vector must stay alive (and unmodified) for as long as the functor is
/// in use.
class taniFunctor {
 public:
  /// \param ebvs      fingerprints to compare; held by reference, not copied.
  /// \param useCache  if true, computed distances are memoised per index pair.
  taniFunctor(const std::vector<ExplicitBitVect> &ebvs, bool useCache)
      : df_useCache(useCache), d_ebvs(ebvs) {}

  /// Returns the Tanimoto distance between fingerprints \c i and \c j.
  /// No bounds checking is done on the indices.
  double operator()(unsigned int i, unsigned int j) {
    if (!df_useCache) {
      return 1. - TanimotoSimilarity(d_ebvs[i], d_ebvs[j]);
    }

    // The Tanimoto distance is symmetric, so (i, j) and (j, i) share one
    // cache slot: always key on (smaller index, larger index).
    const IndexPair key = (i <= j) ? IndexPair(i, j) : IndexPair(j, i);

    // One tree search serves both the lookup and, on a miss, the insertion
    // hint (instead of separate count / operator[] / operator[] searches).
    Cache::iterator pos = d_cache.lower_bound(key);
    if (pos != d_cache.end() && pos->first == key) {
      return pos->second;
    }

    const double dist = 1. - TanimotoSimilarity(d_ebvs[i], d_ebvs[j]);
    d_cache.insert(pos, Cache::value_type(key, dist));
    return dist;
  }

 private:
  typedef std::pair<unsigned int, unsigned int> IndexPair;
  typedef std::map<IndexPair, double> Cache;

  bool df_useCache;                            // memoise distances when true
  const std::vector<ExplicitBitVect> &d_ebvs;  // borrowed, not owned
  Cache d_cache;                               // (min idx, max idx) -> distance
};
}

/// Picks \c nToPick entries from \c ebvs by running
/// RDPickers::MaxMinPicker::lazyPick with the Tanimoto distance
/// (1 - Tanimoto similarity) between fingerprints as the distance function.
///
/// \param ebvs        fingerprints forming the pool to pick from.
/// \param nToPick     number of entries to pick; must be strictly smaller
///                    than \c ebvs.size().
/// \param seed        forwarded unchanged to the picker (default -1).
/// \param firstPicks  forwarded unchanged to the picker (default empty).
/// \param useCache    if true, pairwise distances are memoised during the
///                    pick.
/// \return the result of MaxMinPicker::lazyPick (presumably the indices of
///         the picked fingerprints within \c ebvs -- confirm against the
///         MaxMinPicker documentation).
/// \throws ValueErrorException if \c nToPick >= \c ebvs.size().
///
/// Declared \c inline because the definition lives in a header: without it,
/// including this header from more than one translation unit produces
/// multiple-definition link errors.
inline std::vector<int> pickUsingFingerprints(
    const std::vector<ExplicitBitVect> &ebvs, unsigned int nToPick,
    int seed = -1, std::vector<int> firstPicks = std::vector<int>(), bool useCache=true) {
  // NOTE(review): this also rejects nToPick == ebvs.size(), although the
  // message only mentions "larger" -- confirm which of the two is intended.
  if (nToPick >= ebvs.size())
    throw ValueErrorException("nToPick is larger than the vector size");

  RDPickers::MaxMinPicker picker;
  // lazyPick evaluates distances on demand through the functor, so it is
  // passed as a named object rather than a temporary.
  taniFunctor ftor(ebvs, useCache);
  return picker.lazyPick(ftor, ebvs.size(), nToPick, firstPicks, seed);
}