File: HierarchicalClusterPicker.cpp

package info (click to toggle)
rdkit 201809.1%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 123,688 kB
  • sloc: cpp: 230,509; python: 70,501; java: 6,329; ansic: 5,427; sql: 1,899; yacc: 1,739; lex: 1,243; makefile: 445; xml: 229; fortran: 183; sh: 123; cs: 93
file content (112 lines) | stat: -rw-r--r-- 4,369 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
// $Id$
//
//  Copyright (C) 2003-2008  Greg Landrum and Rational Discovery LLC
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
#define NO_IMPORT_ARRAY

#define PY_ARRAY_UNIQUE_SYMBOL rdpicker_array_API
#include <RDBoost/python.h>

#include <RDBoost/boost_numpy.h>
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <numpy/arrayobject.h>
#include <RDBoost/Wrap.h>

#include <SimDivPickers/DistPicker.h>
#include <SimDivPickers/HierarchicalClusterPicker.h>

namespace python = boost::python;
namespace RDPickers {

// REVIEW: the poolSize can be pulled from the numeric array
RDKit::INT_VECT HierarchicalPicks(HierarchicalClusterPicker *picker,
                                  python::object &distMat, int poolSize,
                                  int pickSize) {
  if (pickSize >= poolSize) {
    throw ValueErrorException("pickSize must be less than poolSize");
  }
  if (!PyArray_Check(distMat.ptr())) {
    throw ValueErrorException("distance mat argument must be a numpy matrix");
  }

  PyArrayObject *copy;
  // it's painful to have to copy the input matrix, but the
  // picker itself will step on the distance matrix, so use
  // CopyFromObject here instead of ContiguousFromObject
  copy =
      (PyArrayObject *)PyArray_CopyFromObject(distMat.ptr(), NPY_DOUBLE, 1, 1);
  double *dMat = (double *)PyArray_DATA(copy);
  RDKit::INT_VECT res = picker->pick(dMat, poolSize, pickSize);
  Py_DECREF(copy);
  return res;
}

// REVIEW: the poolSize can be pulled from the numeric array
RDKit::VECT_INT_VECT HierarchicalClusters(HierarchicalClusterPicker *picker,
                                          python::object &distMat, int poolSize,
                                          int pickSize) {
  if (!PyArray_Check(distMat.ptr())) {
    throw ValueErrorException("distance mat argument must be a numpy matrix");
  }

  // REVIEW: check pickSize < poolSize, otherwise throw_value_error()
  PyArrayObject *copy;
  // it's painful to have to copy the input matrix, but the
  // picker itself will step on the distance matrix, so use
  // CopyFromObject here instead of ContiguousFromObject
  copy =
      (PyArrayObject *)PyArray_CopyFromObject(distMat.ptr(), NPY_DOUBLE, 1, 1);
  double *dMat = (double *)PyArray_DATA(copy);

  RDKit::VECT_INT_VECT res = picker->cluster(dMat, poolSize, pickSize);
  Py_DECREF(copy);
  return res;
}

struct HierarchCP_wrap {
  static void wrap() {
    std::string docString =
        "A class for diversity picking of items using Hierarchical "
        "Clustering\n";
    python::class_<HierarchicalClusterPicker>(
        "HierarchicalClusterPicker", docString.c_str(),
        python::init<HierarchicalClusterPicker::ClusterMethod>(
            python::args("clusterMethod")))
        .def("Pick", HierarchicalPicks,
             "Pick a diverse subset of items from a pool of items using "
             "hierarchical clustering\n"
             "\n"
             "ARGUMENTS: \n"
             "  - distMat: 1D distance matrix (only the lower triangle "
             "elements)\n"
             "  - poolSize: number of items in the pool\n"
             "  - pickSize: number of items to pick from the pool\n")
        .def("Cluster", HierarchicalClusters,
             "Return a list of clusters of item from the pool using "
             "hierachical clustering\n"
             "\n"
             "ARGUMENTS: \n"
             "  - distMat: 1D distance matrix (only the lower triangle "
             "elements)\n"
             "  - poolSize: number of items in the pool\n"
             "  - pickSize: number of items to pick from the pool\n");

    python::enum_<HierarchicalClusterPicker::ClusterMethod>("ClusterMethod")
        .value("WARD", HierarchicalClusterPicker::WARD)
        .value("SLINK", HierarchicalClusterPicker::SLINK)
        .value("CLINK", HierarchicalClusterPicker::CLINK)
        .value("UPGMA", HierarchicalClusterPicker::UPGMA)
        .value("MCQUITTY", HierarchicalClusterPicker::MCQUITTY)
        .value("GOWER", HierarchicalClusterPicker::GOWER)
        .value("CENTROID", HierarchicalClusterPicker::CENTROID)
        .export_values();
  };
};
}

void wrap_HierarchCP() { RDPickers::HierarchCP_wrap::wrap(); }