File: Clustering.cpp

package info (click to toggle)
rdkit 201809.1%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 123,688 kB
  • sloc: cpp: 230,509; python: 70,501; java: 6,329; ansic: 5,427; sql: 1,899; yacc: 1,739; lex: 1,243; makefile: 445; xml: 229; fortran: 183; sh: 123; cs: 93
file content (157 lines) | stat: -rw-r--r-- 5,031 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
// $Id$
//
//  Copyright (C) 2002-2010 Greg Landrum and Rational Discovery LLC
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
#define PY_ARRAY_UNIQUE_SYMBOL Py_Array_API_Clustering

#include <RDBoost/Wrap.h>
#include <boost/cstdint.hpp>

#include <vector>

namespace python = boost::python;

#include <RDBoost/import_array.h>

typedef double real;

extern "C" void distdriver_(boost::int64_t *n, boost::int64_t *len, real *dists,
                            boost::int64_t *toggle, boost::int64_t *ia,
                            boost::int64_t *ib, real *crit);

//
// Rather than deal with any nonsense like trying to get
// the distance matrix built properly on the f2c side of things
// (thus drowning in the waves of f2c hate), we'll generate
// the distance matrix on our own here and then call distdriver_
//
// Builds the condensed matrix of squared Euclidean distances for the
// points in dataP and passes it to distdriver_.
//
//  dataP : n * m values read as n consecutive rows of m coordinates
//  n     : number of points
//  m     : number of coordinates per point
//  iopt  : forwarded unchanged to distdriver_
//  ia, ib, crit : output buffers forwarded unchanged to distdriver_
//
// The pair distances are stored in the order (1,0), (2,0), (2,1), (3,0), ...
// i.e. for every point i > 0, its distance to each earlier point j < i.
//
// The scratch matrix holds n*(n-1)/2 values, so it grows quadratically with
// n. It is held in a std::vector so that an allocation failure surfaces as
// std::bad_alloc instead of a write through a null pointer, and so that the
// storage is released on every exit path.
void clusterit(real *dataP, boost::int64_t n, boost::int64_t m,
               boost::int64_t iopt, boost::int64_t *ia, boost::int64_t *ib,
               real *crit) {
  boost::int64_t len = (n * (n - 1)) / 2;

  // value-initialised to 0.0, matching the zero fill calloc used to provide
  std::vector<real> dists(static_cast<std::vector<real>::size_type>(len), 0.0);

  boost::int64_t pos = 0;
  for (boost::int64_t i = 1; i < n; ++i) {
    const real *rowI = dataP + i * m;
    for (boost::int64_t j = 0; j < i; ++j) {
      const real *rowJ = dataP + j * m;
      real sumSq = 0.0;
      for (boost::int64_t k = 0; k < m; ++k) {
        const real diff = rowI[k] - rowJ[k];
        sumSq += diff * diff;
      }
      dists[pos] = sumSq;
      ++pos;
    }
  }

  distdriver_(&n, &len, dists.data(), &iopt, ia, ib, crit);
}

// Python entry point: clusters nPts points of sz coordinates each.
//
//  data   : a NumPy array; it is converted to a contiguous 2D array of
//           doubles and must hold at least nPts * sz values
//  nPts   : number of points (must be >= 0)
//  sz     : number of coordinates per point (must be >= 0)
//  option : forwarded unchanged to clusterit() as its iopt argument
//
// Returns a new 3-tuple (ia, ib, crit) of 1D NumPy arrays of length nPts:
// two int64 arrays and one double array, filled in by clusterit().
//
// Raises ValueError if data is not a NumPy array, if nPts or sz is negative,
// or if data is too small. Any error set by the Python/NumPy C-API is
// propagated.
static PyObject *Clustering_MurtaghCluster(python::object data, int nPts,
                                           int sz, int option) {
  if (!PyArray_Check(data.ptr())) {
    throw_value_error("PyArray_Type expected as input");
    return nullptr;
  }
  if (nPts < 0 || sz < 0) {
    throw_value_error("nPts and sz must be non-negative");
    return nullptr;
  }

  // This is a new reference (possibly a converted copy) that we must release.
  auto *dataContig = reinterpret_cast<PyArrayObject *>(
      PyArray_ContiguousFromObject(data.ptr(), NPY_DOUBLE, 2, 2));
  if (!dataContig) {
    python::throw_error_already_set();
    return nullptr;
  }

  // clusterit() reads nPts * sz doubles; refuse to read past the array.
  const boost::int64_t needed = static_cast<boost::int64_t>(nPts) * sz;
  if (static_cast<boost::int64_t>(PyArray_SIZE(dataContig)) < needed) {
    Py_DECREF(dataContig);
    throw_value_error("data array is smaller than nPts * sz");
    return nullptr;
  }

  // Let NumPy allocate (and zero) the result buffers so that the returned
  // arrays own their memory. Arrays built with PyArray_SimpleNewFromData do
  // not free the buffer they wrap, so the previous calloc'ed buffers leaked.
  // NPY_INT64 matches boost::int64_t even where C long is only 32 bits wide.
  npy_intp dims[1] = {static_cast<npy_intp>(nPts)};
  PyObject *iaArr = PyArray_ZEROS(1, dims, NPY_INT64, 0);
  PyObject *ibArr = PyArray_ZEROS(1, dims, NPY_INT64, 0);
  PyObject *critArr = PyArray_ZEROS(1, dims, NPY_DOUBLE, 0);
  PyObject *res = PyTuple_New(3);
  if (!iaArr || !ibArr || !critArr || !res) {
    Py_XDECREF(iaArr);
    Py_XDECREF(ibArr);
    Py_XDECREF(critArr);
    Py_XDECREF(res);
    Py_DECREF(dataContig);
    python::throw_error_already_set();
    return nullptr;
  }

  clusterit(static_cast<real *>(PyArray_DATA(dataContig)), nPts, sz, option,
            static_cast<boost::int64_t *>(
                PyArray_DATA(reinterpret_cast<PyArrayObject *>(iaArr))),
            static_cast<boost::int64_t *>(
                PyArray_DATA(reinterpret_cast<PyArrayObject *>(ibArr))),
            static_cast<real *>(
                PyArray_DATA(reinterpret_cast<PyArrayObject *>(critArr))));
  Py_DECREF(dataContig);

  // PyTuple_SetItem steals the references to the three arrays.
  PyTuple_SetItem(res, 0, iaArr);
  PyTuple_SetItem(res, 1, ibArr);
  PyTuple_SetItem(res, 2, critArr);

  return res;
}

// Hands an already-computed condensed distance array to distdriver_.
// The array length passed along is the number of point pairs, n*(n-1)/2;
// every other argument is forwarded unchanged.
void distclusterit(real *dists, boost::int64_t n, boost::int64_t iopt,
                   boost::int64_t *ia, boost::int64_t *ib, real *crit) {
  boost::int64_t nPairs = n * (n - 1);
  nPairs /= 2;
  distdriver_(&n, &nPairs, dists, &iopt, ia, ib, crit);
}

// Python entry point: clusters nPts points from a precomputed condensed
// distance array.
//
//  data   : a NumPy array; it is converted to a contiguous 1D array of
//           doubles and must hold at least nPts*(nPts-1)/2 values
//  nPts   : number of points (must be >= 0)
//  option : forwarded unchanged to distclusterit() as its iopt argument
//
// Returns a new 3-tuple (ia, ib, crit) of 1D NumPy arrays of length nPts:
// two int64 arrays and one double array, filled in by distclusterit().
//
// Raises ValueError if data is not a NumPy array, if nPts is negative, or if
// data is too small. Any error set by the Python/NumPy C-API is propagated.
//
// NOTE(review): when data is already a contiguous array of doubles the
// distance buffer handed to distdriver_ is presumably the caller's own
// storage; whether distdriver_ writes to it is not visible here - confirm.
static PyObject *Clustering_MurtaghDistCluster(python::object data, int nPts,
                                               int option) {
  // Validate before allocating anything so the error paths cannot leak.
  if (!PyArray_Check(data.ptr())) {
    throw_value_error("PyArray_Type expected as input");
    return nullptr;
  }
  if (nPts < 0) {
    throw_value_error("nPts must be non-negative");
    return nullptr;
  }

  // This is a new reference (possibly a converted copy) that we must release.
  auto *dataContig = reinterpret_cast<PyArrayObject *>(
      PyArray_ContiguousFromObject(data.ptr(), NPY_DOUBLE, 1, 1));
  if (!dataContig) {
    python::throw_error_already_set();
    return nullptr;
  }

  // distclusterit() tells distdriver_ the array holds one entry per pair of
  // points; refuse to let it read past the end of what was supplied.
  const boost::int64_t nPairs =
      (static_cast<boost::int64_t>(nPts) * (nPts - 1)) / 2;
  if (static_cast<boost::int64_t>(PyArray_SIZE(dataContig)) < nPairs) {
    Py_DECREF(dataContig);
    throw_value_error("data array is smaller than nPts*(nPts-1)/2");
    return nullptr;
  }

  // Let NumPy allocate (and zero) the result buffers so that the returned
  // arrays own their memory. Arrays built with PyArray_SimpleNewFromData do
  // not free the buffer they wrap, so the previous calloc'ed buffers leaked.
  // NPY_INT64 matches boost::int64_t even where C long is only 32 bits wide.
  npy_intp dims[1] = {static_cast<npy_intp>(nPts)};
  PyObject *iaArr = PyArray_ZEROS(1, dims, NPY_INT64, 0);
  PyObject *ibArr = PyArray_ZEROS(1, dims, NPY_INT64, 0);
  PyObject *critArr = PyArray_ZEROS(1, dims, NPY_DOUBLE, 0);
  PyObject *res = PyTuple_New(3);
  if (!iaArr || !ibArr || !critArr || !res) {
    Py_XDECREF(iaArr);
    Py_XDECREF(ibArr);
    Py_XDECREF(critArr);
    Py_XDECREF(res);
    Py_DECREF(dataContig);
    python::throw_error_already_set();
    return nullptr;
  }

  distclusterit(static_cast<real *>(PyArray_DATA(dataContig)), nPts, option,
                static_cast<boost::int64_t *>(
                    PyArray_DATA(reinterpret_cast<PyArrayObject *>(iaArr))),
                static_cast<boost::int64_t *>(
                    PyArray_DATA(reinterpret_cast<PyArrayObject *>(ibArr))),
                static_cast<real *>(
                    PyArray_DATA(reinterpret_cast<PyArrayObject *>(critArr))));
  Py_DECREF(dataContig);

  // PyTuple_SetItem steals the references to the three arrays.
  PyTuple_SetItem(res, 0, iaArr);
  PyTuple_SetItem(res, 1, ibArr);
  PyTuple_SetItem(res, 2, critArr);

  return res;
}

// Defines the "Clustering" extension module: initialises the NumPy C-API
// and exposes the two clustering entry points with keyword arguments.
BOOST_PYTHON_MODULE(Clustering) {
  // must run before any NumPy C-API call made by the wrapped functions
  rdkit_import_array();

  // both functions currently share the same placeholder docstring
  const char *placeholderDoc = "TODO: provide docstring";

  python::def(
      "MurtaghCluster", Clustering_MurtaghCluster,
      (python::arg("data"), python::arg("nPts"), python::arg("sz"),
       python::arg("option")),
      placeholderDoc);

  python::def(
      "MurtaghDistCluster", Clustering_MurtaghDistCluster,
      (python::arg("data"), python::arg("nPts"), python::arg("option")),
      placeholderDoc);
}