File: SparseIntVect.cpp

package info (click to toggle)
rdkit 201809.1%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 123,688 kB
  • sloc: cpp: 230,509; python: 70,501; java: 6,329; ansic: 5,427; sql: 1,899; yacc: 1,739; lex: 1,243; makefile: 445; xml: 229; fortran: 183; sh: 123; cs: 93
file content (206 lines) | stat: -rw-r--r-- 7,949 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
// $Id$
//
//  Copyright (C) 2007-2008 Greg Landrum
//
//  @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
#include <RDBoost/python.h>
#include <RDGeneral/BoostStartInclude.h>
#include <boost/cstdint.hpp>
#include <RDGeneral/BoostEndInclude.h>

#include <RDGeneral/types.h>
#include <RDGeneral/Invariant.h>
#include <RDBoost/PySequenceHolder.h>
#include <DataStructs/SparseIntVect.h>

using namespace RDKit;

namespace {
// Serializes a SparseIntVect and hands the result to Python as a bytes object.
//   siv  the vector to serialize (via its toString() method)
// Returns a python::object wrapping the freshly created bytes object.
template <typename IndexType>
python::object SIVToBinaryText(const SparseIntVect<IndexType> &siv) {
  const std::string pkl = siv.toString();
  // The length is passed explicitly instead of relying on NUL termination.
  python::handle<> bytes(PyBytes_FromStringAndSize(pkl.c_str(), pkl.length()));
  return python::object(bytes);
}
}  // namespace

// Pickle suite for SparseIntVect<IndexType>.
// getinitargs() returns a 1-tuple holding the binary serialization produced
// by SIVToBinaryText(); presumably this is fed back to the std::string
// constructor exposed on the Python class when unpickling (confirm).
template <typename IndexType>
struct siv_pickle_suite : python::pickle_suite {
  static python::tuple getinitargs(const SparseIntVect<IndexType> &self) {
    python::object payload = SIVToBinaryText(self);
    return python::make_tuple(payload);
  }
};

namespace {
// Increments the value stored in vect once for every index found in seq.
//   vect  the vector that is updated in place
//   seq   Python sequence whose items are read as IndexType values
// An index occurring several times in seq is incremented once per occurrence.
template <typename IndexType>
void pyUpdateFromSequence(SparseIntVect<IndexType> &vect, python::object &seq) {
  PySequenceHolder<IndexType> indices(seq);
  unsigned int pos = 0;
  while (pos < indices.size()) {
    const IndexType where = indices[pos];
    const auto bumped = vect[where] + 1;
    vect.setVal(where, bumped);
    ++pos;
  }
}
// Copies the entries reported by vect.getNonzeroElements() into a Python
// dict, using each entry's first member as the key and its second member as
// the value.
template <typename IndexType>
python::dict pyGetNonzeroElements(SparseIntVect<IndexType> &vect) {
  python::dict nonzero;
  for (const auto &entry : vect.getNonzeroElements()) {
    nonzero[entry.first] = entry.second;
  }
  return nonzero;
}

// Computes the Dice similarity between siv1 and each element of a Python list.
//   siv1            the reference vector
//   sivs            Python list whose elements are extracted as T
//   returnDistance  forwarded unchanged to DiceSimilarity()
// Returns a Python list with one value per element of sivs, in list order.
template <typename T>
python::list BulkDice(const T &siv1, python::list sivs, bool returnDistance) {
  python::list similarities;
  const unsigned int count =
      python::extract<unsigned int>(sivs.attr("__len__")());
  for (unsigned int pos = 0; pos != count; ++pos) {
    // NOTE(review): the reference is assumed to stay valid because the list
    // keeps the underlying object alive - confirm.
    const T &other = python::extract<T>(sivs[pos])();
    similarities.append(DiceSimilarity(siv1, other, returnDistance));
  }
  return similarities;
}
// Computes the Tanimoto similarity between siv1 and each element of a Python
// list.
//   siv1            the reference vector
//   sivs            Python list whose elements are extracted as T
//   returnDistance  forwarded unchanged to TanimotoSimilarity()
// Returns a Python list with one value per element of sivs, in list order.
template <typename T>
python::list BulkTanimoto(const T &siv1, python::list sivs,
                          bool returnDistance) {
  python::list similarities;
  const unsigned int count =
      python::extract<unsigned int>(sivs.attr("__len__")());
  for (unsigned int pos = 0; pos != count; ++pos) {
    // NOTE(review): the reference is assumed to stay valid because the list
    // keeps the underlying object alive - confirm.
    const T &other = python::extract<T>(sivs[pos])();
    similarities.append(TanimotoSimilarity(siv1, other, returnDistance));
  }
  return similarities;
}

// Computes the Tversky similarity between siv1 and each element of a Python
// list.
//   siv1            the reference vector
//   sivs            Python list whose elements are extracted as T
//   a, b            Tversky parameters, forwarded unchanged
//   returnDistance  forwarded unchanged to TverskySimilarity()
// Returns a Python list with one value per element of sivs, in list order.
template <typename T>
python::list BulkTversky(const T &siv1, python::list sivs, double a, double b,
                         bool returnDistance) {
  python::list similarities;
  const unsigned int count =
      python::extract<unsigned int>(sivs.attr("__len__")());
  for (unsigned int pos = 0; pos != count; ++pos) {
    // NOTE(review): the reference is assumed to stay valid because the list
    // keeps the underlying object alive - confirm.
    const T &other = python::extract<T>(sivs[pos])();
    similarities.append(TverskySimilarity(siv1, other, a, b, returnDistance));
  }
  return similarities;
}
}

// Class-level docstring passed to every SparseIntVect Python class that
// wrapOne() registers. Written as adjacent string literals, one per output
// line, so the text does not depend on backslash line continuations.
std::string sparseIntVectDoc =
    "A container class for storing integer\n"
    "values within a particular range.\n"
    "\n"
    "The length of the vector is set at construction time.\n"
    "\n"
    "As you would expect, _SparseIntVects_ support a set of binary operations\n"
    "so you can do things like:\n"
    "  Arithmetic:\n"
    "  siv1 += siv2\n"
    "  siv3 = siv1 + siv2\n"
    "  siv1 -= siv3\n"
    "  siv3 = siv1 - siv2\n"
    "  \"Fuzzy\" binary operations:\n"
    "  siv3 = siv1 & siv2  the result contains the smallest value in each entry\n"
    "  siv3 = siv1 | siv2  the result contains the largest value in each entry\n"
    "\n"
    "Elements can be set and read using indexing (i.e. siv[i] = 4 or val=siv[i])\n"
    "\n";

struct sparseIntVec_wrapper {
  template <typename IndexType>
  static void wrapOne(const char *className) {
    python::class_<SparseIntVect<IndexType>,
                   boost::shared_ptr<SparseIntVect<IndexType> > >(
        className, sparseIntVectDoc.c_str(),
        python::init<IndexType>("Constructor"))
        .def(python::init<std::string>())
        // Note: we cannot support __len__ because, at least at the moment
        // (BPL v1.34.1), it must return an int.
        .def("__setitem__", &SparseIntVect<IndexType>::setVal,
             "Set the value at a specified location")
        .def("__getitem__", &SparseIntVect<IndexType>::getVal,
             "Get the value at a specified location")
        .def(python::self & python::self)
        .def(python::self | python::self)
        .def(python::self - python::self)
        .def(python::self -= python::self)
        .def(python::self + python::self)
        .def(python::self += python::self)
        .def(python::self == python::self)
        .def(python::self != python::self)
        //.def(python::self - int())
        .def(python::self -= int())
        //.def(python::self + int())
        .def(python::self += int())
        //.def(python::self / int())
        .def(python::self /= int())
        //.def(python::self * int())
        .def(python::self *= int())
        .def("GetTotalVal", &SparseIntVect<IndexType>::getTotalVal,
             (python::args("useAbs") = false),
             "Get the sum of the values in the vector, basically L1 norm")
        .def("GetLength", &SparseIntVect<IndexType>::getLength,
             "Returns the length of the vector")
        .def("ToBinary", &SIVToBinaryText<IndexType>,
             "returns a binary (pickle) representation of the vector")
        .def("UpdateFromSequence", &pyUpdateFromSequence<IndexType>,
             "update the vector based on the values in the list or tuple")
        .def("GetNonzeroElements", &pyGetNonzeroElements<IndexType>,
             "returns a dictionary of the nonzero elements")
        .def_pickle(siv_pickle_suite<IndexType>());

    python::def(
        "DiceSimilarity", &DiceSimilarity<IndexType>,
        (python::args("siv1"), python::args("siv2"),
         python::args("returnDistance") = false, python::args("bounds") = 0.0),
        "return the Dice similarity between two vectors");
    python::def("BulkDiceSimilarity", &BulkDice<SparseIntVect<IndexType> >,
                (python::args("v1"), python::args("v2"),
                 python::args("returnDistance") = false),
                "return the Dice similarities between one vector and a "
                "sequence of others");
    python::def(
        "TanimotoSimilarity", &TanimotoSimilarity<IndexType>,
        (python::args("siv1"), python::args("siv2"),
         python::args("returnDistance") = false, python::args("bounds") = 0.0),
        "return the Tanimoto similarity between two vectors");
    python::def("BulkTanimotoSimilarity",
                &BulkTanimoto<SparseIntVect<IndexType> >,
                (python::args("v1"), python::args("v2"),
                 python::args("returnDistance") = false),
                "return the Tanimoto similarities between one vector and a "
                "sequence of others");
    python::def("TverskySimilarity", &TverskySimilarity<IndexType>,
                (python::args("siv1"), python::args("siv2"), python::args("a"),
                 python::args("b"), python::args("returnDistance") = false,
                 python::args("bounds") = 0.0),
                "return the Tversky similarity between two vectors");
    python::def("BulkTverskySimilarity",
                &BulkTversky<SparseIntVect<IndexType> >,
                (python::args("v1"), python::args("v2"), python::args("a"),
                 python::args("b"), python::args("returnDistance") = false),
                "return the Tversky similarities between one vector and a "
                "sequence of others");
  }

  static void wrap() {
    wrapOne<boost::int32_t>("IntSparseIntVect");
    wrapOne<boost::int64_t>("LongSparseIntVect");
    wrapOne<boost::uint32_t>("UIntSparseIntVect");
    wrapOne<boost::uint64_t>("ULongSparseIntVect");
  }
};

// Free-function entry point that registers all SparseIntVect bindings by
// delegating to sparseIntVec_wrapper::wrap().
void wrap_sparseIntVect() {
  sparseIntVec_wrapper::wrap();
}