File: rdFeatSelect.cpp

package info (click to toggle)
rdkit 201203-3
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 37,840 kB
  • sloc: cpp: 93,902; python: 51,897; java: 5,192; ansic: 3,497; xml: 2,499; sql: 1,641; yacc: 1,518; lex: 1,076; makefile: 325; fortran: 183; sh: 153; cs: 51
file content (95 lines) | stat: -rw-r--r-- 2,785 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
// $Id: rdFeatSelect.cpp 1528 2010-09-26 17:04:37Z glandrum $
//
//  Copyright (C) 2005-2006 Rational Discovery LLC
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
#include <cstring>
#include <vector>
#include <RDBoost/Wrap.h>
#include <DataStructs/BitVects.h>
#include <DataStructs/BitOps.h>

namespace python = boost::python;

#include "fastentropy.h"

// Runs fe_selection_cmim() over a Python list of examples and returns the
// selected features as a Python tuple.
//
//  Template parameter:
//    BV: the bit-vector type stored in each example (it must provide
//        getNumBits() and getBit()).
//
//  Arguments:
//    examples: a Python list of lists. For every inner list, element 1 is
//              extracted as a BV and the last element is extracted as a
//              uint32_t; a value > 0 sets that example's bit in the class
//              vector. The length of the inner lists and the number of bits
//              are both taken from examples[0].
//    nToTake:  the number of features requested from fe_selection_cmim().
//
//  Returns:
//    a new tuple holding the nToTake integers written by
//    fe_selection_cmim().
//
//  Notes:
//    - all scratch buffers live in std::vector so that they are released
//      even when a boost::python extraction throws part-way through.
//    - a Python-level failure (bad index, failed conversion, failed tuple
//      allocation) propagates as a boost::python exception.
template <typename BV> 
PyObject *runCMIM(python::list examples,unsigned int nToTake){
  unsigned int nExamples=python::extract<int>(examples.attr("__len__")());

  // the first example defines the layout that every other example is read with
  python::list example=python::extract<python::list>(examples[0]);
  const BV &tmpBV=python::extract<BV>(example[1]);
  unsigned int exSize=python::extract<int>(example.attr("__len__")());
  unsigned int nBits=tmpBV.getNumBits();

  std::vector<int> sels(nToTake);
  {
    // number of 32-bit words needed to hold one bit per example; computed
    // in size_t so that neither the rounding nor words*nBits can wrap
    const std::size_t words=(static_cast<std::size_t>(nExamples)+31)/32;

    // y: one bit per example (the class); raw: nBits rows of `words` words,
    // row j holding bit j of every example. Both start out zeroed.
    std::vector<uint32_t> y(words,0);
    std::vector<uint32_t> raw(words*static_cast<std::size_t>(nBits),0);

    // x[j] points at row j of raw, which is the layout handed to the fe_* calls
    std::vector<uint32_t *> x(nBits);
    uint32_t *rawBase = raw.empty() ? 0 : &raw[0];
    for(unsigned int j=0;j<nBits;j++){
      x[j] = rawBase + words*j;
    }
    uint32_t *yPtr = y.empty() ? 0 : &y[0];

    for(unsigned int i=0;i<nExamples;i++){
      example=python::extract<python::list>(examples[i]);
      fe_set_bit(i,yPtr,python::extract<uint32_t>(example[exSize-1])>0);

      const BV &bv=python::extract<BV>(example[1]);
      for(unsigned int j=0;j<nBits;j++){
        fe_set_bit(i,x[j],bv.getBit(j));
      }
    }

    fe_selection_cmim(nExamples,nBits,
                      x.empty() ? 0 : &x[0],
                      yPtr,
                      nToTake,
                      sels.empty() ? 0 : &sels[0]);
    // y, raw and x are released here, before the result tuple is built
  }

  PyObject *res = PyTuple_New(nToTake);
  if(!res){
    // PyTuple_New has already set the Python error; surface it to the caller
    python::throw_error_already_set();
  }
  for(unsigned int i=0;i<nToTake;i++){
    // PyTuple_SetItem steals the reference created by PyInt_FromLong
    PyTuple_SetItem(res,i,PyInt_FromLong(sels[i]));
  }
  return res;

}

// Entry point exposed to Python: looks at the bit vector stored in element 1
// of the first example and forwards to the matching runCMIM instantiation.
// Anything that cannot be extracted as an ExplicitBitVect is handled as a
// SparseBitVect.
PyObject * selectCMIM(python::list &examples,unsigned int nToTake){
  python::list firstExample=python::extract<python::list>(examples[0]);
  python::extract<ExplicitBitVect> asExplicit(firstExample[1]);

  if(!asExplicit.check()){
    return runCMIM<SparseBitVect>(examples,nToTake);
  }
  return runCMIM<ExplicitBitVect>(examples,nToTake);
}



// Python module definition for rdFeatSelect: sets the module docstring and
// exports selectCMIM().
BOOST_PYTHON_MODULE(rdFeatSelect)
{
  // runs once, when the module is imported
  // NOTE(review): presumably builds the lookup tables used by the fe_*
  // routines from fastentropy.h - confirm there.
  fe_init_tables();
  python::scope().attr("__doc__") =
    "Module containing functions for feature selection"
    ;
  
  // user-visible help text for selectCMIM; it describes what runCMIM reads
  // from its arguments and what it returns
  std::string docString="Runs CMIM feature selection on a set of examples.\n\n\
  ARGUMENTS:\n\n\
    - examples: a list of lists. In each inner list, element 1 is a bit\n\
      vector and the last element is a non-negative integer; values\n\
      greater than zero mark the example as a member of the class.\n\
      The first example determines the bit vector type\n\
      (ExplicitBitVect, otherwise SparseBitVect is assumed), the\n\
      number of bits and the length used for every example.\n\n\
    - nToTake: the number of bits to select\n\n\
  RETURNS:\n\n\
    a tuple with nToTake integers identifying the selected bits\n\n";

  python::def("selectCMIM", selectCMIM,
              docString.c_str());
}