1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
|
//
// 2019, Daniel Probst, Reymond Group @ University of Bern
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include <memory>

#include <RDGeneral/RDLog.h>
#include <GraphMol/RDKitBase.h>
#include <RDGeneral/test.h>
#include <RDGeneral/utils.h>
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <GraphMol/Fingerprints/MHFP.h>
#include <GraphMol/FileParsers/MolSupplier.h>
#include <GraphMol/FileParsers/FileParsers.h>
#include <GraphMol/Fingerprints/MHFP.h>
using namespace RDKit;
void testMHFPInit() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdErrorLog) << "Test MHFP fingerprint encoder initialization"
<< std::endl;
std::string s = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C";
std::string t = "Cn1cnc2c1c(=O)[nH]c(=O)n2C";
ROMol *mol_s = SmilesToMol(s);
ROMol *mol_t = SmilesToMol(t);
MHFPFingerprints::MHFPEncoder enc(128, 42);
auto fp_s = enc.Encode(s);
auto fp_t = enc.Encode(t);
auto fp_mol_s = enc.Encode(*mol_s);
auto fp_mol_t = enc.Encode(*mol_t);
TEST_ASSERT(fp_s.size() == 128);
TEST_ASSERT(fp_s[0] == fp_mol_s[0]);
TEST_ASSERT(fp_t[127] == fp_mol_t[127]);
delete mol_s;
delete mol_t;
BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
void testMHFPHashing() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdErrorLog) << "Test MHFP hashing of string and uint arrays"
<< std::endl;
MHFPFingerprints::MHFPEncoder enc(8);
std::vector<uint32_t> input_a = {1, 2, 4, 5, 6, 7, 8, 9};
std::vector<uint32_t> output_a = {188049437, 364485576, 737251017,
810894466, 300249621, 154369992,
2221926165, 283729444};
TEST_ASSERT(enc.FromArray(input_a) == output_a);
std::vector<std::string> input_b = {"a", "b", "c", "d", "e", "f"};
std::vector<uint32_t> output_b = {631555539, 835857365, 445245415, 4162827301,
955545975, 943207071, 712975995, 363547692};
TEST_ASSERT(enc.FromStringArray(input_b) == output_b);
BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
void testMHFPShingling() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdErrorLog) << "Test MHFP shingling creation" << std::endl;
std::string s = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C";
std::string t = "Cn1cnc2c1c(=O)[nH]c(=O)n2C";
ROMol *mol_s = SmilesToMol(s);
MHFPFingerprints::MHFPEncoder enc;
auto fp_s = enc.Encode(s);
auto fp_t = enc.Encode(t);
auto sh_a = enc.CreateShingling(s);
auto sh_b = enc.CreateShingling(*mol_s);
TEST_ASSERT(sh_a.size() == 44);
TEST_ASSERT(sh_b.size() == 44);
TEST_ASSERT(enc.CreateShingling(s, 3, false).size() == 42);
TEST_ASSERT(enc.CreateShingling(s, 3, true, false, true, 0).size() == 58);
delete mol_s;
BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
void testMHFPSECFP() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdErrorLog) << "Test SECFP fingerprint functionality" << std::endl;
std::string s = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C";
ROMol *mol_s = SmilesToMol(s);
MHFPFingerprints::MHFPEncoder enc;
auto fp_s = enc.EncodeSECFP(s, 3, true, false, true, 1, 16);
auto fp_mol_s = enc.EncodeSECFP(*mol_s, 3, true, false, true, 1, 16);
TEST_ASSERT(fp_s.size() == 16);
TEST_ASSERT(fp_s[10]);
TEST_ASSERT(fp_s[15]);
delete mol_s;
BOOST_LOG(rdErrorLog) << " done" << std::endl;
}
// Checks the static MHFPEncoder::Distance function on fingerprints produced
// by a default-constructed encoder: a fingerprint compared with itself must
// give 0.0, and the two different SMILES below must give 0.7109375 (compared
// with feq, i.e. within a floating-point tolerance).
void testMHFPDistance() {
  BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
  // The banner previously repeated the shingling test's text, which made the
  // log misleading about which test was running.
  BOOST_LOG(rdErrorLog) << "Test MHFP distance calculation" << std::endl;

  std::string s = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C";
  std::string t = "Cn1cnc2c1c(=O)[nH]c(=O)n2C";

  MHFPFingerprints::MHFPEncoder enc;

  auto fp_s = enc.Encode(s);
  auto fp_t = enc.Encode(t);

  TEST_ASSERT(feq(MHFPFingerprints::MHFPEncoder::Distance(fp_s, fp_s), 0.0));
  TEST_ASSERT(
      feq(MHFPFingerprints::MHFPEncoder::Distance(fp_s, fp_t), 0.7109375));

  BOOST_LOG(rdErrorLog) << "  done" << std::endl;
}
int main(int argc, char *argv[]) {
(void)argc;
(void)argv;
RDLog::InitLogs();
testMHFPInit();
testMHFPShingling();
testMHFPHashing();
testMHFPSECFP();
return 0;
}
|