File: UnitTestSimScreener.py

package info (click to toggle)
rdkit 202009.4-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 129,624 kB
  • sloc: cpp: 288,030; python: 75,571; java: 6,999; ansic: 5,481; sql: 1,968; yacc: 1,842; lex: 1,254; makefile: 572; javascript: 461; xml: 229; fortran: 183; sh: 134; cs: 93
file content (105 lines) | stat: -rwxr-xr-x 4,177 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#
#  Copyright (C) 2003  Greg Landrum and Rational Discovery LLC
#
"""unit testing code for the SimilarityScreeners

"""
import unittest

from rdkit import Chem
from rdkit import DataStructs
from rdkit.Chem.Fingerprints import SimilarityScreener


def fingerprinter(mol):
  """Return the RDKit fingerprint of ``mol`` used by the tests in this file.

  The fingerprint is generated by ``Chem.RDKFingerprint`` with
  ``minPath=2``, ``maxPath=7`` and ``fpSize=2048``.
  """
  settings = dict(minPath=2, maxPath=7, fpSize=2048)
  return Chem.RDKFingerprint(mol, **settings)


class TestCase(unittest.TestCase):
  """Tests for the screener classes of the ``SimilarityScreener`` module.

  Covers the ``SimilarityScreener`` base class, ``TopNScreener`` and
  ``ThresholdScreener``.  The screening tests all search the same small
  SMILES library with the same probe molecule.

  The test methods are described with ``#`` comments rather than docstrings
  so that verbose unittest output keeps showing the method names.
  """

  # SMILES of the molecules searched by the screening tests.
  _LIBRARY_SMILES = ('C1CCCCC1', 'C1OCCCC1', 'C1NCCCC1', 'c1ccccc1', 'C1C(C)CCCC1',
                     'C1C(C)C(C)CCC1')
  # SMILES of the query molecule (identical to the second library entry).
  _PROBE_SMILES = 'C1OCCCC1'

  def _makeSupplier(self):
    """Return a new molecule supplier over ``_LIBRARY_SMILES``.

    The SMILES are passed as newline-separated text, one molecule per line,
    with no name column and ``titleLine=0``.
    """
    return Chem.SmilesMolSupplierFromText('\n'.join(self._LIBRARY_SMILES), delimiter=",",
                                          smilesColumn=0, nameColumn=-1, titleLine=0)

  def _checkThresholdScreen(self, probe):
    """Screen the library against ``probe`` and verify the matches.

    Builds a ``ThresholdScreener`` with threshold 0.09 and the Tanimoto
    metric, then asserts that iterating it yields five results and that a
    second iteration yields the same first elements as the first one.
    """
    screener = SimilarityScreener.ThresholdScreener(0.09, probe=probe,
                                                    metric=DataStructs.TanimotoSimilarity,
                                                    fingerprinter=fingerprinter,
                                                    dataSource=self._makeSupplier())
    firstPass = [x[0] for x in screener]
    self.assertEqual(len(firstPass), 5)
    # the screener must be re-iterable and give the same answer again
    secondPass = [x[0] for x in screener]
    self.assertEqual(len(secondPass), 5)
    self.assertEqual(firstPass, secondPass)

  def test_SimilarityScreener(self):
    # Base class: probe handling, single-molecule fingerprinting and Reset().
    probe = fingerprinter(Chem.MolFromSmiles(self._PROBE_SMILES))

    # without constructor arguments there is no probe until SetProbe() is called
    screener = SimilarityScreener.SimilarityScreener()
    self.assertIsNone(screener.probe)
    screener.SetProbe(probe)
    self.assertEqual(screener.probe, probe)

    # the probe can also be handed over through the constructor
    screener = SimilarityScreener.SimilarityScreener(probe=probe, fingerprinter=fingerprinter)
    self.assertEqual(screener.probe, probe)

    # GetSingleFingerprint() must agree with calling the fingerprinter directly
    mol = Chem.MolFromSmiles('CCCN')
    self.assertEqual(fingerprinter(mol), screener.GetSingleFingerprint(mol))

    # no assertion here: Reset() only has to run without raising
    screener.Reset()

  def test1_TopNScreener(self):
    # TopNScreener: the screen runs lazily, triggered by iteration, len() or indexing.
    suppl = self._makeSupplier()
    metric = DataStructs.TanimotoSimilarity
    mol = Chem.MolFromSmiles(self._PROBE_SMILES)
    probe = fingerprinter(mol)

    def newScreener():
      # every scenario below needs a screener that has not been run yet
      return SimilarityScreener.TopNScreener(3, probe=probe, metric=metric,
                                             fingerprinter=fingerprinter, dataSource=suppl)

    # Iterating triggers the execution of the screen.  The comprehensions are
    # deliberate: list(screener) would call len() first and so would exercise
    # a different trigger.
    screener = newScreener()
    self.assertIsNone(screener.topN)
    matches1 = [x for x in screener]
    self.assertIsNotNone(screener.topN)

    self.assertEqual(len(matches1), 3)
    matches2 = [x for x in screener]
    self.assertEqual(len(matches2), 3)
    self.assertEqual(matches1, matches2)

    self.assertEqual(probe, screener.GetSingleFingerprint(mol))

    # Getting the length also triggers the execution of the screen
    screener = newScreener()
    self.assertIsNone(screener.topN)
    self.assertEqual(len(screener), 3)
    self.assertIsNotNone(screener.topN)

    # as does accessing elements by index
    screener = newScreener()
    self.assertIsNone(screener.topN)
    _ = screener[1]  # only the side effect of the lookup matters
    self.assertIsNotNone(screener.topN)

  def test2_ThresholdScreener(self):
    # ThresholdScreener with a probe built by the same fingerprinter as the library.
    probe = fingerprinter(Chem.MolFromSmiles(self._PROBE_SMILES))
    self._checkThresholdScreen(probe)

  def test3_ThresholdScreener_folding(self):
    # Same screen, but the probe has fpSize=4096 while the library fingerprints
    # have fpSize=2048.  NOTE(review): going by the test name this presumably
    # exercises folding to a common size in the similarity code - confirm.
    probe = Chem.RDKFingerprint(Chem.MolFromSmiles(self._PROBE_SMILES), minPath=2, maxPath=7,
                                fpSize=4096)
    self._checkThresholdScreen(probe)


# Run the tests in this module when the file is executed directly as a script.
if __name__ == '__main__':  # pragma: nocover
  unittest.main()