File: RascalClusterOptions.h

package info (click to toggle)
rdkit 202503.1-4
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 220,160 kB
  • sloc: cpp: 399,240; python: 77,453; ansic: 25,517; java: 8,173; javascript: 4,005; sql: 2,389; yacc: 1,565; lex: 1,263; cs: 1,081; makefile: 578; xml: 229; fortran: 183; sh: 105
file content (53 lines) | stat: -rw-r--r-- 2,593 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
//
// Copyright (C) David Cosgrove 2023
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
// Options for Rascal Clustering.  In general, the option names and defaults
// are taken from the paper:
// 'A Line Graph Algorithm for Clustering Chemical Structures Based
// on Common Substructural Cores', JW Raymond, PW Willett.
// https://match.pmf.kg.ac.rs/electronic_versions/Match48/match48_197-207.pdf
// https://eprints.whiterose.ac.uk/77598/

#include <RDGeneral/export.h>

#ifndef RASCALCLUSTEROPTIONS_H
#define RASCALCLUSTEROPTIONS_H

namespace RDKit {
namespace RascalMCES {

// Option set for Rascal clustering.  Every member has a default value, so a
// default-constructed instance is ready to use.
struct RDKIT_RASCALMCES_EXPORT RascalClusterOptions {
  // Similarity cutoff for clustering: the initial clusters are built from
  // molecule pairs that are at least this similar.
  double similarityCutoff{0.7};

  // Penalty score applied for each unconnected component in the MCES.
  double a{0.05};

  // Weight of matched bonds over matched atoms.
  double b{2.0};

  // Minimum number of atoms a fragment must have for it to be included in
  // the MCES.  This is also p in the paper.
  unsigned int minFragSize{3};

  // S_a in the paper: two pairs of molecules are included in the same
  // cluster if the similarity between their MCESs is greater than this.
  double minIntraClusterSim{0.9};

  // S_b in the paper: two clusters are merged if the fraction of molecules
  // they have in common is greater than this.
  double clusterMergeSim{0.6};

  // Maximum number of fragments in any MCES.  Otherwise the MCES can be a
  // lot of small fragments scattered across the molecule - it sometimes
  // tries too hard to find a match.
  unsigned int maxNumFrags{2};

  // Number of threads to use.  A value > 0 is used as given.  A value <= 0
  // is added to the number of hardware threads, so with 8 hardware threads
  // and numThreads of -1, 7 threads are used.
  int numThreads{-1};
};
}  // namespace RascalMCES
}  // namespace RDKit
#endif  // RASCALCLUSTEROPTIONS_H