File: RascalMCES.h

package info (click to toggle)
rdkit 202503.1-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 220,160 kB
  • sloc: cpp: 399,240; python: 77,453; ansic: 25,517; java: 8,173; javascript: 4,005; sql: 2,389; yacc: 1,565; lex: 1,263; cs: 1,081; makefile: 580; xml: 229; fortran: 183; sh: 105
file content (73 lines) | stat: -rw-r--r-- 2,682 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
//
// Copyright (C) David Cosgrove 2023
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//

#include <RDGeneral/export.h>
#ifndef RDKIT_RASCAL_MCES_H
#define RDKIT_RASCAL_MCES_H

#include <memory>
#include <vector>

#include <GraphMol/RascalMCES/RascalClusterOptions.h>
#include <GraphMol/RascalMCES/RascalOptions.h>
#include <GraphMol/RascalMCES/RascalResult.h>
namespace RDKit {
class ROMol;

namespace RascalMCES {

/*!
 * @brief Find one or more MCESs between the two molecules.
 *
 * The MCES is the Maximum Common Edge Substructure, i.e. the largest set of
 * bonds common to the 2 molecules.
 *
 * @param mol1 : first molecule
 * @param mol2 : second molecule for MCES determination.
 * @param opts : (optional) set of options controlling the MCES determination.
 *               A default-constructed RascalOptions is used when omitted.
 * @return : vector of RascalResult objects.
 */
RDKIT_RASCALMCES_EXPORT std::vector<RascalResult> rascalMCES(
    const ROMol &mol1, const ROMol &mol2,
    const RascalOptions &opts = RascalOptions());

/*!
 * @brief Cluster the molecules using the Johnson similarity from rascalMCES.
 *
 * Uses the algorithm of
 * 'A Line Graph Algorithm for Clustering Chemical Structures Based
 * on Common Substructural Cores', JW Raymond, PW Willett.
 * https://match.pmf.kg.ac.rs/electronic_versions/Match48/match48_197-207.pdf
 * https://eprints.whiterose.ac.uk/77598/
 *
 * This is a fuzzy clustering algorithm, so a molecule may appear in more
 * than one cluster.  The final cluster holds all the molecules that didn't
 * fit into another cluster (the singletons).
 *
 * @param mols : molecules to cluster
 * @param clusOpts : (optional) cluster options.  A default-constructed
 *                   RascalClusterOptions is used when omitted.
 * @return clusters as vector of vectors of unsigned ints - indices into the
 *         input mols vector
 */
RDKIT_RASCALMCES_EXPORT std::vector<std::vector<unsigned int>> rascalCluster(
    const std::vector<std::shared_ptr<ROMol>> &mols,
    const RascalClusterOptions &clusOpts = RascalClusterOptions());
/*!
 * @brief Cluster the molecules using the Johnson similarity from rascalMCES
 *        and the Butina algorithm.
 *
 * Reference: Butina, JCICS 39 747-750 (1999).
 *
 * @param mols : molecules to cluster
 * @param clusOpts : (optional) cluster options.  A default-constructed
 *                   RascalClusterOptions is used when omitted.
 * @return clusters as vector of vectors of unsigned ints - indices into the
 *         input mols vector
 */
RDKIT_RASCALMCES_EXPORT std::vector<std::vector<unsigned int>>
rascalButinaCluster(
    const std::vector<std::shared_ptr<ROMol>> &mols,
    const RascalClusterOptions &clusOpts = RascalClusterOptions());
}  // namespace RascalMCES
}  // namespace RDKit
#endif  // RDKIT_RASCAL_MCES_H