File: FingerprintUtil.h

package info (click to toggle)
rdkit 201809.1%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 123,688 kB
  • sloc: cpp: 230,509; python: 70,501; java: 6,329; ansic: 5,427; sql: 1,899; yacc: 1,739; lex: 1,243; makefile: 445; xml: 229; fortran: 183; sh: 123; cs: 93
file content (160 lines) | stat: -rw-r--r-- 5,562 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
//
//  Copyright (C) 2018 Boran Adas, Google Summer of Code
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//

#include <RDGeneral/export.h>
#ifndef RD_FINGERPRINTUTIL_H_2018_07
#define RD_FINGERPRINTUTIL_H_2018_07

#include <GraphMol/RDKitBase.h>
#include <DataStructs/SparseIntVect.h>
#include <DataStructs/BitVects.h>
#include <boost/cstdint.hpp>

#include <cstdint>
#include <list>
#include <map>
#include <string>
#include <vector>
#include <DataStructs/ExplicitBitVect.h>

#include <GraphMol/Subgraphs/Subgraphs.h>

namespace RDKit {
namespace AtomPairs {
// Widths, in bits, of the individual fields used by the atom-pair /
// topological-torsion codes.
constexpr unsigned int numTypeBits = 4;
constexpr unsigned int numPiBits = 2;
constexpr unsigned int numBranchBits = 3;
constexpr unsigned int numChiralBits = 2;
constexpr unsigned int numPathBits = 5;

// Lookup table with one slot per value of the type field (1 << numTypeBits,
// i.e. 16 slots). Only 15 values are listed, so the final slot is
// zero-initialized.
// NOTE(review): the entries look like atomic numbers and the last listed
// value (43) breaks the otherwise ascending order -- the values are kept
// exactly as they are because they determine the generated codes.
constexpr unsigned int atomNumberTypes[1 << numTypeBits] = {
    5, 6, 7, 8, 9, 14, 15, 16, 17, 33, 34, 35, 51, 52, 43};

// Largest values that fit into the corresponding fields.
constexpr unsigned int maxNumPi = (1 << numPiBits) - 1;
constexpr unsigned int maxNumBranches = (1 << numBranchBits) - 1;
constexpr unsigned int maxPathLen = (1 << numPathBits) - 1;

// Size of a single atom code: type + pi + branch fields (the chirality bits
// are not part of this sum).
constexpr unsigned int codeSize = numTypeBits + numPiBits + numBranchBits;

// Size of an atom-pair code: the path field plus two atom codes.
// Note that this is only accurate if chirality is not included.
constexpr unsigned int numAtomPairFingerprintBits = numPathBits + 2 * codeSize;

//! returns a numeric code for the atom (the atom's hash in the
//! atom-pair scheme)
/*!
  \param atom             the atom to be considered
  \param branchSubtract   (optional, defaults to 0) a constant to subtract
                          from the number of neighbors when the hash is
                          calculated (used in the topological torsions code)
  \param includeChirality (optional, defaults to false) toggles the
                          inclusion of bits indicating R/S chirality

  \return the atom code as an unsigned 32-bit integer
*/
RDKIT_FINGERPRINTS_EXPORT std::uint32_t getAtomCode(
    const Atom *atom, unsigned int branchSubtract = 0,
    bool includeChirality = false);

//! returns an atom pair hash based on two atom hashes and the
//! distance between the atoms.
/*!
  \param codeI  the hash for the first atom
  \param codeJ  the hash for the second atom
  \param dist   the distance (number of bonds) between the two atoms
  \param includeChirality (optional, defaults to false) toggles the
                inclusion of bits indicating R/S chirality

  \return the atom-pair code as an unsigned 32-bit integer
*/
RDKIT_FINGERPRINTS_EXPORT std::uint32_t getAtomPairCode(
    std::uint32_t codeI, std::uint32_t codeJ, unsigned int dist,
    bool includeChirality = false);

//! returns a topological torsion hash based on the atom hashes
//! passed in
/*!
  \param atomCodes  the vector of atom hashes
  \param includeChirality (optional, defaults to false) presumably toggles
                    the inclusion of chirality bits, as for getAtomCode() --
                    TODO confirm against the implementation

  \return the torsion code as an unsigned 64-bit integer
*/
RDKIT_FINGERPRINTS_EXPORT std::uint64_t getTopologicalTorsionCode(
    const std::vector<std::uint32_t> &atomCodes, bool includeChirality = false);

//! returns an unsigned 32-bit value for a vector of path codes
/*!
  NOTE(review): this function is undocumented in the original header. Judging
  by its name it is a hashed counterpart of getTopologicalTorsionCode();
  confirm the hashing scheme against the implementation.

  \param pathCodes  the vector of 32-bit codes describing the path
*/
RDKIT_FINGERPRINTS_EXPORT std::uint32_t getTopologicalTorsionHash(
    const std::vector<std::uint32_t> &pathCodes);
}  // namespace AtomPairs

namespace MorganFingerprints {

//! wrapper that stores a matcher molecule in an ROMOL_SPTR
/*!
  NOTE(review): only the declaration is visible in this header. How
  \c pattern is interpreted (presumably as SMARTS, cf. defaultFeatureSmarts)
  and what the default constructor stores in m_matcher should be confirmed
  against the implementation.
*/
class RDKIT_FINGERPRINTS_EXPORT ss_matcher {
 public:
  //! default constructor
  ss_matcher();
  //! constructs the matcher from a pattern string
  /*!
    This constructor is not declared explicit, so a std::string converts
    implicitly to an ss_matcher.
  */
  ss_matcher(const std::string &pattern);

  // const RDKit::ROMOL_SPTR &getMatcher() const { return m_matcher; };
  //! returns a raw const pointer to the matcher molecule
  //! (presumably the molecule held by m_matcher -- TODO confirm)
  const RDKit::ROMol *getMatcher() const;

 private:
  RDKit::ROMOL_SPTR m_matcher;  //!< the stored matcher molecule
};

//! tuple of a dynamic bitset, an unsigned 32-bit value and an unsigned int
/*!
  NOTE(review): the meaning of the individual fields is defined by the code
  that uses this type, which is not visible in this header -- confirm there.

  The 32-bit element is spelled std::uint32_t (from <cstdint>) rather than
  relying on an unqualified global uint32_t being visible.
*/
using AccumTuple =
    boost::tuple<boost::dynamic_bitset<>, std::uint32_t, unsigned int>;

//! list of strings declared here and defined in the implementation
/*!
  NOTE(review): presumably the SMARTS patterns for the default feature
  definitions used by getFeatureInvariants() when no patterns are supplied --
  confirm against the implementation.
*/
RDKIT_FINGERPRINTS_EXPORT extern std::vector<std::string> defaultFeatureSmarts;

//! returns the connectivity invariants for a molecule
/*!

  \param mol :    the molecule to be considered
  \param invars : used to return the results
  \param includeRingMembership : (optional, defaults to true) if set, whether
             or not the atom is in a ring will be used in the invariant list.
*/
RDKIT_FINGERPRINTS_EXPORT void getConnectivityInvariants(
    const ROMol &mol, std::vector<boost::uint32_t> &invars,
    bool includeRingMembership = true);
//! version string for the connectivity invariants
//! (presumably changed whenever the invariant definition changes)
const std::string morganConnectivityInvariantVersion = "1.0.0";

//! returns the feature invariants for a molecule
/*!

  \param mol :      the molecule to be considered
  \param invars :   used to return the results
  \param patterns : (optional, defaults to a null pointer) if provided, should
                    contain the queries used to assign atom-types.
                    If not provided, feature definitions adapted from
                    reference:
                    Gobbi and Poppinger, Biotech. Bioeng. _61_ 47-54 (1998)
                    will be used for Donor, Acceptor, Aromatic, Halogen,
                    Basic, Acidic

*/
RDKIT_FINGERPRINTS_EXPORT void getFeatureInvariants(
    const ROMol &mol, std::vector<boost::uint32_t> &invars,
    std::vector<const ROMol *> *patterns = nullptr);
//! version string for the feature invariants
//! (presumably changed whenever the invariant definition changes)
const std::string morganFeatureInvariantVersion = "0.1.0";

}  // namespace MorganFingerprints

namespace RDKitFPUtils {

//! builds the default atom invariants for the RDKit fingerprint
/*!
  NOTE(review): undocumented in the original header; the description is
  inferred from the function name and signature and should be confirmed
  against the implementation.

  \param mol              the molecule to be considered
  \param lAtomInvariants  used to return the results (passed by non-const
                          reference); presumably one value per atom
*/
RDKIT_FINGERPRINTS_EXPORT void buildDefaultRDKitFingerprintAtomInvariants(
    const ROMol &mol, std::vector<boost::uint32_t> &lAtomInvariants);

//! enumerates paths in a molecule and returns them via \c allPaths
/*!
  NOTE(review): undocumented in the original header; the descriptions below
  are inferred from the parameter names and types and should be confirmed
  against the implementation.

  \param mol            the molecule to be considered
  \param allPaths       used to return the results (passed by non-const
                        reference): lists of paths, each path a vector of
                        int, stored under an int key (presumably the path
                        length)
  \param fromAtoms      pointer to a list of atom indices; presumably
                        restricts the enumeration to paths starting at these
                        atoms and may be null to consider all atoms
  \param branchedPaths  presumably toggles the inclusion of branched
                        subgraphs in addition to linear paths
  \param useHs          presumably toggles whether hydrogens are considered
  \param minPath        presumably the minimum path size to enumerate
  \param maxPath        presumably the maximum path size to enumerate
*/
RDKIT_FINGERPRINTS_EXPORT void enumerateAllPaths(
    const ROMol &mol, std::map<int, std::list<std::vector<int>>> &allPaths,
    const std::vector<boost::uint32_t> *fromAtoms, bool branchedPaths,
    bool useHs, unsigned int minPath, unsigned int maxPath);

//! fills \c bondCache and \c isQueryBond for a molecule
/*!
  NOTE(review): undocumented in the original header; the descriptions below
  are inferred from the parameter names and types and should be confirmed
  against the implementation.

  \param mol          the molecule to be considered
  \param bondCache    used to return results (passed by non-const reference):
                      pointers to bonds, presumably the bonds of \c mol
  \param isQueryBond  used to return results (passed by non-const reference):
                      presumably one flag per bond, non-zero for query bonds
*/
RDKIT_FINGERPRINTS_EXPORT void identifyQueryBonds(
    const ROMol &mol, std::vector<const Bond *> &bondCache,
    std::vector<short> &isQueryBond);

//! returns a vector of unsigned int hashes for a path
/*!
  NOTE(review): undocumented in the original header; the descriptions below
  are inferred from the parameter names and types and should be confirmed
  against the implementation.

  \param mol             the molecule to be considered
  \param atomsInPath     bitset passed by non-const reference, so it may be
                         modified; presumably marks the atoms that are part
                         of \c path
  \param bondCache       pointers to bonds, presumably as filled in by
                         identifyQueryBonds()
  \param isQueryBond     per-bond flags, presumably as filled in by
                         identifyQueryBonds()
  \param path            the path to hash, presumably as a list of bond
                         indices
  \param useBondOrder    presumably toggles the use of bond orders in the
                         hashes
  \param atomInvariants  pointer to a list of atom invariants; whether a null
                         pointer is accepted is not visible from this header

  \return the hashes, presumably one per bond in \c path
*/
RDKIT_FINGERPRINTS_EXPORT std::vector<unsigned int> generateBondHashes(
    const ROMol &mol, boost::dynamic_bitset<> &atomsInPath,
    const std::vector<const Bond *> &bondCache,
    const std::vector<short> &isQueryBond, const std::vector<int> &path,
    bool useBondOrder, const std::vector<boost::uint32_t> *atomInvariants);

}  // namespace RDKitFPUtils

}  // namespace RDKit

#endif