File: MolHash.h

package info (click to toggle)
rdkit 201809.1%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 123,688 kB
  • sloc: cpp: 230,509; python: 70,501; java: 6,329; ansic: 5,427; sql: 1,899; yacc: 1,739; lex: 1,243; makefile: 445; xml: 229; fortran: 183; sh: 123; cs: 93
file content (88 lines) | stat: -rw-r--r-- 2,834 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
// $Id$
//
//  Copyright (C) 2014 Novartis Institutes for BioMedical Research
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
#include <RDGeneral/export.h>
#pragma once
#include <map>
#include <vector>
#include <string>
#include <stdexcept>
#include <boost/cstdint.hpp>
#include "../RDKitBase.h"

namespace RDKit {
namespace MolHash {
// 32-bit unsigned integer type used for the hash values declared in this
// header (return type of generateMoleculeHashCode() and the *Hash fields of
// HashSet).
typedef boost::uint32_t HashCodeType;

// Computes a single HashCodeType value for a molecule.
//
// Every pointer argument is optional and defaults to a null pointer.
//
//   mol        - the molecule to hash
//   atomsToUse - optional list of unsigned values; presumably the indices of
//                the atoms to take into account (TODO confirm against the
//                implementation)
//   bondsToUse - optional list of unsigned values; presumably bond indices.
//                Open question kept from the original author: how are listed
//                bonds between/to/from excluded atom(s) handled?
//   atomCodes  - optional 32-bit codes, presumably one per atom (compare
//                fillAtomBondCodes(); verify against the implementation)
//   bondCodes  - optional 32-bit codes, presumably one per bond (same caveat)
//
// Returns the resulting hash code.
RDKIT_MOLHASH_EXPORT HashCodeType generateMoleculeHashCode(
    const ROMol &mol,
    const std::vector<unsigned> *atomsToUse = 0,
    const std::vector<unsigned> *bondsToUse = 0,
    const std::vector<boost::uint32_t> *atomCodes = 0,
    const std::vector<boost::uint32_t> *bondCodes = 0);

// Bitwise flags that are combined to select what goes into the computed
// atom/bond codes.
//
// Atom-related flags live in the low byte (mask CF_ATOM_ALL), bond-related
// flags in the next byte (mask CF_BOND_ALL). Both masks also cover bits that
// currently have no named flag.
enum CodeFlags {
  CF_NO_LABELS = 0,

  // ---- atom flags: bits 0..5 ----
  CF_ELEMENT = 1 << 0,
  CF_CHARGE = 1 << 1,
  CF_VALENCE = 1 << 2,
  CF_ISOTOPE = 1 << 3,
  CF_ATOM_CHIRALITY = 1 << 4,
  CF_ATOM_AROMATIC = 1 << 5,
  CF_ATOM_ALL = 0x00FF,  // every bit of the atom byte

  // ---- bond flags: bits 8..11 ----
  // AROMATIZATION is ignored if CF_BOND_AROMATIZATION is not specified too
  CF_BOND_ORDER = 1 << 8,
  CF_BOND_AROMATIZATION = 1 << 9,
  // exact bond type value, aromatic included
  CF_BOND_TYPE_EXACT = CF_BOND_ORDER | CF_BOND_AROMATIZATION,
  // include bond chirality information into the bond code
  CF_BOND_CHIRALITY = 1 << 10,
  CF_BOND_IN_RING = 1 << 11,
  CF_BOND_ALL = 0xFF00,  // every bit of the bond byte

  // atom and bond masks together
  CF_ALL = 0xFFFF,
};

// Fills the atom and/or bond code vectors for a molecule.
//
//   mol       - the molecule to process
//   flags     - combination of CodeFlags constants
//   atomCodes - vector receiving 32-bit codes for the atoms; NULL is allowed
//   bondCodes - vector receiving 32-bit codes for the bonds; NULL is allowed
RDKIT_MOLHASH_EXPORT void fillAtomBondCodes(
    const ROMol &mol,
    boost::uint64_t flags,
    std::vector<boost::uint32_t> *atomCodes,
    std::vector<boost::uint32_t> *bondCodes);

// Fixed-layout record holding a set of hash values for one molecule.
//
// The struct is packed to 1-byte alignment, so its size is exactly the sum of
// its fields: four 16-bit fields followed by six 32-bit fields (32 bytes, no
// padding). Field meanings are not spelled out in this header; the names
// suggest a format version, atom/bond counts, a CRC32 of the formula and
// several hashes with/without chirality -- confirm against the implementation.
#pragma pack(push, 1)
struct RDKIT_MOLHASH_EXPORT HashSet {
  boost::uint16_t Version;
  boost::uint16_t Reserved;
  boost::uint16_t NumAtoms;
  boost::uint16_t NumBonds;
  boost::uint32_t FormulaCRC32;
  HashCodeType NonChiralAtomsHash;
  HashCodeType NonChiralBondsHash;
  HashCodeType ChiralAtomsHash;
  HashCodeType ChiralBondsHash;
  HashCodeType ChiralityHash;

 public:
  // Zero-initializes every field.
  //
  // Done with a member-initializer list instead of memset(this, 0, ...): this
  // header does not include <cstring>, so the memset call only compiled when
  // some other header happened to pull it in. Because the packed layout has
  // no padding bytes, the resulting object is byte-for-byte the same.
  HashSet()
      : Version(0),
        Reserved(0),
        NumAtoms(0),
        NumBonds(0),
        FormulaCRC32(0),
        NonChiralAtomsHash(0),
        NonChiralBondsHash(0),
        ChiralAtomsHash(0),
        ChiralBondsHash(0),
        ChiralityHash(0) {}
};
#pragma pack(pop)

// Computes the HashSet of a molecule and stores it in `res`.
//
//   mol        - the molecule to hash
//   res        - receives the result
//   atomsToUse - optional (may be null) list of unsigned values; presumably
//                the atom indices to take into account -- TODO confirm
//   bondsToUse - optional (may be null) list of unsigned values; presumably
//                the bond indices to take into account -- TODO confirm
RDKIT_MOLHASH_EXPORT void generateMoleculeHashSet(
    const ROMol &mol,
    HashSet &res,
    const std::vector<unsigned> *atomsToUse = 0,
    const std::vector<unsigned> *bondsToUse = 0);

// Overload of generateMoleculeHashSet() that returns its result as a string
// instead of filling a HashSet. The string is presumably the encoded (see
// encode()) form of the HashSet -- verify against the implementation.
//
//   mol        - the molecule to hash
//   atomsToUse - optional (may be null) list of unsigned values; presumably
//                the atom indices to take into account -- TODO confirm
//   bondsToUse - optional (may be null) list of unsigned values; presumably
//                the bond indices to take into account -- TODO confirm
RDKIT_MOLHASH_EXPORT std::string generateMoleculeHashSet(
    const ROMol &mol,
    const std::vector<unsigned> *atomsToUse = 0,
    const std::vector<unsigned> *bondsToUse = 0);

// Converts binary data to a Base64 encoded string.
//
//   bin  - pointer to the binary data
//   size - size of the data (presumably in bytes -- confirm)
RDKIT_MOLHASH_EXPORT std::string encode(const void *bin, size_t size);
}
}