File: StructChecker.h

package info (click to toggle)
rdkit 201809.1%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 123,688 kB
  • sloc: cpp: 230,509; python: 70,501; java: 6,329; ansic: 5,427; sql: 1,899; yacc: 1,739; lex: 1,243; makefile: 445; xml: 229; fortran: 183; sh: 123; cs: 93
file content (300 lines) | stat: -rw-r--r-- 9,150 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
//
//  Copyright (C) 2016 Novartis Institutes for BioMedical Research
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//

/*! \file StructChecker.h

\brief Contains the public API of the StructChecker

\b Note that this should be considered beta and that the API may change in
future
releases.

*/
#include <RDGeneral/export.h>
#pragma once
#ifndef RD_STRUCTCHECKER_H_Oct2016
#define RD_STRUCTCHECKER_H_Oct2016

#include <map>
#include <string>
#include <utility>
#include <vector>
#include "../RDKitBase.h"

namespace RDKit {
namespace StructureCheck {

// NOTE: the flags returned by the structure checker are declared further down
// in this header, as StructChecker::StructureFlags.

// Constants, enums and structs used for translating augmented atom pairs
// Value stored in the Charge field by the default constructors of Ligand and
// AugmentedAtom. Presumably a wildcard meaning "match any charge" - TODO
// confirm against the matching code.
static const int ANY_CHARGE = 8;
//! Radical state of an atom; used by Ligand::Radical and
//! AugmentedAtom::Radical.
// NOTE(review): the values 0-3 look like the MDL CTFile radical codes -
// confirm before relying on that.
enum RadicalType {
  RT_NONE = 0,  // no radical
  SINGLET = 1,
  DOUBLET = 2,
  TRIPLET = 3,
  // Default assigned by the Ligand and AugmentedAtom default constructors;
  // presumably means "any radical state is acceptable" - TODO confirm.
  ANY_RADICAL = 0xFF
};

//! Bond type stored in Ligand::BondType.
enum AABondType {  // MDL CTFile bond types plus extensions
  BT_NONE = 0,     // means REMOVE Bond
  SINGLE = 1,
  DOUBLE = 2,
  TRIPLE = 3,
  AROMATIC = 4,
  // The next three presumably are the "either/or" query bond types
  // (single-or-double, single-or-aromatic, double-or-aromatic) - TODO confirm.
  SINGLE_DOUBLE = 5,
  SINGLE_AROMATIC = 6,
  DOUBLE_AROMATIC = 7,
  // Default assigned by the Ligand default constructor.
  ANY_BOND = 8,
  // NOTE(review): looks like a mask covering all of the values above - confirm.
  ALL_BOND_TYPES = 0xF
};

//! Ring/chain requirement stored in AugmentedAtom::Topology.
//! TP_NONE is the value assigned by the AugmentedAtom default constructor.
enum AATopology {
  TP_NONE = 0,  // Don't care
  RING = 1,     // Ring
  CHAIN = 2     // Chain
};

//! One entry of the AugmentedAtom::Ligands list.
/*!
  Carries an atom symbol, a charge, a radical state, a substitution count and
  a bond type. The default constructor leaves the symbol empty and sets every
  other field to its "any" / "don't care" value.
  NOTE(review): BondType presumably describes the bond between this ligand and
  the central atom of the owning AugmentedAtom - confirm.
*/
struct RDKIT_STRUCTCHECKER_EXPORT Ligand {
  std::string AtomSymbol;
  int Charge;
  RadicalType Radical;
  unsigned int SubstitutionCount;  // 0 = don't care
  AABondType BondType;

  //! Creates a ligand with an empty symbol and wildcard-style defaults.
  Ligand()
      : AtomSymbol(),
        Charge(ANY_CHARGE),
        Radical(ANY_RADICAL),
        SubstitutionCount(0),
        BondType(ANY_BOND) {}
};

//! An atom description (symbol, short name, charge, radical state, topology)
//! together with a list of Ligand entries.
struct RDKIT_STRUCTCHECKER_EXPORT AugmentedAtom {
  std::string AtomSymbol;
  std::string ShortName;
  int Charge;
  RadicalType Radical;
  AATopology Topology;
  std::vector<Ligand> Ligands;

  //! Creates an entry with empty strings, no ligands, and the
  //! ANY_CHARGE / ANY_RADICAL / TP_NONE defaults.
  AugmentedAtom()
      : AtomSymbol(),
        ShortName(),
        Charge(ANY_CHARGE),
        Radical(ANY_RADICAL),
        Topology(TP_NONE),
        Ligands() {}

  //! Creates an entry from explicit values; the ligand list starts out empty.
  /*!
    \param symbol   copied into AtomSymbol
    \param name     copied into ShortName
    \param charge   copied into Charge
    \param radical  copied into Radical
    \param topology copied into Topology
  */
  AugmentedAtom(const std::string &symbol, const std::string &name, int charge,
                RadicalType radical, AATopology topology)
      : AtomSymbol(symbol),
        ShortName(name),
        Charge(charge),
        Radical(radical),
        Topology(topology),
        Ligands() {}
};

//! One row of an increment table, identified by an atom symbol.
//! StructCheckerOptions stores vectors of these rows in its AtomAcidity and
//! ChargeIncTable members.
// The struct declares no constructor: the numeric members of a
// default-initialized instance hold indeterminate values until assigned.
struct RDKIT_STRUCTCHECKER_EXPORT IncEntry {
  std::string AtomSymbol;
  // Increment values.
  // NOTE(review): presumably the local / alpha-position / beta-position /
  // multiplicity contributions of the acidity and charge estimation - confirm.
  double LocalInc;
  double AlphaInc;
  double BetaInc;
  double MultInc;

  // Used for logging
  // (presumably one usage counter per increment above - TODO confirm)
  int local_inc_used;
  int alpha_inc_used;
  int beta_inc_used;
  int mult_inc_used;
};

//! Associates an AugmentedAtom with a numeric value.
//! StructCheckerOptions stores vectors of these entries in its AlphaPathTable
//! and BetaPathTable members.
// No constructor is declared: Cond and cond_used of a default-initialized
// instance hold indeterminate values until assigned.
struct RDKIT_STRUCTCHECKER_EXPORT PathEntry {
  AugmentedAtom Path;
  // NOTE(review): presumably the conduction/weight factor applied when Path
  // matches - confirm against the code that reads the path tables.
  double Cond;
  // Used for logging
  int cond_used;
};
//-------------

//! Structure Check Options
//    Holds all the user options for the StructureChecking.
//    Can be initialized from factory functions, perhaps serialized
struct RDKIT_STRUCTCHECKER_EXPORT StructCheckerOptions {
  double AcidityLimit;
  bool RemoveMinorFragments;
  int DesiredCharge;
  bool CheckCollisions;
  int CollisionLimitPercent;
  unsigned MaxMolSize;
  bool ConvertSText;
  bool SqueezeIdentifiers;
  bool StripZeros;
  bool CheckStereo;
  bool ConvertAtomTexts;
  bool GroupsToSGroups;
  bool Verbose;

  // Internal data for struchk
  std::vector<std::pair<AugmentedAtom, AugmentedAtom> > AugmentedAtomPairs;
  std::vector<AugmentedAtom> AcidicAtoms;
  std::vector<AugmentedAtom> GoodAtoms;
  std::vector<ROMOL_SPTR> Patterns;
  std::vector<ROMOL_SPTR> RotatePatterns;
  std::vector<ROMOL_SPTR> StereoPatterns;
  std::vector<ROMOL_SPTR> FromTautomer;
  std::vector<ROMOL_SPTR> ToTautomer;

  double Elneg0;                          // elneg_table[0].value;
  std::map<unsigned, double> ElnegTable;  // AtomicNumber -> eleng
  std::vector<IncEntry> AtomAcidity;      // atom_acidity_table[]
  std::vector<IncEntry> ChargeIncTable;
  // std::map AtomSymbol(or AtomicNumber) -> IncEntry
  /* [ReadTransformation() ]
  * The alpha, beta coefficients of the transfomation function used
  * to stretch the preliminary pKa values to the actual predictions.
  * The function is pKa = 7 + (pKa'-7)*beta + ((pKa'-7)*alpha)^3.
  */

  double Alpha, Beta;
  std::vector<PathEntry> AlphaPathTable, BetaPathTable;

 public:
  StructCheckerOptions();

  void clear() { *this = StructCheckerOptions(); }

  bool loadAugmentedAtomTranslations(const std::string &path);
  void setAugmentedAtomTranslations(
      const std::vector<std::pair<AugmentedAtom, AugmentedAtom> > &aaPairs);

  bool loadAcidicAugmentedAtoms(const std::string &path);
  void setAcidicAugmentedAtoms(const std::vector<AugmentedAtom> &acidicAtoms);

  bool loadGoodAugmentedAtoms(const std::string &path);
  void setGoodAugmentedAtoms(const std::vector<AugmentedAtom> &acidicAtoms);

  bool loadPatterns(const std::string &path);  // file with clean patterns
  void parsePatterns(
      const std::vector<std::string> &smarts);  // can throw RDKit exeptions
  void setPatterns(const std::vector<ROMOL_SPTR> &p);

  bool loadRotatePatterns(
      const std::string &path);  // file with rotate patterns
  void parseRotatePatterns(
      const std::vector<std::string> &smarts);  // can throw RDKit exeptions
  void setRotatePatterns(const std::vector<ROMOL_SPTR> &p);

  bool loadStereoPatterns(
      const std::string &path);  // file with stereo patterns
  void parseStereoPatterns(
      const std::vector<std::string> &smarts);  // can throw RDKit exeptions
  void setStereoPatterns(const std::vector<ROMOL_SPTR> &p);

  bool loadTautomerData(const std::string &path);  // file path
  void parseTautomerData(const std::vector<std::string> &smartsFrom,
                         const std::vector<std::string> &smartsTo);
  void setTautomerData(const std::vector<ROMOL_SPTR> &from,
                       const std::vector<ROMOL_SPTR> &to);
  bool loadChargeDataTables(const std::string &path);  // file path
};

//! Fills \c op from the text passed in \c json.
/*!
  \param json  options as a JSON string
  \param op    options object that receives the parsed values
  \return bool status. NOTE(review): presumably true on success - confirm
          against the implementation, which is not part of this header.
*/
RDKIT_STRUCTCHECKER_EXPORT bool parseOptionsJSON(const std::string &json, StructCheckerOptions &op);

//! Fills \c op from a set of external data files.
/*!
  Every file name defaults to an empty string.
  NOTE(review): an empty name presumably means "skip this file", and the
  return value presumably is true on success - confirm both against the
  implementation, which is not part of this header.
  There are currently no parameters for the acidic and good atom tables
  (see the "??" markers in the parameter list).
*/
RDKIT_STRUCTCHECKER_EXPORT bool loadOptionsFromFiles(
    StructCheckerOptions &op,
    const std::string &augmentedAtomTranslationsFile = "",
    // ?? AcidicAtoms;
    // ?? GoodAtoms;
    const std::string &patternFile = "",        // file with clean patterns
    const std::string &rotatePatternFile = "",  // file with rotate patterns
    const std::string &stereoPatternFile = "",  // file with stereo patterns
    const std::string &tautomerFile = "");

//! \brief Class for performing structure validation and cleanup
/*! \b NOTE: This class should be considered beta. The API may change in future
releases.

Examples of Usage

\code
  StructureCheck::StructChecker chk;  // use defaults
  unsigned flags = chk.checkMolStructure(mol);
\endcode

or

\code
    StructureCheck::StructCheckerOptions options;   // use defaults
    // To use external data
    StructureCheck::loadOptionsFromFiles(options, file1, file2);
    StructureCheck::StructChecker chk(options);

    for( mol in mols ) {  // pseudocode: loop over the molecules to check
        unsigned flags = chk.checkMolStructure(mol);
        if (0 != (flags & StructureCheck::StructChecker::BAD_SET)) {
        // write to error file
        } else if (0 !=
                   (flags & StructureCheck::StructChecker::TRANSFORMED_SET)) {
        // input molecule was transformed
        } else { // flags == NO_CHANGE
        // no change
        }
    }
\endcode
*/
class RDKIT_STRUCTCHECKER_EXPORT StructChecker {
 public:
  //! Bit flags describing the outcome of checkMolStructure().
  //! A result is a bitwise OR of zero or more of these values.
  enum StructureFlags {
    NO_CHANGE = 0,

    // flags collected in BAD_SET
    BAD_MOLECULE = 0x0001,
    ALIAS_CONVERSION_FAILED = 0x0002,
    STEREO_ERROR = 0x0004,
    STEREO_FORCED_BAD = 0x0008,
    ATOM_CLASH = 0x0010,
    ATOM_CHECK_FAILED = 0x0020,
    SIZE_CHECK_FAILED = 0x0040,
    // 0x0080 is reserved for a further error flag

    // flags collected in TRANSFORMED_SET
    TRANSFORMED = 0x0100,
    FRAGMENTS_FOUND = 0x0200,
    EITHER_WARNING = 0x0400,
    DUBIOUS_STEREO_REMOVED = 0x0800,
    RECHARGED = 0x1000,
    STEREO_TRANSFORMED = 0x2000,
    TEMPLATE_TRANSFORMED = 0x4000,
    TAUTOMER_TRANSFORMED = 0x8000,

    // masks
    BAD_SET = BAD_MOLECULE | ALIAS_CONVERSION_FAILED | STEREO_ERROR |
              STEREO_FORCED_BAD | ATOM_CLASH | ATOM_CHECK_FAILED |
              SIZE_CHECK_FAILED,
    TRANSFORMED_SET = TRANSFORMED | FRAGMENTS_FOUND | EITHER_WARNING |
                      DUBIOUS_STEREO_REMOVED | RECHARGED | STEREO_TRANSFORMED |
                      TEMPLATE_TRANSFORMED | TAUTOMER_TRANSFORMED
  };

  //! Creates a checker whose options are default-constructed.
  StructChecker() {}
  //! Creates a checker that works on its own copy of \c options.
  StructChecker(const StructCheckerOptions &options) : Options(options) {}

  //! Read-only access to the options held by this checker.
  const StructCheckerOptions &GetOptions() const { return Options; }
  //! Replaces the held options with a copy of \c options.
  void SetOptions(const StructCheckerOptions &options) { Options = options; }

  //! Checks the structure of \c mol and fixes it where needed.
  /*!
    \param mol  molecule to check; it may be modified
    \return a set of StructureFlags describing what has been done
  */
  unsigned checkMolStructure(RWMol &mol) const;

  // Instance-independent helper methods:

  //! Converts structure flags to a comma separated string.
  static std::string StructureFlagsToString(unsigned flags);
  //! Converts a comma separated string to an unsigned StructureFlags value.
  static unsigned StringToStructureFlags(const std::string &str);

 private:
  StructCheckerOptions Options;
};
}
}
#endif