File: query_molecule.h

package info (click to toggle)
indigo 1.2.3-1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 28,256 kB
  • sloc: ansic: 309,316; cpp: 137,636; cs: 9,118; asm: 8,011; java: 7,195; sql: 6,697; xml: 4,352; python: 3,426; sh: 207; php: 56; makefile: 49
file content (372 lines) | stat: -rw-r--r-- 12,242 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
/****************************************************************************
 * Copyright (C) 2009-2015 EPAM Systems
 *
 * This file is part of Indigo toolkit.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 3 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 ***************************************************************************/

#ifndef __query_molecule_h__
#define __query_molecule_h__

#include "molecule/base_molecule.h"
#include "base_cpp/auto_ptr.h"
#include "base_cpp/ptr_array.h"
#include "molecule/molecule_3d_constraints.h"
#include "molecule/molecule_arom.h"

#ifdef _WIN32
#pragma warning(push)
#pragma warning(disable:4251)
#endif

namespace indigo {

// Bit flags: every enumerator occupies its own bit (bits 8 through 12), so
// the values can be combined with bitwise OR.
// NOTE(review): presumably these are the values accepted by the skip_flags
// argument of QueryMolecule::_mergeWithSubmolecule, and bits 0..7 are left
// for flags declared elsewhere -- confirm against base_molecule.h.
enum
{
   SKIP_3D_CONSTRAINTS = 1 << 8,  // 0x0100
   SKIP_FIXED_ATOMS    = 1 << 9,  // 0x0200
   SKIP_RGROUPS        = 1 << 10, // 0x0400
   SKIP_AROMATICITY    = 1 << 11, // 0x0800
   SKIP_COMPONENTS     = 1 << 12  // 0x1000
};

class Output;

// A molecule whose atoms and bonds are described by trees of constraints
// (see Node, Atom and Bond below) instead of fixed values.
// NOTE(review): the virtual members declared here presumably override
// declarations from BaseMolecule; base_molecule.h is not visible from this
// header, so confirm signatures there before changing any of them.
class DLLEXPORT QueryMolecule : public BaseMolecule
{
public:

   // Kind of a constraint-tree node: a logical operator (OP_***), a
   // constraint on an atom property (ATOM_***), a constraint on a bond
   // property (BOND_***), or HIGHLIGHTING.
   // The enumerators have no explicit values, so their numeric values
   // depend on declaration order -- do not reorder or insert in the middle.
   enum OpType
   {
      OP_NONE, // used on totally unconstrained atoms
      OP_AND,
      OP_OR,
      OP_NOT,

      ATOM_NUMBER,
      ATOM_PSEUDO,
      ATOM_RSITE,
      ATOM_CHARGE,
      ATOM_ISOTOPE,
      ATOM_RADICAL,
      ATOM_VALENCE,
      //ATOM_DEGREE,
      ATOM_CONNECTIVITY,
      ATOM_TOTAL_BOND_ORDER,
      ATOM_TOTAL_H,
      //ATOM_IMPLICIT_H,
      ATOM_SUBSTITUENTS,
      ATOM_SUBSTITUENTS_AS_DRAWN,
      ATOM_SSSR_RINGS,
      ATOM_SMALLEST_RING_SIZE,
      ATOM_RING_BONDS,
      ATOM_RING_BONDS_AS_DRAWN,
      ATOM_UNSATURATION,
      ATOM_FRAGMENT,
      ATOM_AROMATICITY,
      ATOM_TEMPLATE,
      ATOM_TEMPLATE_SEQID,
      ATOM_TEMPLATE_CLASS,

      BOND_ORDER,
      BOND_TOPOLOGY,

      HIGHLIGHTING
   };

   // Abstract base of a constraint-tree node, shared by Atom and Bond.
   // It owns its children through a PtrArray and declares the pure virtual
   // hooks (_neu, _possibleValue, _possibleValuePair, _sureValue,
   // _sureValueBelongs) that the concrete node classes implement.
   class DLLEXPORT Node
   {
   public:
      // type_ is taken as int but stored in the OpType member below.
      Node (int type_);
      virtual ~Node ();
      
      OpType type; // OP_*** or ATOM_*** or BOND_***

      // type is OP_NOT: one child
      // type is OP_AND or OP_OR: more than one child
      // otherwise: no children
      PtrArray<Node> children;

      // Check if node has any constraint of the specific type
      bool hasConstraint      (int what_type);

      // Check if there is no other constraint, except specified ones
      bool hasNoConstraintExcept (int what_type);
      bool hasNoConstraintExcept (int what_type1, int what_type2);

      // Remove all constraints of the given type
      void removeConstraints (int what_type);

      // Queries about the values this constraint tree admits.
      // NOTE(review): the implementations are not visible from this header.
      // Judging by the names, sureValue* report a value that is guaranteed
      // (written to 'value' on success), possibleValue* report whether a
      // value is not ruled out, and the *Inv variants presumably evaluate
      // the negated constraint -- confirm in query_molecule.cpp.
      bool sureValue         (int what_type, int &value);
      bool sureValueInv      (int what_type, int &value);
      bool possibleValue     (int what_type, int what_value);
      bool possibleValueInv  (int what_type, int what_value);
      bool possibleValuePair (int what_type1, int what_value1,
                              int what_type2, int what_value2);
      bool possibleValuePairInv (int what_type1, int what_value1,
                                 int what_type2, int what_value2);

      // 'arr' points to 'count' candidate values.
      bool sureValueBelongs    (int what_type, const int *arr, int count);
      bool sureValueBelongsInv (int what_type, const int *arr, int count);

      // Optimize query for faster substructure search
      void optimize ();
   protected:
      // "neu" means "new" in German. This should have been a static
      // method, but static methods can not be virtual, and so it is not static.
      virtual Node * _neu () = 0;

      // Untyped counterparts of the public und / oder / nicht helpers that
      // Atom and Bond declare ("and" / "or" / "not" in German).
      static Node * _und (Node *node1, Node *node2);
      static Node * _oder (Node *node1, Node *node2);
      static Node * _nicht (Node *node);

      // Leaf-level hooks implemented by Atom and Bond.
      virtual bool _possibleValue      (int what_type, int what_value) = 0;
      virtual bool _possibleValuePair  (int what_type1, int what_value1,
                                        int what_type2, int what_value2) = 0;

      // NOTE(review): 'count' is an output parameter; its exact meaning is
      // defined by the implementation, which is not visible here.
      Node* _findSureConstraint (int what_type, int &count);
      
      virtual bool _sureValue        (int what_type, int &value_out) = 0;
      virtual bool _sureValueBelongs (int what_type, const int *arr, int count) = 0;

      // Per-class optimization hook; the default does nothing.
      virtual void _optimize () {};
   };

   // Constraint-tree node for an atom. Besides the Node type it carries an
   // integer range [value_min, value_max], an optional alias string and an
   // optional fragment sub-query; which of these is meaningful depends on
   // 'type' (see the member comments below).
   class DLLEXPORT Atom : public Node
   {
   public:
      Atom ();

      // NOTE(review): presumably these initialize, respectively, a single
      // value (value_min == value_max), a value range, the alias, and the
      // fragment -- confirm in query_molecule.cpp. 'fragment' is stored in
      // an AutoPtr, so the QueryMolecule* overload presumably takes
      // ownership of its argument.
      Atom (int type, int value);
      Atom (int type, int value_min, int value_max);
      Atom (int type, const char *value);
      Atom (int type, QueryMolecule *value);
      
      virtual ~Atom ();

      // NOTE(review): clone() returns a raw pointer; presumably the caller
      // owns the returned node -- confirm.
      Atom * clone ();
      void copy (Atom &other);

      // Typed access to the idx-th entry of 'children'.
      Atom * child (int idx);

      bool valueWithinRange (int value);

      bool hasConstraintWithValue (int what_type, int what_value);

      Atom* sureConstraint (int what_type);

      int value_min;
      int value_max;

      // available only when type is ATOM_PSEUDO or ATOM_TEMPLATE or ATOM_TEMPLATE_CLASS
      Array<char> alias;

      // available only when type is ATOM_FRAGMENT
      AutoPtr<QueryMolecule> fragment;

      // when type is ATOM_RSITE, the value (value_min == value_max)
      // is a set of 32 bits, each allowing an r-group with corresponding number
      // to go for this atom. Simple 'R' atoms have this field equal to zero.

      // "und" means "and" in German. "and" is a C++ keyword.
      static Atom * und (Atom *atom1, Atom *atom2);

      // "oder" means "or" in German. "or" is a C++ keyword.
      static Atom * oder (Atom *atom1, Atom *atom2);

      // "nicht" means "not" in German. "not" is a C++ keyword.
      static Atom * nicht (Atom *atom);

   protected:

      // Implementations of the hooks declared in Node.
      virtual Node * _neu ();
      
      virtual bool _possibleValue      (int what_type, int what_value);
      virtual bool _possibleValuePair  (int what_type1, int what_value1,
                                        int what_type2, int what_value2);
      virtual bool _sureValue        (int what_type, int &value_out);
      virtual bool _sureValueBelongs (int what_type, const int *arr, int count);
      
      virtual void _optimize ();

      // Project macro; NOTE(review): presumably declares this class's
      // exception type -- defined outside this header.
      DECL_ERROR;
   };

   // Constraint-tree node for a bond. Carries a single integer 'value'
   // whose meaning depends on 'type' (BOND_ORDER, BOND_TOPOLOGY, ...).
   class DLLEXPORT Bond : public Node
   {
   public:
      Bond ();
      Bond (int type_, int value_);
      virtual ~Bond ();

      int value;

      // NOTE(review): clone() returns a raw pointer; presumably the caller
      // owns the returned node -- confirm.
      Bond * clone ();

      // Typed access to the idx-th entry of 'children'.
      Bond * child (int idx);

      // "und" means "and" in German. "and" is a C++ keyword.
      static Bond * und (Bond *node1, Bond *node2);

      // "oder" means "or" in German. "or" is a C++ keyword.
      static Bond * oder (Bond *node1, Bond *node2);

      // "nicht" means "not" in German. "not" is a C++ keyword.
      static Bond * nicht (Bond *node);

   protected:
      // Implementations of the hooks declared in Node. Unlike Atom, Bond
      // does not declare _optimize, so Node's empty default applies.
      virtual Node * _neu ();

      virtual bool _possibleValue      (int what_type, int what_value);
      virtual bool _possibleValuePair  (int what_type1, int what_value1,
                                        int what_type2, int what_value2);
      virtual bool _sureValue        (int what_type, int &value_out);
      virtual bool _sureValueBelongs (int what_type, const int *arr, int count);
   };

   QueryMolecule ();
   virtual ~QueryMolecule ();

   virtual void clear ();

   // "neu" means "new" in German; returns a BaseMolecule pointer.
   // NOTE(review): presumably a freshly allocated empty QueryMolecule owned
   // by the caller -- confirm.
   virtual BaseMolecule * neu ();

   virtual QueryMolecule& asQueryMolecule ();
   virtual bool isQueryMolecule ();

   // Per-atom property accessors; 'idx' is an atom index.
   // NOTE(review): for a query atom a property may have no single definite
   // value; what is returned in that case is decided by the implementation
   // and is not visible from this header.
   virtual int getAtomNumber  (int idx);
   virtual int getAtomCharge  (int idx); 
   virtual int getAtomIsotope (int idx);
   virtual int getAtomRadical (int idx);
   virtual int getExplicitValence (int idx);
   virtual int getAtomAromaticity (int idx);
   virtual int getAtomValence        (int idx);
   virtual int getAtomSubstCount     (int idx);
   virtual int getAtomRingBondsCount (int idx);

   virtual int getAtomMaxH   (int idx);
   virtual int getAtomMinH   (int idx);
   virtual int getAtomTotalH (int idx);

   virtual bool isPseudoAtom (int idx);
   virtual const char * getPseudoAtom (int idx);

   // NOTE(review): the top-level 'const' on the by-value int returns below
   // has no effect for callers; it is kept because the signatures
   // presumably have to match the BaseMolecule declarations.
   virtual bool isTemplateAtom (int idx);
   virtual const char * getTemplateAtom (int idx);
   virtual const int getTemplateAtomSeqid (int idx);
   virtual const char * getTemplateAtomClass (int idx);
   virtual const int getTemplateAtomDisplayOption (int idx);

   // R-site support; see the ATOM_RSITE note in Atom for the bit layout.
   virtual bool  isRSite (int atom_idx);
   virtual dword getRSiteBits (int atom_idx);
   virtual void  allowRGroupOnRSite (int atom_idx, int rg_idx);

   virtual bool isSaturatedAtom (int idx);

   // Bond accessors and "is this value admitted by the query" predicates.
   virtual int  getBondOrder      (int idx);
   virtual int  getBondTopology   (int idx);
   // 'numbers' points to 'count' candidate atomic numbers.
   virtual bool atomNumberBelongs (int idx, const int *numbers, int count);
   virtual bool possibleAtomNumber (int idx, int number);
   virtual bool possibleAtomNumberAndCharge (int idx, int number, int charge);
   virtual bool possibleAtomNumberAndIsotope (int idx, int number, int isotope);
   virtual bool possibleAtomIsotope (int idx, int number);
   virtual bool possibleAtomCharge  (int idx, int charge);
   virtual bool possibleAtomRadical (int idx, int radical);
   // Write a textual description of the atom / bond into 'description'.
   virtual void getAtomDescription  (int idx, Array<char> &description);
   virtual void getBondDescription  (int idx, Array<char> &description);
   virtual bool possibleBondOrder   (int idx, int order);

   bool possibleNitrogenV5 (int idx);

   // Classification of query atoms and query bonds.
   // NOTE(review): presumably the values returned by parseQueryAtom and
   // getQueryBondType below; QUERY_ATOM_A/X/Q look like the generic
   // molfile query atom symbols -- confirm.
   enum QUERY_ATOM {QUERY_ATOM_A, QUERY_ATOM_X, QUERY_ATOM_Q, QUERY_ATOM_LIST, QUERY_ATOM_NOTLIST};
   enum QUERY_BOND {QUERY_BOND_DOUBLE_OR_AROMATIC = 0, QUERY_BOND_SINGLE_OR_AROMATIC, QUERY_BOND_SINGLE_OR_DOUBLE, QUERY_BOND_ANY};
   // Static helpers that inspect atom / bond constraint trees.
   static bool isKnownAttr (QueryMolecule::Atom& qa);
   static bool isNotAtom (QueryMolecule::Atom& qa, int elem);
   static QueryMolecule::Atom* stripKnownAttrs (QueryMolecule::Atom& qa);
   static bool collectAtomList (Atom& qa, Array<int>& list, bool& notList);
   static int parseQueryAtom (QueryMolecule& qm, int aid, Array<int>& list);
   static bool queryAtomIsRegular (QueryMolecule& qm, int aid);
   static Bond* getBondOrderTerm (Bond& qb, bool& complex);
   static bool isOrBond (Bond& qb, int type1, int type2);
   static bool isSingleOrDouble (Bond& qb);
   static int getQueryBondType (Bond& qb);

   // Per-bond "stereo care" flag.
   // NOTE(review): presumably backed by _bond_stereo_care below -- confirm.
   virtual bool bondStereoCare (int idx);
   void setBondStereoCare (int idx, bool stereo_care);

   virtual bool aromatize (const AromaticityOptions &options);
   virtual bool dearomatize (const AromaticityOptions &options);

   // Atom / bond management. The constraint trees are kept in the PtrArray
   // members _atoms and _bonds.
   // NOTE(review): presumably addAtom / addBond / reset* take ownership of
   // the passed node and release* hand ownership back to the caller --
   // confirm in query_molecule.cpp.
   int addAtom (Atom *atom);
   Atom & getAtom (int idx);
   Atom * releaseAtom (int idx);
   void   resetAtom (int idx, Atom *atom);

   Bond & getBond (int idx);
   Bond * releaseBond (int idx);
   void   resetBond (int idx, Bond *bond);
   int addBond (int beg, int end, Bond *bond);

   void optimize ();

   Molecule3dConstraints spatial_constraints;
   Array<int> fixed_atoms;
   
   QueryMoleculeAromaticity aromaticity;

   Array<char> fragment_smarts;

   // for component-level grouping of SMARTS
   // components[i] = 0 means nothing;
   // components[i] = components[j] > 0 means that i-th and j-th vertices
   // must belong to the same connected component of the target molecule;
   // components[i] != components[j] > 0 means that i-th and j-th vertices
   // must belong to different connected components of the target molecule
   Array<int> components;

   virtual void invalidateAtom (int index, int mask);

   // NOTE: "Exteral" is a misspelling of "External"; the name is part of
   // the public interface and is kept as is.
   int getAtomMaxExteralConnectivity (int idx);

   bool standardize (const StandardizeOptions &options);

protected:

   int _calcAtomConnectivity (int idx);
   // Recursive-looking helpers behind getAtomDescription / getBondDescription;
   // they write to an Output stream instead of an Array<char>.
   void _getAtomDescription (Atom *atom, Output &out, int depth);
   void _getBondDescription (Bond *bond, Output &out);
   int _getAtomMinH (Atom *atom);
   
   // Structure-editing hooks.
   // NOTE(review): presumably overrides of BaseMolecule hooks; skip_flags
   // presumably takes the SKIP_*** values declared at the top of this
   // header -- confirm.
   virtual void _flipBond (int atom_parent, int atom_from, int atom_to);
   virtual void _mergeWithSubmolecule (BaseMolecule &bmol, const Array<int> &vertices,
                                       const Array<int> *edges, const Array<int> &mapping,
                                       int skip_flags);
   virtual void _postMergeWithSubmolecule (BaseMolecule &bmol, const Array<int> &vertices,
                                       const Array<int> *edges, const Array<int> &mapping,
                                       int skip_flags);
   virtual void _removeAtoms (const Array<int> &indices, const int *mapping);
   virtual void _removeBonds (const Array<int> &indices);

   // NOTE(review): presumably a per-atom cache for getAtomMinH -- confirm.
   Array<int> _min_h;

   Array<bool> _bond_stereo_care;

   // Constraint trees, one entry per atom / bond.
   PtrArray<Atom> _atoms;
   PtrArray<Bond> _bonds;

};

}

#ifdef _WIN32
#pragma warning(pop)
#endif

#endif