1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372
|
/****************************************************************************
* Copyright (C) 2009-2015 EPAM Systems
*
* This file is part of Indigo toolkit.
*
* This file may be distributed and/or modified under the terms of the
* GNU General Public License version 3 as published by the Free Software
* Foundation and appearing in the file LICENSE.GPL included in the
* packaging of this file.
*
* This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
* WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
***************************************************************************/
#ifndef __query_molecule_h__
#define __query_molecule_h__
#include "molecule/base_molecule.h"
#include "base_cpp/auto_ptr.h"
#include "base_cpp/ptr_array.h"
#include "molecule/molecule_3d_constraints.h"
#include "molecule/molecule_arom.h"
#ifdef _WIN32
#pragma warning(push)
#pragma warning(disable:4251)
#endif
namespace indigo {
// Bit flags naming parts of a query molecule that can be skipped.
// Each flag occupies a single bit, so flags can be OR-ed together.
// NOTE(review): presumably passed as the `skip_flags` argument of
// _mergeWithSubmolecule()/_postMergeWithSubmolecule(); the numbering starts
// at bit 8, presumably because lower bits are used elsewhere -- confirm.
enum
{
    SKIP_3D_CONSTRAINTS = 1 << 8,   // 0x0100
    SKIP_FIXED_ATOMS    = 1 << 9,   // 0x0200
    SKIP_RGROUPS        = 1 << 10,  // 0x0400
    SKIP_AROMATICITY    = 1 << 11,  // 0x0800
    SKIP_COMPONENTS     = 1 << 12   // 0x1000
};
class Output;
// A molecule used as a search query. Instead of concrete atom and bond
// properties, every atom and bond is described by a tree of constraints
// (see Node, Atom and Bond below); the trees are stored in _atoms and _bonds.
class DLLEXPORT QueryMolecule : public BaseMolecule
{
public:
// Kind of a constraint-tree node: a logical operation (OP_***), a constraint
// on an atom property (ATOM_***), a constraint on a bond property (BOND_***),
// or HIGHLIGHTING.
enum OpType
{
OP_NONE, // used on totally unconstrained atoms
OP_AND,
OP_OR,
OP_NOT,
ATOM_NUMBER,
ATOM_PSEUDO,
ATOM_RSITE,
ATOM_CHARGE,
ATOM_ISOTOPE,
ATOM_RADICAL,
ATOM_VALENCE,
//ATOM_DEGREE,
ATOM_CONNECTIVITY,
ATOM_TOTAL_BOND_ORDER,
ATOM_TOTAL_H,
//ATOM_IMPLICIT_H,
ATOM_SUBSTITUENTS,
ATOM_SUBSTITUENTS_AS_DRAWN,
ATOM_SSSR_RINGS,
ATOM_SMALLEST_RING_SIZE,
ATOM_RING_BONDS,
ATOM_RING_BONDS_AS_DRAWN,
ATOM_UNSATURATION,
ATOM_FRAGMENT,
ATOM_AROMATICITY,
ATOM_TEMPLATE,
ATOM_TEMPLATE_SEQID,
ATOM_TEMPLATE_CLASS,
BOND_ORDER,
BOND_TOPOLOGY,
HIGHLIGHTING
};
// Abstract base of a constraint-tree node; Atom and Bond derive from it.
// The `what_type` arguments below take OpType values passed as int.
class DLLEXPORT Node
{
public:
Node (int type_);
virtual ~Node ();
OpType type; // OP_*** or ATOM_*** or BOND_***
// type is OP_NOT: one child
// type is OP_AND or OP_OR: more than one child
// otherwise: no children
PtrArray<Node> children;
// Check if node has any constraint of the specific type
bool hasConstraint (int what_type);
// Check if there is no other constraint, except specified ones
bool hasNoConstraintExcept (int what_type);
bool hasNoConstraintExcept (int what_type1, int what_type2);
// Remove all constraints of the given type
void removeConstraints (int what_type);
// Queries about which values the tree allows for a given constraint type.
// NOTE(review): presumably "sure" means the tree pins the property to one
// value (returned through `value`), "possible" means the value is not ruled
// out, and the *Inv variants evaluate the node under negation -- the
// implementation is not visible here; confirm before relying on this.
bool sureValue (int what_type, int &value);
bool sureValueInv (int what_type, int &value);
bool possibleValue (int what_type, int what_value);
bool possibleValueInv (int what_type, int what_value);
bool possibleValuePair (int what_type1, int what_value1,
int what_type2, int what_value2);
bool possibleValuePairInv (int what_type1, int what_value1,
int what_type2, int what_value2);
// `arr` points to `count` candidate values.
bool sureValueBelongs (int what_type, const int *arr, int count);
bool sureValueBelongsInv (int what_type, const int *arr, int count);
// Optimize query for faster substructure search
void optimize ();
protected:
// "neu" means "new" in German. This should have been a static
// method, but static methods can not be virtual, and so it is not static.
// NOTE(review): presumably returns a newly allocated node of the concrete
// derived type (Atom or Bond) -- confirm against the implementation.
virtual Node * _neu () = 0;
// Untyped counterparts of the public und/oder/nicht helpers that Atom and
// Bond declare ("and", "or", "not" in German).
static Node * _und (Node *node1, Node *node2);
static Node * _oder (Node *node1, Node *node2);
static Node * _nicht (Node *node);
// Type-specific hooks that Atom and Bond must implement.
virtual bool _possibleValue (int what_type, int what_value) = 0;
virtual bool _possibleValuePair (int what_type1, int what_value1,
int what_type2, int what_value2) = 0;
Node* _findSureConstraint (int what_type, int &count);
virtual bool _sureValue (int what_type, int &value_out) = 0;
virtual bool _sureValueBelongs (int what_type, const int *arr, int count) = 0;
// Optional hook: the default implementation does nothing.
virtual void _optimize () {};
};
// Constraint-tree node describing a query atom.
class DLLEXPORT Atom : public Node
{
public:
Atom ();
// Constraint with a single integer value.
Atom (int type, int value);
// Constraint with an integer value range (see value_min / value_max).
Atom (int type, int value_min, int value_max);
// Constraint carrying a string; NOTE(review): presumably stored in `alias`.
Atom (int type, const char *value);
// Constraint carrying a query molecule; NOTE(review): presumably stored in
// `fragment`, which is an AutoPtr -- confirm the ownership transfer.
Atom (int type, QueryMolecule *value);
virtual ~Atom ();
Atom * clone ();
void copy (Atom &other);
// Typed access to children[idx].
Atom * child (int idx);
bool valueWithinRange (int value);
bool hasConstraintWithValue (int what_type, int what_value);
Atom* sureConstraint (int what_type);
// Bounds of the constrained value. For ATOM_RSITE both hold the same
// 32-bit mask (see the r-site note below).
int value_min;
int value_max;
// available only when type is ATOM_PSEUDO or ATOM_TEMPLATE or ATOM_TEMPLATE_CLASS
Array<char> alias;
// available only when type is ATOM_FRAGMENT
AutoPtr<QueryMolecule> fragment;
// R-site note: when type is ATOM_RSITE, the value (value_min == value_max)
// is a set of 32 bits, each allowing an r-group with the corresponding
// number to go for this atom. Simple 'R' atoms have this field equal to zero.
// "und" means "and" in German. "and" is a C++ keyword.
static Atom * und (Atom *atom1, Atom *atom2);
// "oder" means "or" in German. "or" is a C++ keyword.
static Atom * oder (Atom *atom1, Atom *atom2);
// "nicht" means "not" in German. "not" is a C++ keyword.
static Atom * nicht (Atom *atom);
protected:
// Implementations of the Node hooks for atom constraints.
virtual Node * _neu ();
virtual bool _possibleValue (int what_type, int what_value);
virtual bool _possibleValuePair (int what_type1, int what_value1,
int what_type2, int what_value2);
virtual bool _sureValue (int what_type, int &value_out);
virtual bool _sureValueBelongs (int what_type, const int *arr, int count);
virtual void _optimize ();
DECL_ERROR;
};
// Constraint-tree node describing a query bond.
class DLLEXPORT Bond : public Node
{
public:
Bond ();
Bond (int type_, int value_);
virtual ~Bond ();
// The constrained value; unlike Atom, a bond constraint holds a single
// value rather than a range.
int value;
Bond * clone ();
// Typed access to children[idx].
Bond * child (int idx);
// "und" means "and" in German. "and" is a C++ keyword.
static Bond * und (Bond *node1, Bond *node2);
// "oder" means "or" in German. "or" is a C++ keyword.
static Bond * oder (Bond *node1, Bond *node2);
// "nicht" means "not" in German. "not" is a C++ keyword.
static Bond * nicht (Bond *node);
protected:
// Implementations of the Node hooks for bond constraints
// (Bond keeps the default, empty Node::_optimize).
virtual Node * _neu ();
virtual bool _possibleValue (int what_type, int what_value);
virtual bool _possibleValuePair (int what_type1, int what_value1,
int what_type2, int what_value2);
virtual bool _sureValue (int what_type, int &value_out);
virtual bool _sureValueBelongs (int what_type, const int *arr, int count);
};
QueryMolecule ();
virtual ~QueryMolecule ();
virtual void clear ();
// Same "neu" ("new" in German) naming as Node::_neu.
// NOTE(review): presumably creates a new, empty QueryMolecule -- confirm.
virtual BaseMolecule * neu ();
virtual QueryMolecule& asQueryMolecule ();
virtual bool isQueryMolecule ();
// Per-atom and per-bond property accessors; `idx` is an atom or bond index.
// NOTE(review): these are presumably overrides of the BaseMolecule
// interface; the base class is not visible here. How a property that the
// query does not fix is reported is decided by the implementation.
virtual int getAtomNumber (int idx);
virtual int getAtomCharge (int idx);
virtual int getAtomIsotope (int idx);
virtual int getAtomRadical (int idx);
virtual int getExplicitValence (int idx);
virtual int getAtomAromaticity (int idx);
virtual int getAtomValence (int idx);
virtual int getAtomSubstCount (int idx);
virtual int getAtomRingBondsCount (int idx);
virtual int getAtomMaxH (int idx);
virtual int getAtomMinH (int idx);
virtual int getAtomTotalH (int idx);
virtual bool isPseudoAtom (int idx);
virtual const char * getPseudoAtom (int idx);
virtual bool isTemplateAtom (int idx);
virtual const char * getTemplateAtom (int idx);
// NOTE(review): the top-level `const` on these by-value int returns has no
// effect for callers; it is left as is because the signature has to match
// declarations and definitions outside this header.
virtual const int getTemplateAtomSeqid (int idx);
virtual const char * getTemplateAtomClass (int idx);
virtual const int getTemplateAtomDisplayOption (int idx);
virtual bool isRSite (int atom_idx);
virtual dword getRSiteBits (int atom_idx);
virtual void allowRGroupOnRSite (int atom_idx, int rg_idx);
virtual bool isSaturatedAtom (int idx);
virtual int getBondOrder (int idx);
virtual int getBondTopology (int idx);
// Checks of whether the query atom/bond at `idx` admits the given value(s).
virtual bool atomNumberBelongs (int idx, const int *numbers, int count);
virtual bool possibleAtomNumber (int idx, int number);
virtual bool possibleAtomNumberAndCharge (int idx, int number, int charge);
virtual bool possibleAtomNumberAndIsotope (int idx, int number, int isotope);
virtual bool possibleAtomIsotope (int idx, int number);
virtual bool possibleAtomCharge (int idx, int charge);
virtual bool possibleAtomRadical (int idx, int radical);
// Write a textual description of the atom/bond at `idx` into `description`.
virtual void getAtomDescription (int idx, Array<char> &description);
virtual void getBondDescription (int idx, Array<char> &description);
virtual bool possibleBondOrder (int idx, int order);
bool possibleNitrogenV5 (int idx);
// Classification codes for query atoms and query bonds.
// NOTE(review): presumably the values returned by parseQueryAtom() and
// getQueryBondType() below -- both return plain int; confirm.
enum QUERY_ATOM {QUERY_ATOM_A, QUERY_ATOM_X, QUERY_ATOM_Q, QUERY_ATOM_LIST, QUERY_ATOM_NOTLIST};
enum QUERY_BOND {QUERY_BOND_DOUBLE_OR_AROMATIC = 0, QUERY_BOND_SINGLE_OR_AROMATIC, QUERY_BOND_SINGLE_OR_DOUBLE, QUERY_BOND_ANY};
// Static helpers that analyse an atom or bond constraint tree.
static bool isKnownAttr (QueryMolecule::Atom& qa);
static bool isNotAtom (QueryMolecule::Atom& qa, int elem);
static QueryMolecule::Atom* stripKnownAttrs (QueryMolecule::Atom& qa);
static bool collectAtomList (Atom& qa, Array<int>& list, bool& notList);
static int parseQueryAtom (QueryMolecule& qm, int aid, Array<int>& list);
static bool queryAtomIsRegular (QueryMolecule& qm, int aid);
static Bond* getBondOrderTerm (Bond& qb, bool& complex);
static bool isOrBond (Bond& qb, int type1, int type2);
static bool isSingleOrDouble (Bond& qb);
static int getQueryBondType (Bond& qb);
virtual bool bondStereoCare (int idx);
void setBondStereoCare (int idx, bool stereo_care);
virtual bool aromatize (const AromaticityOptions &options);
virtual bool dearomatize (const AromaticityOptions &options);
// Atom and bond management.
// NOTE(review): the trees are kept in PtrArray containers (_atoms, _bonds),
// so add*/reset* presumably take ownership of the passed pointer and
// release* presumably hands it back to the caller -- confirm against the
// implementation.
int addAtom (Atom *atom);
Atom & getAtom (int idx);
Atom * releaseAtom (int idx);
void resetAtom (int idx, Atom *atom);
Bond & getBond (int idx);
Bond * releaseBond (int idx);
void resetBond (int idx, Bond *bond);
int addBond (int beg, int end, Bond *bond);
// NOTE(review): presumably the molecule-level counterpart of
// Node::optimize() -- confirm.
void optimize ();
Molecule3dConstraints spatial_constraints;
Array<int> fixed_atoms;
QueryMoleculeAromaticity aromaticity;
Array<char> fragment_smarts;
// for component-level grouping of SMARTS
// components[i] = 0 means nothing;
// components[i] = components[j] > 0 means that i-th and j-th vertices
// must belong to the same connected component of the target molecule;
// components[i] != components[j] > 0 means that i-th and j-th vertices
// must belong to different connected components of the target molecule
Array<int> components;
virtual void invalidateAtom (int index, int mask);
// NOTE(review): "Exteral" looks like a misspelling of "External"; the name
// is part of the public interface and is therefore left unchanged.
int getAtomMaxExteralConnectivity (int idx);
bool standardize (const StandardizeOptions &options);
protected:
int _calcAtomConnectivity (int idx);
// Helpers that write the description of a constraint tree to `out`.
void _getAtomDescription (Atom *atom, Output &out, int depth);
void _getBondDescription (Bond *bond, Output &out);
int _getAtomMinH (Atom *atom);
// Structure-editing hooks. NOTE(review): presumably invoked by BaseMolecule,
// which is not visible here; `skip_flags` presumably combines the SKIP_***
// flags declared before this class -- confirm.
virtual void _flipBond (int atom_parent, int atom_from, int atom_to);
virtual void _mergeWithSubmolecule (BaseMolecule &bmol, const Array<int> &vertices,
const Array<int> *edges, const Array<int> &mapping,
int skip_flags);
virtual void _postMergeWithSubmolecule (BaseMolecule &bmol, const Array<int> &vertices,
const Array<int> *edges, const Array<int> &mapping,
int skip_flags);
virtual void _removeAtoms (const Array<int> &indices, const int *mapping);
virtual void _removeBonds (const Array<int> &indices);
// NOTE(review): presumably per-atom cached values behind getAtomMinH().
Array<int> _min_h;
// NOTE(review): presumably the per-bond flags behind bondStereoCare() /
// setBondStereoCare().
Array<bool> _bond_stereo_care;
// Constraint trees of the atoms and bonds.
PtrArray<Atom> _atoms;
PtrArray<Bond> _bonds;
};
}
#ifdef _WIN32
#pragma warning(pop)
#endif
#endif
|