File: Metal.cpp

package info (click to toggle)
rdkit 202503.1-4
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 220,160 kB
  • sloc: cpp: 399,240; python: 77,453; ansic: 25,517; java: 8,173; javascript: 4,005; sql: 2,389; yacc: 1,565; lex: 1,263; cs: 1,081; makefile: 578; xml: 229; fortran: 183; sh: 105
file content (283 lines) | stat: -rw-r--r-- 10,474 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
//
//  Copyright (C) 2018 Susan H. Leung
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
#include "Metal.h"
#include <GraphMol/FileParsers/MolSGroupParsing.h>
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <GraphMol/RDKitBase.h>
#include <GraphMol/Substruct/SubstructMatch.h>

#include <GraphMol/RDKitQueries.h>
#include <GraphMol/Substruct/SubstructMatch.h>
#include <GraphMol/Substruct/SubstructUtils.h>

using namespace std;
using namespace RDKit;
namespace RDKit {
class RWMol;

class ROMol;

namespace MolStandardize {

MetalDisconnector::MetalDisconnector(const MetalDisconnectorOptions &options)
    : dp_metal_nof(
          SmartsToMol("[Li,Na,K,Rb,Cs,Fr,Be,Mg,Ca,Sr,Ba,Ra,Sc,Ti,V,Cr,Mn,Fe,Co,"
                      "Ni,Cu,Zn,Al,Ga,Y,Zr,Nb,Mo,Tc,Ru,Rh,Pd,Ag,Cd,In,Sn,Hf,Ta,"
                      "W,Re,Os,Ir,Pt,Au,Hg,Tl,Pb,Bi]~[#7,#8,F]")),
      d_options(options) {
  BOOST_LOG(rdInfoLog) << "Initializing MetalDisconnector\n";
  std::string metalList =
      "Al,Sc,Ti,V,Cr,Mn,Fe,Co,Ni,Cu,Zn,Y,Zr,Nb,Mo,Tc,Ru,Rh,Pd,Ag,Cd,Hf,Ta,"
      "W,Re,Os,Ir,Pt,Au]~";
  std::string nonMetalList = "Si,P,As,Sb,S,Se,Te,Cl,Br,I,At]";
  if (d_options.splitGrignards) {
    metalList = "[Li,Na,Mg,K," + metalList;
  } else {
    metalList = "[" + metalList;
  }
  if (d_options.splitAromaticC) {
    nonMetalList = "[B,#6," + nonMetalList;
  } else {
    nonMetalList = "[B,C," + nonMetalList;
  }
  std::string metal_non_smt = metalList + nonMetalList;
  dp_metal_non.reset(RDKit::SmartsToMol(metal_non_smt));
  std::string metalDummySmt = metalList + "[*]";
  dp_metalDummy.reset(RDKit::SmartsToMol(metalDummySmt));
};

MetalDisconnector::MetalDisconnector(const MetalDisconnector &other)
    : dp_metal_nof(other.dp_metal_nof),
      dp_metal_non(other.dp_metal_non),
      dp_metalDummy(other.dp_metalDummy),
      d_options(other.d_options) {};

MetalDisconnector::~MetalDisconnector() {};

ROMol *MetalDisconnector::getMetalNof() { return dp_metal_nof.get(); }

ROMol *MetalDisconnector::getMetalNon() { return dp_metal_non.get(); }

void MetalDisconnector::setMetalNof(const ROMol &mol) {
  this->dp_metal_nof.reset(new ROMol(mol));
}

void MetalDisconnector::setMetalNon(const ROMol &mol) {
  this->dp_metal_non.reset(new ROMol(mol));
}

ROMol *MetalDisconnector::disconnect(const ROMol &mol) {
  auto *res = new RWMol(mol);
  MetalDisconnector::disconnect(*res);
  return static_cast<ROMol *>(res);
}

void MetalDisconnector::disconnect(RWMol &mol) {
  BOOST_LOG(rdInfoLog) << "Running MetalDisconnector\n";
  std::list<ROMOL_SPTR> metalList = {dp_metal_nof, dp_metal_non};
  std::map<int, NonMetal> nonMetals;
  std::map<int, int> metalChargeExcess;
  for (auto &query : metalList) {
    std::vector<MatchVectType> matches;
    SubstructMatch(mol, *query, matches);

    for (const auto &match : matches) {
      int metal_idx = match[0].second;
      metalChargeExcess[metal_idx] = 0;
      auto metal = mol.getAtomWithIdx(metal_idx);
      int non_idx = match[1].second;
      auto nonMetal = mol.getAtomWithIdx(non_idx);

      Bond *b = mol.getBondBetweenAtoms(metal_idx, non_idx);
      int order = (b->getBondType() >= Bond::DATIVEONE &&
                   b->getBondType() <= Bond::DATIVER)
                      ? 0
                      : static_cast<int>(b->getBondTypeAsDouble());
      // disconnecting metal-R bond
      mol.removeBond(metal_idx, non_idx);
      // increment the cut bond count for this non-metal atom
      // and store the metal it was bonded to. We will need this
      // later to adjust the metal charge
      auto &value = nonMetals[non_idx];
      value.cutBonds += order;
      auto it = std::lower_bound(value.boundMetalIndices.begin(),
                                 value.boundMetalIndices.end(), metal_idx);
      if (it == value.boundMetalIndices.end() || *it != metal_idx) {
        value.boundMetalIndices.insert(it, metal_idx);
      }

      BOOST_LOG(rdInfoLog) << "Removed covalent bond between "
                           << metal->getSymbol() << " and "
                           << nonMetal->getSymbol() << "\n";
    }
    //	std::cout << "After removing bond and charge adjustment: " <<
    // MolToSmiles(mol) << std::endl;
  }
  if (d_options.adjustCharges) {
    adjust_charges(mol, nonMetals, metalChargeExcess);
  }
  if (d_options.removeHapticDummies) {
    remove_haptic_dummies(mol);
  }
}

void MetalDisconnector::adjust_charges(RDKit::RWMol &mol,
                                       std::map<int, NonMetal> &nonMetals,
                                       std::map<int, int> &metalChargeExcess) {
  for (auto it = nonMetals.begin(); it != nonMetals.end(); ++it) {
    auto a = mol.getAtomWithIdx(it->first);
    // do not blindly trust the original formal charge as it is often wrong
    // instead, find out the most appropriate formal charge based
    // on the allowed element valences and on its current valence/lone electrons
    // if there are no standard valences we assume the original
    // valence was correct and the non-metal element was neutral
    int valenceBeforeCut = a->getTotalValence();
    int radBeforeCut = a->getNumRadicalElectrons();
    int fcAfterCut = -it->second.cutBonds;
    int valenceAfterCut = 0;
    int loneElectrons = 0;
    const auto &valens =
        PeriodicTable::getTable()->getValenceList(a->getAtomicNum());
    if (!valens.empty() && valens.front() != -1) {
      for (auto v = valens.begin(); v != valens.end(); ++v) {
        valenceAfterCut = valenceBeforeCut + radBeforeCut - it->second.cutBonds;
        if (valenceAfterCut > *v) {
          if (v + 1 != valens.end()) {
            continue;
          }
          valenceAfterCut = *v;
          break;
        }
        fcAfterCut = valenceAfterCut - *v;
        // if there were radicals before and now we have
        // a negative formal charge, then it's a carbene-like
        // system (e.g., [Me]-[C]=O), or there was something silly
        // such as [Me]-[S]=X or [Me]-[P](X)(X)X
        if ((radBeforeCut % 2) && fcAfterCut < 0) {
          ++fcAfterCut;
          ++loneElectrons;
          // no radical doublets on N and higher
          a->setNumRadicalElectrons(a->getAtomicNum() < 7 ? radBeforeCut + 1
                                                          : 0);
        }
        break;
      }
    }
    // do not put a negative charge on sp2 carbon
    if (fcAfterCut == -1 &&
        valenceAfterCut == static_cast<int>(a->getTotalDegree()) + 1) {
      fcAfterCut = 0;
    }
    a->setFormalCharge(fcAfterCut);
    if (!it->second.cutBonds ||
        (fcAfterCut == -1 && a->getAtomicNum() == 6 &&
         valenceAfterCut == static_cast<int>(a->getTotalDegree()) + 2)) {
      // do not take electrons from the metal if it was a dative bond
      // (e.g., [C-]#[O+] coordinated to metal)
      fcAfterCut = 0;
    }
    std::sort(it->second.boundMetalIndices.begin(),
              it->second.boundMetalIndices.end(),
              [&metalChargeExcess](int a, int b) {
                return (metalChargeExcess.at(a) < metalChargeExcess.at(b));
              });
    fcAfterCut += loneElectrons;
    while (fcAfterCut < 0) {
      for (auto i : it->second.boundMetalIndices) {
        // if the bond was not dative, the non-metal stole electrons
        // from the metal(s), so we need to take electrons from
        // once-bonded metal(s)
        if (fcAfterCut++ >= 0) {
          break;
        }
        ++metalChargeExcess[i];
      }
    }
    a->updatePropertyCache();
  }
  // adjust formal charges of metal atoms
  for (auto it = metalChargeExcess.begin(); it != metalChargeExcess.end();
       ++it) {
    auto a = mol.getAtomWithIdx(it->first);
    auto currentFc = a->getFormalCharge();
    const auto &valens =
        PeriodicTable::getTable()->getValenceList(a->getAtomicNum());
    // valens should have at least -1 in it, as the atom data is currently
    // configured, so max_element should never return valens.end().
    auto max_valence = *std::max_element(valens.begin(), valens.end());
    // Don't go over the maximum real valence.
    if (max_valence != -1 && currentFc >= max_valence) {
      continue;
    }
    int fcAfterCut = it->second;
    if (currentFc > 0) {
      // if the original formal charge on the metal was positive, we trust it
      // and add it to the charge excess
      fcAfterCut += currentFc;
    }
    if (!valens.empty() && valens.front() != -1) {
      for (auto v = valens.begin(); v != valens.end(); ++v) {
        // Some metals (e.g. Mg and Ba) have -1 as a final catchall, which
        // is unhelpful for this.
        if (*v == -1) {
          continue;
        }
        if (fcAfterCut > *v) {
          auto next = v + 1;
          if (next != valens.end() && fcAfterCut >= *v) {
            continue;
          }
          fcAfterCut = *v;
          break;
        }
      }
    }
    if (fcAfterCut > currentFc) {
      a->setFormalCharge(fcAfterCut);
    }
    // make sure that radical electrons on metals are 0
    // and are not added to metals by sanitization
    // by setting NoImplicit to false
    a->setNumRadicalElectrons(0);
    a->setNumExplicitHs(0);
    a->setNoImplicit(false);
    a->updatePropertyCache();
  }
}

void MetalDisconnector::remove_haptic_dummies(RDKit::RWMol &mol) {
  std::vector<MatchVectType> matches;
  SubstructMatch(mol, *dp_metalDummy, matches);
  std::vector<unsigned int> dummiesToGo;
  for (const auto &match : matches) {
    int metal_idx = match[0].second;
    int dummy_idx = match[1].second;
    auto bond = mol.getBondBetweenAtoms(metal_idx, dummy_idx);
    std::string sprop;
    if (bond->getPropIfPresent(RDKit::common_properties::_MolFileBondEndPts,
                               sprop)) {
      if (sprop.length() > 4 && sprop[0] == '(' && sprop.back() == ')') {
        dummiesToGo.push_back(dummy_idx);
      }
    }
  }
  // The atom indices are recalculated after each atom removal, so take them
  // out in descending order. Bonds are taken out when the atom is removed.
  std::sort(dummiesToGo.begin(), dummiesToGo.end(), std::greater{});
  mol.beginBatchEdit();
  for (auto a : dummiesToGo) {
    mol.removeAtom(a);
  }
  mol.commitBatchEdit();
}

}  // namespace MolStandardize
}  // namespace RDKit