File: MolOps.cpp

package info (click to toggle)
rdkit 202503.6-3
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 222,000 kB
  • sloc: cpp: 411,111; python: 78,482; ansic: 26,181; java: 8,285; javascript: 4,404; sql: 2,393; yacc: 1,626; lex: 1,267; cs: 1,090; makefile: 581; xml: 229; fortran: 183; sh: 121
file content (1341 lines) | stat: -rw-r--r-- 45,615 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
//
//  Copyright (C) 2001-2023 Greg Landrum and other RDKit contributors
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
#include <GraphMol/GraphMol.h>
#include <GraphMol/MolOps.h>
#include <GraphMol/Atom.h>
#include <GraphMol/AtomIterators.h>
#include <GraphMol/BondIterators.h>
#include <GraphMol/PeriodicTable.h>
#include <GraphMol/Chirality.h>
#include <GraphMol/RDKitQueries.h>

#include <vector>
#include <algorithm>

#include <RDGeneral/BoostStartInclude.h>

#include <boost/graph/connected_components.hpp>
#include <boost/graph/kruskal_min_spanning_tree.hpp>
#include <boost/graph/johnson_all_pairs_shortest.hpp>
#include <boost/version.hpp>
#if BOOST_VERSION >= 104000
#include <boost/property_map/property_map.hpp>
#else
#include <boost/property_map.hpp>
#endif

#include <RDGeneral/BoostEndInclude.h>

#include <boost/config.hpp>
#include <boost/graph/adjacency_list.hpp>
#include <boost/tokenizer.hpp>
#include <Geometry/point.h>
#include <GraphMol/QueryOps.h>
#include <GraphMol/ROMol.h>
#include <GraphMol/new_canon.h>
#include <GraphMol/FileParsers/MolSGroupParsing.h>

const int ci_LOCAL_INF = static_cast<int>(1e8);

namespace RDKit {
namespace MolOps {
namespace {
void nitrogenCleanup(RWMol &mol, Atom *atom) {
  // conversions here:
  // - neutral 5 coordinate Ns with double bonds to Os to the
  //   zwitterionic form.  e.g.:
  //   CN(=O)=O -> C[N+](=O)[O-]
  //   and:
  //   C1=CC=CN(=O)=C1 -> C1=CC=C[N+]([O-])=C1
  // - neutral 5 coordinate Ns with triple bonds to Ns to the
  //   zwitterionic form.  e.g.:
  //   C-N=N#N -> C-N=[N+]=[N-]

  PRECONDITION(atom, "bad atom");
  bool aromHolder;

  // we only want to do neutrals so that things like this don't get
  // munged:
  //  O=[n+]1occcc1
  // this was sf.net issue 1811276
  if (atom->getFormalCharge()) {
    return;
  }

  // we need to play this little aromaticity game because the
  // explicit valence code modifies its results for aromatic
  // atoms.
  aromHolder = atom->getIsAromatic();
  atom->setIsAromatic(0);
  // NOTE that we are calling calcExplicitValence() here, we do
  // this because we cannot be sure that it has already been
  // called on the atom (cleanUp() gets called pretty early in
  // the sanitization process):
  if (atom->calcExplicitValence(false) == 5) {
    unsigned int aid = atom->getIdx();
    for (const auto nbr : mol.atomNeighbors(atom)) {
      if ((nbr->getAtomicNum() == 8) && (nbr->getFormalCharge() == 0) &&
          (mol.getBondBetweenAtoms(aid, nbr->getIdx())->getBondType() ==
           Bond::DOUBLE)) {
        // here's the double bonded oxygen
        auto b = mol.getBondBetweenAtoms(aid, nbr->getIdx());
        b->setBondType(Bond::SINGLE);
        atom->setFormalCharge(1);
        nbr->setFormalCharge(-1);
        break;
      } else if ((nbr->getAtomicNum() == 7) && (nbr->getFormalCharge() == 0) &&
                 (mol.getBondBetweenAtoms(aid, nbr->getIdx())->getBondType() ==
                  Bond::TRIPLE)) {
        // here's the triple bonded nitrogen
        auto b = mol.getBondBetweenAtoms(aid, nbr->getIdx());
        b->setBondType(Bond::DOUBLE);
        atom->setFormalCharge(1);
        nbr->setFormalCharge(-1);
        break;
      }
    }  // end of loop over the first neigh
  }  // if this atom is 5 coordinate nitrogen
  // force a recalculation of the explicit valence here
  atom->setIsAromatic(aromHolder);
  atom->calcExplicitValence(false);
}

void phosphorusCleanup(RWMol &mol, Atom *atom) {
  // conversions here:
  // - neutral 5 coordinate Ps with one double bonds to an Os
  //   and one to a C or N to the zwitterionic form.  e.g.:
  //   C=P(=O)X -> C=[P+]([O-])X
  PRECONDITION(atom, "bad atom");

  // we only want to do neutrals
  if (atom->getFormalCharge()) {
    return;
  }

  // NOTE that we are calling calcExplicitValence() here, we do
  // this because we cannot be sure that it has already been
  // called on the atom (cleanUp() gets called pretty early in
  // the sanitization process):
  if (atom->calcExplicitValence(false) == 5 && atom->getDegree() == 3) {
    unsigned int aid = atom->getIdx();
    Bond *dbl_to_O = nullptr;
    Atom *O_atom = nullptr;
    bool hasDoubleToCorN = false;
    for (const auto nbr : mol.atomNeighbors(atom)) {
      if ((nbr->getAtomicNum() == 8) && (nbr->getFormalCharge() == 0) &&
          (mol.getBondBetweenAtoms(aid, nbr->getIdx())->getBondType() ==
           Bond::DOUBLE)) {
        // here's the double bonded oxygen
        dbl_to_O = mol.getBondBetweenAtoms(aid, nbr->getIdx());
        O_atom = nbr;
      } else if ((nbr->getAtomicNum() == 6 || nbr->getAtomicNum() == 7) &&
                 (nbr->getDegree() >= 2) &&
                 (mol.getBondBetweenAtoms(aid, nbr->getIdx())->getBondType() ==
                  Bond::DOUBLE)) {
        hasDoubleToCorN = true;
      }
    }  // end of loop over the first neigh
    if (hasDoubleToCorN && dbl_to_O != nullptr) {
      TEST_ASSERT(O_atom != nullptr);
      O_atom->setFormalCharge(-1);
      dbl_to_O->setBondType(Bond::SINGLE);
      atom->setFormalCharge(1);
    }
  }
  // force a recalculation of the explicit valence here
  atom->calcExplicitValence(false);
}

void halogenCleanup(RWMol &mol, Atom *atom) {
  PRECONDITION(atom, "bad atom");
  // Conversions done:
  //    X(=O)(=O)(=O)O -> [X+3]([O-])([O-])([O-])O
  //    X(=O)(=O)O -> [X+2]([O-])([O-])O
  //    X(=O)O -> [X+]([O-])O
  int ev = atom->calcExplicitValence(false);
  if (atom->getFormalCharge() == 0 && (ev == 7 || ev == 5 || ev == 3)) {
    bool neighborsAllO = true;
    for (const auto nbr : mol.atomNeighbors(atom)) {
      if (nbr->getAtomicNum() != 8) {
        neighborsAllO = false;
        break;
      }
    }
    if (neighborsAllO) {
      int formalCharge = 0;
      for (auto bond : mol.atomBonds(atom)) {
        if (bond->getBondType() == Bond::DOUBLE) {
          bond->setBondType(Bond::SINGLE);
          auto otherAtom = bond->getOtherAtom(atom);
          formalCharge++;
          otherAtom->setFormalCharge(-1);
          otherAtom->calcExplicitValence(false);
        }
      }
      atom->setFormalCharge(formalCharge);
      atom->calcExplicitValence(false);
    }
  }
}

bool isHypervalentNonMetal(Atom *atom) {
  if (QueryOps::isMetal(*atom)) {
    return false;
  }
  atom->updatePropertyCache(false);
  int ev = atom->getValence(Atom::ValenceType::EXPLICIT);
  // Check the explicit valence of the non-metal against the allowed
  // valences of the atom, adjusted by its formal charge.  This means that
  // N+ is treated the same as C, O+ the same as N.  This allows for,
  // for example, c1cccc[n+]1-[Fe] to be acceptable and not turned into
  // c1cccc[n+]1->[Fe].  After all, c1cccc[n+]1-C is ok.  Although this is
  // a poor example because c1ccccn1->[Fe] appears to be the normal
  // way that pyridine complexes with transition metals.  Heme b in
  // CHEBI:26355 is an example of when this is required.
  int effAtomicNum = atom->getAtomicNum() - atom->getFormalCharge();
  if (effAtomicNum <= 0) {
    return false;
  }
  // atom is a non-metal. If its explicit valence is greater than the
  // maximum allowed valence then it is hypervalent.
  //  We have a special case in here for aromatic atoms where the explicit
  //  valence matches the max allowed and the degree is 4. This is there for
  //  cases like cyclopentadienyl - metal systems. We need this special case
  //  because the explicit valence on the C atoms there ends up being 4
  const auto &otherValens =
      PeriodicTable::getTable()->getValenceList(effAtomicNum);
  auto maxV = otherValens.back();
  if (maxV > 0 && (ev > maxV || (ev == maxV && atom->getIsAromatic() &&
                                 atom->getTotalDegree() == 4))) {
    return true;
  }

  return false;
}

int numDativeBonds(const Atom *atom) {
  int numDatives = 0;
  auto &mol = atom->getOwningMol();
  for (auto bond : mol.atomBonds(atom)) {
    if (bond->getBondType() == Bond::BondType::DATIVE ||
        bond->getBondType() == Bond::BondType::DATIVEONE ||
        bond->getBondType() == Bond::BondType::DATIVEL ||
        bond->getBondType() == Bond::BondType::DATIVER) {
      ++numDatives;
    }
  }
  return numDatives;
}

// Returns true if the atom shouldn't do dative bonds.
bool noDative(const Atom *a) {
  static const std::set<int> noD{1, 2, 9, 10};
  return (noD.find(a->getAtomicNum()) != noD.end());
};

void metalBondCleanup(RWMol &mol, Atom *atom,
                      const std::vector<unsigned int> &ranks) {
  PRECONDITION(atom, "bad atom in metalBondCleanup");
  // The IUPAC recommendation for ligand->metal coordination bonds is that they
  // be single.  This upsets the RDKit valence model, as seen in CHEBI:26355,
  // heme b.  If the valence of a non-metal atom is above the maximum in the
  // RDKit model, and there are single bonds from it to metal
  // change those bonds to atom->metal dative.
  // If the atom is bonded to more than 1 metal atom, choose the one
  // with the fewer dative bonds incident on it, with the canonical
  // rank of the atoms as a tie-breaker.
  if (isHypervalentNonMetal(atom) && !noDative(atom)) {
    std::vector<Atom *> metals;
    // see if there are any metals bonded to it by a single bond
    for (auto bond : mol.atomBonds(atom)) {
      if (bond->getBondType() == Bond::BondType::SINGLE &&
          QueryOps::isMetal(*bond->getOtherAtom(atom))) {
        metals.push_back(bond->getOtherAtom(atom));
      }
    }
    if (!metals.empty()) {
      std::sort(metals.begin(), metals.end(),
                [&](const Atom *a1, const Atom *a2) -> bool {
                  int nda1 = numDativeBonds(a1);
                  int nda2 = numDativeBonds(a2);
                  if (nda1 == nda2) {
                    return ranks[a1->getIdx()] > ranks[a2->getIdx()];
                  } else {
                    return nda1 < nda2;
                  }
                });
      auto bond =
          mol.getBondBetweenAtoms(atom->getIdx(), metals.front()->getIdx());
      if (bond) {
        bond->setBondType(RDKit::Bond::BondType::DATIVE);
        bond->setBeginAtom(atom);
        bond->setEndAtom(metals.front());
      }
    }
  }
}
}  // namespace

void cleanUp(RWMol &mol) {
  for (auto atom : mol.atoms()) {
    switch (atom->getAtomicNum()) {
      case 7:
        nitrogenCleanup(mol, atom);
        break;
      case 15:
        phosphorusCleanup(mol, atom);
        break;
      case 17:
      case 35:
      case 53:
        halogenCleanup(mol, atom);
        break;
    }
  }
}

void cleanUpOrganometallics(RWMol &mol) {
  // At present all this does is look for single bonds between
  // non-metals and metals where the non-metal exceeds one of
  // its normal valence states, and replaces that bond with
  // a dative one from the non-metal to the metal.
  bool needsFixing = false;
  for (const auto atom : mol.atoms()) {
    if (isHypervalentNonMetal(atom) && !noDative(atom)) {
      // see if there are any metals bonded to it by a single bond
      for (auto bond : mol.atomBonds(atom)) {
        if (bond->getBondType() == Bond::BondType::SINGLE &&
            QueryOps::isMetal(*bond->getOtherAtom(atom))) {
          needsFixing = true;
          break;
        }
      }
    }
    if (needsFixing) {
      break;
    }
  }
  if (!needsFixing) {
    return;
  }

  mol.updatePropertyCache(false);
  // First see if anything needs doing
  std::vector<unsigned int> ranks(mol.getNumAtoms());
  RDKit::Canon::rankMolAtoms(mol, ranks);
  std::vector<std::pair<int, int>> atom_ranks;
  for (size_t i = 0; i < ranks.size(); ++i) {
    atom_ranks.push_back(std::make_pair(i, ranks[i]));
  }
  std::sort(atom_ranks.begin(), atom_ranks.end(),
            [](const std::pair<int, int> &p1, std::pair<int, int> &p2) -> bool {
              return p1.second < p2.second;
            });
  for (auto ar : atom_ranks) {
    auto atom = mol.getAtomWithIdx(ar.first);
    metalBondCleanup(mol, atom, ranks);
  }
}

void adjustHs(RWMol &mol) {
  //
  //  Go through and adjust the number of implicit and explicit Hs
  //  on each atom in the molecule.
  //
  //  Atoms that do not *need* explicit Hs
  //
  //  Assumptions: this is called after the molecule has been
  //  sanitized, aromaticity has been perceived, and the implicit
  //  valence of everything has been calculated.
  //
  for (auto atom : mol.atoms()) {
    int origImplicitV = atom->getValence(Atom::ValenceType::IMPLICIT);
    atom->calcExplicitValence(false);
    int origExplicitV = atom->getNumExplicitHs();

    int newImplicitV = atom->calcImplicitValence(false);
    //
    //  Case 1: The disappearing Hydrogen
    //    Smiles:  O=C1NC=CC2=C1C=CC=C2
    //
    //    after perception is done, the N atom has two aromatic
    //    bonds to it and a single implicit H.  When the Smiles is
    //    written, we get: n1ccc2ccccc2c1=O.  Here the nitrogen has
    //    no implicit Hs (because there are two aromatic bonds to
    //    it, giving it a valence of 3).  Also: this SMILES is bogus
    //    (un-kekulizable).  The correct SMILES would be:
    //    [nH]1ccc2ccccc2c1=O.  So we need to loop through the atoms
    //    and find those that have lost implicit H; we'll add those
    //    back as explicit Hs.
    //
    //    <phew> that takes way longer to comment than it does to
    //    write:
    if (newImplicitV < origImplicitV) {
      atom->setNumExplicitHs(origExplicitV + (origImplicitV - newImplicitV));
      atom->calcExplicitValence(false);
    }
  }
}

void assignRadicals(RWMol &mol) {
  for (auto atom : mol.atoms()) {
    // we only automatically assign radicals to atoms that
    // don't have implicit Hs:
    if (!atom->getNoImplicit() || !atom->getAtomicNum()) {
      continue;
    }
    const auto &valens =
        PeriodicTable::getTable()->getValenceList(atom->getAtomicNum());
    int chg = atom->getFormalCharge();
    int nOuter =
        PeriodicTable::getTable()->getNouterElecs(atom->getAtomicNum());
    if (valens.size() != 1 || valens[0] != -1) {
      double accum = 0.0;
      RWMol::OEDGE_ITER beg, end;
      boost::tie(beg, end) = mol.getAtomBonds(atom);
      while (beg != end) {
        accum += mol[*beg]->getValenceContrib(atom);
        ++beg;
      }
      accum += atom->getNumExplicitHs();
      int totalValence = static_cast<int>(accum + 0.1);
      int baseCount = 8;
      if (atom->getAtomicNum() == 1 || atom->getAtomicNum() == 2) {
        baseCount = 2;
      }

      // applies to later (more electronegative) elements:
      int numRadicals = baseCount - nOuter - totalValence + chg;
      if (numRadicals < 0) {
        numRadicals = 0;
        // can the atom be "hypervalent"?  (was github #447)
        const INT_VECT &valens =
            PeriodicTable::getTable()->getValenceList(atom->getAtomicNum());
        if (valens.size() > 1) {
          for (auto val : valens) {
            if (val - totalValence + chg >= 0) {
              numRadicals = val - totalValence + chg;
              break;
            }
          }
        }
      }
      // applies to earlier elements:
      int numRadicals2 = nOuter - totalValence - chg;
      if (numRadicals2 >= 0) {
        numRadicals = std::min(numRadicals, numRadicals2);
      }
      atom->setNumRadicalElectrons(numRadicals);
    } else {
      // #7122: if there's a bond to the metal center, then don't assign
      // radicals:
      if (atom->getDegree() > 0) {
        atom->setNumRadicalElectrons(0);
      } else {
        auto nValence = nOuter - chg;
        //  if this is an atom where we have no preferred valence info at all,
        //  e.g. for transition metals, then we shouldn't be guessing. This was
        //  #3330
        if (nValence < 0) {
          // this was github #5462
          nValence = 0;
          BOOST_LOG(rdWarningLog)
              << "Unusual charge on atom " << atom->getIdx()
              << " number of radical electrons set to zero" << std::endl;
        }
        atom->setNumRadicalElectrons(nValence % 2);
      }
    }
  }
}

MolOps::Hybridizations::Hybridizations(const ROMol &mol) {
  d_hybridizations.clear();
  // see if the mol already has computed hybridizations:

  if (mol.getNumAtoms() == 0) {
    return;
  }

  if ((*mol.atoms().begin())->getHybridization() !=
      Atom::HybridizationType::UNSPECIFIED) {
    for (auto atom : mol.atoms()) {
      d_hybridizations.push_back((int)atom->getHybridization());
    }
    return;
  }

  // compute them in a copy of the mol, so as not to change the mol passed in

  RWMol molCopy(mol);
  unsigned int operationThatFailed;
  unsigned int santitizeOps =
      MolOps::SANITIZE_SETCONJUGATION | MolOps::SANITIZE_SETHYBRIDIZATION;
  MolOps::sanitizeMol(molCopy, operationThatFailed, santitizeOps);
  for (auto atom : molCopy.atoms()) {
    // determine hybridization and remove chiral atoms that are not sp3
    d_hybridizations.push_back((int)atom->getHybridization());
  }
  return;
}

void cleanupAtropisomers(RWMol &mol) {
  auto hybs = MolOps::Hybridizations(mol);

  MolOps::cleanupAtropisomers(mol, hybs);
}

namespace {
void checkBond(RWMol &mol, Bond *bond, MolOps::Hybridizations &hybs) {
  if (!mol.getRingInfo()->isSssrOrBetter()) {
    RDKit::MolOps::findSSSR(mol);
  }
  const RingInfo *ri = mol.getRingInfo();
  if (hybs[bond->getBeginAtomIdx()] != Atom::SP2 ||
      hybs[bond->getEndAtomIdx()] != Atom::SP2 ||
      // do not clear bonds that part of a macrocycle
      // because they can be linking actual atropisomeric portions
      (ri->numBondRings(bond->getIdx()) > 0 &&
       ri->minBondRingSize(bond->getIdx()) < 8)) {
    bond->setStereo(Bond::BondStereo::STEREONONE);
  }
}
}  // namespace

void cleanupAtropisomers(RWMol &mol, MolOps::Hybridizations &hybs) {
  // make sure that ring info is available
  // (defensive, current calls have it available)
  for (auto bond : mol.bonds()) {
    switch (bond->getStereo()) {
      case Bond::BondStereo::STEREOATROPCW:
      case Bond::BondStereo::STEREOATROPCCW:
        checkBond(mol, bond, hybs);
        break;
      default:
        break;
    }
  }
}
void sanitizeMol(RWMol &mol) {
  unsigned int failedOp = 0;
  sanitizeMol(mol, failedOp, SANITIZE_ALL);
}
void sanitizeMol(RWMol &mol, unsigned int &operationThatFailed,
                 unsigned int sanitizeOps) {
  // clear out any cached properties
  mol.clearComputedProps();

  operationThatFailed = SANITIZE_CLEANUP;
  if (sanitizeOps & operationThatFailed) {
    // clean up things like nitro groups
    cleanUp(mol);
  }

  // fix things like non-metal to metal bonds that should be dative.
  operationThatFailed = SANITIZE_CLEANUP_ORGANOMETALLICS;
  if (sanitizeOps & operationThatFailed) {
    cleanUpOrganometallics(mol);
  }

  // update computed properties on atoms and bonds:
  operationThatFailed = SANITIZE_PROPERTIES;
  if (sanitizeOps & operationThatFailed) {
    mol.updatePropertyCache(true);
  } else {
    mol.updatePropertyCache(false);
  }

  operationThatFailed = SANITIZE_SYMMRINGS;
  if (sanitizeOps & operationThatFailed) {
    VECT_INT_VECT arings;
    MolOps::symmetrizeSSSR(mol, arings);
  }

  // kekulizations
  operationThatFailed = SANITIZE_KEKULIZE;
  if (sanitizeOps & operationThatFailed) {
    Kekulize(mol);
  }

  // look for radicals:
  // We do this now because we need to know
  // that the N in [N]1C=CC=C1 has a radical
  // before we move into setAromaticity().
  // It's important that this happen post-Kekulization
  // because there's no way of telling what to do
  // with the same molecule if it's in the form
  // [n]1cccc1
  operationThatFailed = SANITIZE_FINDRADICALS;
  if (sanitizeOps & operationThatFailed) {
    assignRadicals(mol);
  }

  // then do aromaticity perception
  operationThatFailed = SANITIZE_SETAROMATICITY;
  if (sanitizeOps & operationThatFailed) {
    setAromaticity(mol);
  }

  // set conjugation
  operationThatFailed = SANITIZE_SETCONJUGATION;
  if (sanitizeOps & operationThatFailed) {
    setConjugation(mol);
  }

  // set hybridization
  operationThatFailed = SANITIZE_SETHYBRIDIZATION;
  if (sanitizeOps & operationThatFailed) {
    setHybridization(mol);
  }

  // remove bogus chirality specs:
  operationThatFailed = SANITIZE_CLEANUPCHIRALITY;
  if (sanitizeOps & operationThatFailed) {
    cleanupChirality(mol);
  }

  operationThatFailed = SANITIZE_CLEANUPATROPISOMERS;
  if (sanitizeOps & operationThatFailed) {
    cleanupAtropisomers(mol);
  }

  // adjust Hydrogen counts:
  operationThatFailed = SANITIZE_ADJUSTHS;
  if (sanitizeOps & operationThatFailed) {
    adjustHs(mol);
  }

  // now that everything has been cleaned up, go through and check/update the
  // computed valences on atoms and bonds one more time
  operationThatFailed = SANITIZE_PROPERTIES;
  if (sanitizeOps & operationThatFailed) {
    mol.updatePropertyCache(true);
  }
  operationThatFailed = 0;
}

std::vector<std::unique_ptr<MolSanitizeException>> detectChemistryProblems(
    const ROMol &imol, unsigned int sanitizeOps) {
  RWMol mol(imol);
  std::vector<std::unique_ptr<MolSanitizeException>> res;

  // clear out any cached properties
  mol.clearComputedProps();

  int operation;
  operation = SANITIZE_CLEANUP;
  if (sanitizeOps & operation) {
    // clean up things like nitro groups
    cleanUp(mol);
  }

  // update computed properties on atoms and bonds:
  operation = SANITIZE_PROPERTIES;
  if (sanitizeOps & operation) {
    for (auto &atom : mol.atoms()) {
      try {
        bool strict = true;
        atom->updatePropertyCache(strict);
      } catch (const MolSanitizeException &e) {
        res.emplace_back(e.copy());
      }
    }
  } else {
    mol.updatePropertyCache(false);
  }

  // kekulizations
  operation = SANITIZE_KEKULIZE;
  if (sanitizeOps & operation) {
    try {
      Kekulize(mol);
    } catch (const MolSanitizeException &e) {
      res.emplace_back(e.copy());
    }
  }
  return res;
}

namespace {
std::vector<std::unique_ptr<ROMol>> getTheFrags(
    const ROMol &mol, bool sanitizeFrags, INT_VECT *frags,
    VECT_INT_VECT *fragsMolAtomMapping, bool copyConformers) {
  std::unique_ptr<INT_VECT> mappingStorage;
  if (!frags) {
    mappingStorage.reset(new INT_VECT);
    frags = mappingStorage.get();
  }
  int nFrags = getMolFrags(mol, *frags);
  std::vector<std::unique_ptr<RWMol>> res;
  if (nFrags == 1) {
    res.emplace_back(new RWMol(mol));
    if (fragsMolAtomMapping) {
      INT_VECT comp;
      for (unsigned int idx = 0; idx < mol.getNumAtoms(); ++idx) {
        comp.push_back(idx);
      }
      (*fragsMolAtomMapping).push_back(comp);
    }
  } else {
    res.reserve(nFrags);
    for (int i = 0; i < nFrags; ++i) {
      boost::dynamic_bitset<> atomsInFrag(mol.getNumAtoms());
      INT_VECT comp;
      for (unsigned int idx = 0; idx < mol.getNumAtoms(); ++idx) {
        if ((*frags)[idx] == i) {
          comp.push_back(idx);
          atomsInFrag.set(idx);
        }
      }
      auto fragmentHasChallengingFeatures =
          [&](const INT_VECT &comp,
              const boost::dynamic_bitset<> &atomsInFrag) -> bool {
        for (auto idx : comp) {
          // check for atoms with stereochem:
          const auto atom = mol.getAtomWithIdx(idx);
          if (atom->getChiralTag() != Atom::ChiralType::CHI_UNSPECIFIED &&
              atom->getChiralTag() != Atom::ChiralType::CHI_OTHER) {
            return true;
          }
          for (auto bnd : mol.atomBonds(atom)) {
            if (atomsInFrag[bnd->getOtherAtomIdx(idx)]) {
              if (bnd->getStereo() != Bond::BondStereo::STEREONONE &&
                  bnd->getStereo() != Bond::BondStereo::STEREOANY) {
                return true;
              }
            }
          }
        }
        for (auto sgroup : getSubstanceGroups(mol)) {
          for (auto aid : sgroup.getAtoms()) {
            if (atomsInFrag[aid]) {
              return true;
            }
          }
          for (auto aid : sgroup.getParentAtoms()) {
            if (atomsInFrag[aid]) {
              return true;
            }
          }
        }
        for (auto stereoGroup : mol.getStereoGroups()) {
          // doesn't seem like this should be necessary, but in case
          // we ever need stereogroups where the atoms aren't marked
          // with stereo...
          for (auto atom : stereoGroup.getAtoms()) {
            if (atomsInFrag[atom->getIdx()]) {
              return true;
            }
          }
          // same check for stereo groups involving bonds:
          for (auto bond : stereoGroup.getBonds()) {
            if (atomsInFrag[bond->getBeginAtomIdx()] &&
                atomsInFrag[bond->getEndAtomIdx()]) {
              return true;
            }
          }
        }
        return false;
      };
      if (comp.size() == 1 ||
          (nFrags > 3 && !fragmentHasChallengingFeatures(comp, atomsInFrag))) {
        // special case for a small, simple fragments when a bunch of fragments
        // are present. The check on the number of fragments is purely
        // empirical. This is mainly intended to catch situations like proteins
        // where you have a bunch of single-atom fragments (waters); the
        // standard approach below ends up being horribly inefficient there
        res.emplace_back(new RWMol());
        auto &frag = res.back();
        std::map<unsigned int, unsigned int> atomIdxMap;
        for (auto aid : comp) {
          atomIdxMap[aid] =
              frag->addAtom(mol.getAtomWithIdx(aid)->copy(), false, true);
        }
        for (auto bond : mol.bonds()) {
          if (atomsInFrag[bond->getBeginAtomIdx()] &&
              atomsInFrag[bond->getEndAtomIdx()]) {
            auto bondCopy = bond->copy();
            bondCopy->setBeginAtomIdx(atomIdxMap[bond->getBeginAtomIdx()]);
            bondCopy->setEndAtomIdx(atomIdxMap[bond->getEndAtomIdx()]);
            frag->addBond(bondCopy, true);
          }
        }
        if (copyConformers) {
          for (auto cit = mol.beginConformers(); cit != mol.endConformers();
               ++cit) {
            auto *conf = new Conformer(frag->getNumAtoms());
            conf->setId((*cit)->getId());
            conf->set3D((*cit)->is3D());
            unsigned int cidx = 0;
            for (auto ai : comp) {
              conf->setAtomPos(cidx++, (*cit)->getAtomPos(ai));
            }
            frag->addConformer(conf);
          }
        }
      } else {
        res.emplace_back(new RWMol(mol));
        auto &frag = res.back();

        frag->beginBatchEdit();
        for (unsigned int idx = 0; idx < mol.getNumAtoms(); ++idx) {
          if (!atomsInFrag[idx]) {
            frag->removeAtom(idx);
          }
        }
        frag->commitBatchEdit();
      }
      if (fragsMolAtomMapping) {
        (*fragsMolAtomMapping).push_back(comp);
      }
    }
  }
  if (!copyConformers) {
    for (auto &frag : res) {
      frag->clearConformers();
    }
  }

  if (sanitizeFrags) {
    for (auto &frag : res) {
      sanitizeMol(*frag);
    }
  }

  std::vector<std::unique_ptr<ROMol>> finalRes;
  for (auto &r : res) {
    finalRes.emplace_back(r.get());
    r.release();
  }
  return finalRes;
}
}  // namespace
std::vector<ROMOL_SPTR> getMolFrags(const ROMol &mol, bool sanitizeFrags,
                                    INT_VECT *frags,
                                    VECT_INT_VECT *fragsMolAtomMapping,
                                    bool copyConformers) {
  auto upFrags = getTheFrags(mol, sanitizeFrags, frags, fragsMolAtomMapping,
                             copyConformers);
  std::vector<boost::shared_ptr<ROMol>> finalRes;
  for (auto &r : upFrags) {
    finalRes.emplace_back(r.get());
    r.release();
  }
  return finalRes;
}

unsigned int getMolFrags(const ROMol &mol, INT_VECT &mapping) {
  unsigned int natms = mol.getNumAtoms();
  mapping.resize(natms);
  return natms ? boost::connected_components(mol.getTopology(), &mapping[0])
               : 0;
};

unsigned int getMolFrags(const ROMol &mol, VECT_INT_VECT &frags) {
  frags.clear();
  INT_VECT mapping;
  getMolFrags(mol, mapping);

  INT_INT_VECT_MAP comMap;
  for (unsigned int i = 0; i < mol.getNumAtoms(); i++) {
    int mi = mapping[i];
    if (comMap.find(mi) == comMap.end()) {
      INT_VECT comp;
      comMap[mi] = comp;
    }
    comMap[mi].push_back(i);
  }

  for (INT_INT_VECT_MAP_CI mci = comMap.begin(); mci != comMap.end(); mci++) {
    frags.push_back((*mci).second);
  }
  return rdcast<unsigned int>(frags.size());
}

unsigned int getMolFrags(const ROMol &mol,
                         std::vector<std::unique_ptr<ROMol>> &molFrags,
                         bool sanitizeFrags, std::vector<int> *frags,
                         std::vector<std::vector<int>> *fragsMolAtomMapping,
                         bool copyConformers) {
  molFrags = getTheFrags(mol, sanitizeFrags, frags, fragsMolAtomMapping,
                         copyConformers);
  return rdcast<unsigned int>(molFrags.size());
}

namespace {
template <typename T>
std::map<T, std::unique_ptr<ROMol>> getTheFragsWithQuery(
    const ROMol &mol, T (*query)(const ROMol &, const Atom *),
    bool sanitizeFrags, const std::vector<T> *whiteList, bool negateList) {
  std::vector<T> assignments(mol.getNumAtoms());
  std::vector<int> ids(mol.getNumAtoms(), -1);
  std::map<T, std::unique_ptr<ROMol>> res;
  for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) {
    T where = query(mol, mol.getAtomWithIdx(i));
    if (whiteList) {
      bool found = std::find(whiteList->begin(), whiteList->end(), where) !=
                   whiteList->end();
      if (!found && !negateList) {
        continue;
      } else if (found && negateList) {
        continue;
      }
    }
    assignments[i] = where;
    if (res.find(where) == res.end()) {
      res[where] = std::unique_ptr<ROMol>(new ROMol());
    }
    auto *frag = static_cast<RWMol *>(res[where].get());
    ids[i] = frag->addAtom(mol.getAtomWithIdx(i)->copy(), false, true);
    // loop over neighbors and add bonds in the fragment to all atoms
    // that are already in the same fragment
    ROMol::ADJ_ITER nbrIdx, endNbrs;
    boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(mol.getAtomWithIdx(i));
    while (nbrIdx != endNbrs) {
      if (*nbrIdx < i && assignments[*nbrIdx] == where) {
        Bond *nBond = mol.getBondBetweenAtoms(i, *nbrIdx)->copy();
        nBond->setOwningMol(static_cast<ROMol *>(frag));
        nBond->setBeginAtomIdx(ids[nBond->getBeginAtomIdx()]);
        nBond->setEndAtomIdx(ids[nBond->getEndAtomIdx()]);
        frag->addBond(nBond, true);
      }
      ++nbrIdx;
    }
  }
  // update conformers
  for (auto cit = mol.beginConformers(); cit != mol.endConformers(); ++cit) {
    for (auto iter = res.begin(); iter != res.end(); ++iter) {
      auto &newM = iter->second;
      auto *conf = new Conformer(newM->getNumAtoms());
      conf->setId((*cit)->getId());
      conf->set3D((*cit)->is3D());
      newM->addConformer(conf);
    }
    for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) {
      if (ids[i] < 0) {
        continue;
      }
      res[assignments[i]]
          ->getConformer((*cit)->getId())
          .setAtomPos(ids[i], (*cit)->getAtomPos(i));
    }
  }
  if (sanitizeFrags) {
    for (auto iter = res.begin(); iter != res.end(); ++iter) {
      sanitizeMol(*static_cast<RWMol *>(iter->second.get()));
    }
  }
  return res;
}
}  // namespace

template <typename T>
std::map<T, boost::shared_ptr<ROMol>> getMolFragsWithQuery(
    const ROMol &mol, T (*query)(const ROMol &, const Atom *),
    bool sanitizeFrags, const std::vector<T> *whiteList, bool negateList) {
  PRECONDITION(query, "no query");

  auto rawRes =
      getTheFragsWithQuery(mol, query, sanitizeFrags, whiteList, negateList);
  std::map<T, boost::shared_ptr<ROMol>> res;
  for (auto &it : rawRes) {
    res.insert(std::make_pair(it.first, it.second.get()));
    it.second.release();
  }
  return res;
}
template RDKIT_GRAPHMOL_EXPORT std::map<std::string, boost::shared_ptr<ROMol>>
getMolFragsWithQuery(const ROMol &mol,
                     std::string (*query)(const ROMol &, const Atom *),
                     bool sanitizeFrags, const std::vector<std::string> *,
                     bool);
template RDKIT_GRAPHMOL_EXPORT std::map<int, boost::shared_ptr<ROMol>>
getMolFragsWithQuery(const ROMol &mol,
                     int (*query)(const ROMol &, const Atom *),
                     bool sanitizeFrags, const std::vector<int> *, bool);
template RDKIT_GRAPHMOL_EXPORT std::map<unsigned int, boost::shared_ptr<ROMol>>
getMolFragsWithQuery(const ROMol &mol,
                     unsigned int (*query)(const ROMol &, const Atom *),
                     bool sanitizeFrags, const std::vector<unsigned int> *,
                     bool);

template <typename T>
unsigned int getMolFragsWithQuery(const ROMol &mol,
                                  T (*query)(const ROMol &, const Atom *),
                                  std::map<T, std::unique_ptr<ROMol>> &molFrags,
                                  bool sanitizeFrags,
                                  const std::vector<T> *whiteList,
                                  bool negateList) {
  PRECONDITION(query, "no query");

  molFrags =
      getTheFragsWithQuery(mol, query, sanitizeFrags, whiteList, negateList);
  return rdcast<unsigned int>(molFrags.size());
}
template RDKIT_GRAPHMOL_EXPORT unsigned int getMolFragsWithQuery(
    const ROMol &mol, std::string (*query)(const ROMol &, const Atom *),
    std::map<std::string, std::unique_ptr<ROMol>> &molFrags, bool sanitizeFrags,
    const std::vector<std::string> *, bool);
template RDKIT_GRAPHMOL_EXPORT unsigned int getMolFragsWithQuery(
    const ROMol &mol, int (*query)(const ROMol &, const Atom *),
    std::map<int, std::unique_ptr<ROMol>> &molFrags, bool sanitizeFrags,
    const std::vector<int> *, bool);
template RDKIT_GRAPHMOL_EXPORT unsigned int getMolFragsWithQuery(
    const ROMol &mol, unsigned int (*query)(const ROMol &, const Atom *),
    std::map<unsigned int, std::unique_ptr<ROMol>> &molFrags,
    bool sanitizeFrags, const std::vector<unsigned int> *, bool);

int getFormalCharge(const ROMol &mol) {
  int accum = 0;
  for (ROMol::ConstAtomIterator atomIt = mol.beginAtoms();
       atomIt != mol.endAtoms(); ++atomIt) {
    accum += (*atomIt)->getFormalCharge();
  }
  return accum;
};

unsigned getNumAtomsWithDistinctProperty(const ROMol &mol, std::string prop) {
  unsigned numPropAtoms = 0;
  for (const auto atom : mol.atoms()) {
    if (atom->hasProp(prop)) {
      ++numPropAtoms;
    }
  }
  return numPropAtoms;
}

ROMol *hapticBondsToDative(const ROMol &mol) {
  auto *res = new RWMol(mol);
  hapticBondsToDative(*res);
  return static_cast<ROMol *>(res);
}

void hapticBondsToDative(RWMol &mol) {
  std::vector<unsigned int> dummiesToGo;
  std::vector<std::pair<unsigned int, unsigned int>> bondsToAdd;
  mol.beginBatchEdit();
  for (const auto &bond : mol.bonds()) {
    if (bond->getBondType() == Bond::BondType::DATIVE) {
      auto oats = details::hapticBondEndpoints(bond);
      if (oats.empty()) {
        continue;
      }
      Atom *dummy = nullptr;
      Atom *metal = nullptr;
      if (bond->getBeginAtom()->getAtomicNum() == 0) {
        dummy = bond->getBeginAtom();
        metal = bond->getEndAtom();
      } else if (bond->getEndAtom()->getAtomicNum() == 0) {
        metal = bond->getBeginAtom();
        dummy = bond->getEndAtom();
      }
      if (dummy == nullptr) {
        continue;
      }
      for (auto oat : oats) {
        auto atom = mol.getAtomWithIdx(oat);
        if (atom) {
          mol.addBond(atom, metal, Bond::DATIVE);
        }
      }
      mol.removeAtom(dummy);
    }
  }
  mol.commitBatchEdit();
}

ROMol *dativeBondsToHaptic(const ROMol &mol) {
  auto *res = new RWMol(mol);
  dativeBondsToHaptic(*res);
  return static_cast<ROMol *>(res);
}

namespace {
// return sets of contiguous atoms of more than 1 atom that are in
// allAts.
std::vector<std::vector<unsigned int>> contiguousAtoms(
    const ROMol &mol, const std::vector<unsigned int> &allAts) {
  std::vector<std::vector<unsigned int>> contigAts;
  std::vector<char> doneAts(mol.getNumAtoms(), 0);
  std::vector<char> inAllAts(mol.getNumAtoms(), 0);
  for (auto a : allAts) {
    inAllAts[a] = 1;
  }
  for (size_t i = 0; i < allAts.size(); ++i) {
    if (doneAts[allAts[i]]) {
      continue;
    }
    contigAts.push_back(std::vector<unsigned int>());
    std::list<const Atom *> toDo{mol.getAtomWithIdx(allAts[i])};
    while (!toDo.empty()) {
      auto nextAt = toDo.front();
      toDo.pop_front();
      if (!doneAts[nextAt->getIdx()]) {
        doneAts[nextAt->getIdx()] = 1;
        contigAts.back().push_back(nextAt->getIdx());
      }
      for (const auto &nbri :
           boost::make_iterator_range(mol.getAtomNeighbors(nextAt))) {
        if (inAllAts[nbri] && !doneAts[nbri]) {
          toDo.push_back(mol.getAtomWithIdx(nbri));
        }
      }
    }
    if (contigAts.back().size() < 2) {
      contigAts.pop_back();
    }
  }
  return contigAts;
}

// add to the molecule a dummy atom centred on the
// atoms passed in, with a dative bond from it to the metal atom.
void addHapticBond(RWMol &mol, unsigned int metalIdx,
                   std::vector<unsigned int> hapticAtoms) {
  // So there is a * in the V3000 file as the symbol for the atom.
  auto dummyAt = new QueryAtom(0);
  dummyAt->setQuery(makeAtomNullQuery());

  bool updateLabel = true;
  bool takeOwnwership = true;
  unsigned int dummyIdx = mol.addAtom(dummyAt, updateLabel, takeOwnwership);
  for (auto i = 0u; i < mol.getNumConformers(); ++i) {
    auto &conf = mol.getConformer(i);
    RDGeom::Point3D dummyPos;
    for (auto ha : hapticAtoms) {
      auto haPos = conf.getAtomPos(ha);
      dummyPos += haPos;
    }
    dummyPos /= hapticAtoms.size();
    conf.setAtomPos(dummyIdx, dummyPos);
  }
  unsigned int numbonds = mol.addBond(dummyIdx, metalIdx, Bond::DATIVE);
  auto bond = mol.getBondWithIdx(numbonds - 1);

  // Get the atom numbers for the end points.  First number is the
  // count, the rest count from 1.
  std::ostringstream oss;
  oss << "(" << hapticAtoms.size() << " ";
  for (auto ha : hapticAtoms) {
    oss << ha + 1 << " ";
  }
  std::string endpts{oss.str()};
  if (endpts.back() == ' ') {
    endpts = endpts.substr(0, endpts.length() - 1);
  }
  endpts += ")";
  bond->setProp(common_properties::_MolFileBondEndPts, endpts);
  bond->setProp<std::string>(common_properties::_MolFileBondAttach, "ALL");
}
}  // namespace

void dativeBondsToHaptic(RWMol &mol) {
  // First collect all the atoms that have a dative bond to them.
  // Assume that the ones of interest will have a metal as their
  // end atoms.
  std::map<unsigned int, std::vector<unsigned int>> dativeAtoms;
  for (const auto &b : mol.bonds()) {
    if (b->getBondType() == Bond::DATIVE) {
      auto ins = dativeAtoms.find(b->getEndAtomIdx());
      if (ins == dativeAtoms.end()) {
        dativeAtoms.insert(
            std::make_pair(b->getEndAtomIdx(),
                           std::vector<unsigned int>{b->getBeginAtomIdx()}));
      } else {
        ins->second.push_back(b->getBeginAtomIdx());
      }
    }
  }

  mol.beginBatchEdit();
  for (auto &dativeSet : dativeAtoms) {
    // Find the sets of contiguous atoms in the dativeAtoms lists.  Each one
    // will be the EndPts of a haptic bond going to the metal atom that is
    // dativeSet.first.
    auto contigAtoms = contiguousAtoms(mol, dativeSet.second);
    for (const auto &ca : contigAtoms) {
      addHapticBond(mol, dativeSet.first, ca);
      for (auto cat : ca) {
        mol.removeBond(dativeSet.first, cat);
      }
    }
  }
  mol.commitBatchEdit();
}

namespace details {
std::vector<int> hapticBondEndpoints(const Bond *bond) {
  // This would ideally use ParseV3000Array but I'm buggered if I can get
  // the linker to find it.  The issue, I think, is that it's in the
  // FileParsers library which is built after GraphMol so not available
  // to link in.  It can't be built first because it needs GraphMol.
  //      std::vector<unsigned int> oats =
  //          RDKit::SGroupParsing::ParseV3000Array<unsigned int>(endpts);
  // Returns the atom indices i.e. subtracts 1 from the numbers in the prop.
  std::vector<int> oats;
  std::string endpts;
  if (bond->getPropIfPresent(common_properties::_MolFileBondEndPts, endpts)) {
    if ('(' == endpts.front() && ')' == endpts.back()) {
      endpts = endpts.substr(1, endpts.length() - 2);
      boost::char_separator<char> sep(" ");
      boost::tokenizer<boost::char_separator<char>> tokens(endpts, sep);
      auto beg = tokens.begin();
      ++beg;
      std::transform(beg, tokens.end(), std::back_inserter(oats),
                     [](const std::string &a) { return std::stod(a) - 1; });
    }
  }
  return oats;
}
}  // end of namespace details

namespace details {
unsigned int addExplicitAttachmentPoint(RWMol &mol, unsigned int atomIdx,
                                        unsigned int val, bool addAsQuery,
                                        bool addCoords) {
  Atom *newAtom = nullptr;
  if (addAsQuery) {
    newAtom = new QueryAtom(0);
    newAtom->setQuery(RDKit::makeAtomNullQuery());
  } else {
    newAtom = new Atom(0);
  }
  newAtom->setProp(common_properties::_fromAttachPoint, val);
  bool updateLabel = false;
  bool takeOwnership = true;
  auto idx = mol.addAtom(newAtom, updateLabel, takeOwnership);
  mol.addBond(atomIdx, idx, Bond::SINGLE);
  mol.getAtomWithIdx(idx)->updatePropertyCache(false);
  if (addCoords) {
    setTerminalAtomCoords(mol, idx, atomIdx);
  }
  return idx;
}

bool isAttachmentPoint(const Atom *atom, bool markedOnly) {
  PRECONDITION(atom, "bad atom");
  PRECONDITION(atom->hasOwningMol(), "atom not associated with a molecule");
  if (atom->getAtomicNum() != 0 || atom->getDegree() != 1) {
    return false;
  }
  if (markedOnly && !atom->hasProp(common_properties::_fromAttachPoint)) {
    return false;
  }
  // we know that the atom is degree 1
  const auto bond = *atom->getOwningMol().atomBonds(atom).begin();
  if ((bond->getBondType() != Bond::BondType::SINGLE &&
       bond->getBondType() != Bond::BondType::UNSPECIFIED) ||
      bond->getBondDir() != Bond::BondDir::NONE) {
    return false;
  }

  if (atom->hasQuery()) {
    // a * from SMARTS
    if (!atom->getQuery()->getNegation() &&
        atom->getQuery()->getDescription() == "AtomNull") {
      return true;
    }
    // a * from CXSMILES
    if (atom->getQuery()->getNegation() &&
        atom->getQuery()->getDescription() == "AtomAtomicNum" &&
        static_cast<ATOM_EQUALS_QUERY *>(atom->getQuery())->getVal() == 1) {
      return true;
    }
    return false;
  }

  return true;
}

}  // namespace details

void expandAttachmentPoints(RWMol &mol, bool addAsQueries, bool addCoords) {
  for (auto atom : mol.atoms()) {
    int value;
    if (atom->getPropIfPresent(common_properties::molAttachPoint, value)) {
      std::vector<int> tgtVals;
      if (value == 1 || value == -1) {
        tgtVals.push_back(1);
      }
      if (value == 2 || value == -1) {
        tgtVals.push_back(2);
      }
      if (tgtVals.empty()) {
        BOOST_LOG(rdWarningLog)
            << "Invalid value for molAttachPoint: " << value << " on atom "
            << atom->getIdx() << ". Not expanding this atttachment point."
            << std::endl;
        continue;
      }
      for (auto tval : tgtVals) {
        atom->clearProp(common_properties::molAttachPoint);
        details::addExplicitAttachmentPoint(mol, atom->getIdx(), tval,
                                            addAsQueries, addCoords);
      }
    }
  }
}

void collapseAttachmentPoints(RWMol &mol, bool markedOnly) {
  bool removedAny = false;
  std::vector<int> attachLabels(mol.getNumAtoms(), 0);

  for (auto atom : mol.atoms()) {
    if (details::isAttachmentPoint(atom, markedOnly)) {
      int value = 0;
      atom->getPropIfPresent(common_properties::_fromAttachPoint, value);
      if (markedOnly && (value < 0 || value > 2)) {
        BOOST_LOG(rdWarningLog)
            << "Invalid value for _fromAttachPoint: " << value << " on atom "
            << atom->getIdx() << ". Not collapsing this atom" << std::endl;
        continue;
      }
      if (!markedOnly && !value) {
        value = 1;
      }
      auto bond = *mol.atomBonds(atom).begin();
      if ((bond->getBondType() != Bond::BondType::SINGLE &&
           bond->getBondType() != Bond::BondType::UNSPECIFIED) ||
          bond->getBondDir() != Bond::BondDir::NONE) {
        continue;
      }
      auto oAtomIdx = bond->getOtherAtom(atom)->getIdx();
      if (attachLabels[oAtomIdx]) {
        if (attachLabels[oAtomIdx] != -1) {
          value = -1;
        } else {
          BOOST_LOG(rdWarningLog)
              << "More than two attachment points on atom " << oAtomIdx
              << ". Attachment point " << atom->getIdx()
              << " will not be collapsed." << std::endl;
          continue;
        }
      }
      if (!removedAny) {
        mol.beginBatchEdit();
        removedAny = true;
      }
      attachLabels[oAtomIdx] = value;
      mol.removeAtom(atom);
    }
  }
  // set the attachment point labels
  for (auto atom : mol.atoms()) {
    if (attachLabels[atom->getIdx()]) {
      atom->setProp(common_properties::molAttachPoint,
                    attachLabels[atom->getIdx()]);
    }
  }
  if (removedAny) {
    mol.commitBatchEdit();
  }
}
}  // end of namespace MolOps
}  // end of namespace RDKit