// // Copyright (C) 2014 Novartis Institutes for BioMedical Research // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include #include #include #include "../QueryAtom.h" #include "../QueryBond.h" #include "../SmilesParse/SmilesWrite.h" #include "../SmilesParse/SmartsWrite.h" #include "../SmilesParse/SmilesParse.h" #include "../Substruct/SubstructMatch.h" #include "SubstructMatchCustom.h" #include "MaximumCommonSubgraph.h" #include #include #include #include namespace RDKit { namespace FMCS { struct LabelDefinition { unsigned int ItemIndex; // item with this label value unsigned int Value; LabelDefinition() : ItemIndex(NotSet), Value(NotSet) {} LabelDefinition(unsigned int i, unsigned int value) : ItemIndex(i), Value(value) {} }; MaximumCommonSubgraph::MaximumCommonSubgraph(const MCSParameters *params) { Parameters = detail::MCSParametersInternal(params); if (!Parameters.ProgressCallback) { Parameters.ProgressCallback = MCSProgressCallbackTimeout; Parameters.ProgressCallbackUserData = &To; } if (Parameters.AtomCompareParameters.MatchChiralTag) { Parameters.BondCompareParameters.MatchStereo = true; } QueryMoleculeMatchedBonds = 0; QueryMoleculeMatchedAtoms = 0; QueryMoleculeSingleMatchedAtom = nullptr; To = nanoClock(); } static bool molPtr_NumBondLess( const ROMol *l, const ROMol *r) { // need for sorting the source molecules by size return l->getNumBonds() < r->getNumBonds(); } void MaximumCommonSubgraph::init(size_t startIdx) { QueryMolecule = Molecules.at(startIdx); Targets.clear(); #ifdef FAST_SUBSTRUCT_CACHE QueryAtomLabels.clear(); QueryBondLabels.clear(); QueryAtomMatchTable.clear(); QueryBondMatchTable.clear(); #endif #ifdef DUP_SUBSTRUCT_CACHE DuplicateCache.clear(); #endif size_t nq = 0; #ifdef FAST_SUBSTRUCT_CACHE // fill out match tables nq = QueryMolecule->getNumAtoms(); QueryAtomMatchTable.resize(nq, nq); for (size_t aj = 0; aj < nq; aj++) { for (size_t ai = 0; ai < nq; ai++) { QueryAtomMatchTable.set( ai, aj, Parameters.AtomTyper(Parameters.AtomCompareParameters, *QueryMolecule, ai, *QueryMolecule, aj, Parameters.CompareFunctionsUserData)); } } nq = QueryMolecule->getNumBonds(); QueryBondMatchTable.resize(nq, nq); for (size_t aj = 0; aj < nq; aj++) { for (size_t ai = 0; ai < nq; ai++) { QueryBondMatchTable.set( ai, aj, Parameters.BondTyper(Parameters.BondCompareParameters, *QueryMolecule, ai, *QueryMolecule, aj, Parameters.CompareFunctionsUserData)); } } // Compute label values based on current functor and parameters for code // Morgan correct computation. unsigned int currentLabelValue = 1; std::vector labels; nq = QueryMolecule->getNumAtoms(); QueryAtomLabels.resize(nq, NotSet); for (size_t ai = 0; ai < nq; ++ai) { if (MCSAtomCompareAny == Parameters.AtomTyper) { // predefined functor without atom compare // parameters QueryAtomLabels[ai] = 1; } else { const auto atom = QueryMolecule->getAtomWithIdx(ai); if (MCSAtomCompareElements == Parameters.AtomTyper) { // predefined functor without atom compare // parameters QueryAtomLabels[ai] = atom->getAtomicNum() | (Parameters.AtomCompareParameters.MatchValences ? (atom->getTotalValence() >> 8) : 0); } else if (MCSAtomCompareIsotopes == Parameters.AtomTyper) { // predefined functor without atom // compare parameters QueryAtomLabels[ai] = atom->getAtomicNum() | (atom->getIsotope() >> 8) | (Parameters.AtomCompareParameters.MatchValences ? (atom->getTotalValence() >> 16) : 0); } else { // custom user defined functor QueryAtomLabels[ai] = NotSet; for (auto &label : labels) { if (Parameters.AtomTyper( Parameters.AtomCompareParameters, *QueryMolecule, label.ItemIndex, *QueryMolecule, ai, Parameters.CompareFunctionsUserData)) { // equal items QueryAtomLabels[ai] = label.Value; break; } } if (NotSet == QueryAtomLabels.at(ai)) { // not found -> create new label QueryAtomLabels[ai] = ++currentLabelValue; labels.emplace_back(ai, currentLabelValue); } } } } labels.clear(); currentLabelValue = 1; nq = QueryMolecule->getNumBonds(); QueryBondLabels.resize(nq, NotSet); for (size_t aj = 0; aj < nq; ++aj) { const Bond *bond = QueryMolecule->getBondWithIdx(aj); unsigned ring = 0; if (!Parameters.CompareFunctionsUserData && (Parameters.BondCompareParameters.CompleteRingsOnly || Parameters.BondCompareParameters.RingMatchesRingOnly)) { // is bond in ring ring = QueryMolecule->getRingInfo()->numBondRings(aj) ? 0 : 1; } if (MCSBondCompareAny == Parameters.BondTyper) { QueryBondLabels[aj] = 1 | (ring >> 8); } else if (MCSBondCompareOrderExact == Parameters.BondTyper) { QueryBondLabels[aj] = (bond->getBondType() + 1) | (ring >> 8); } else if (MCSBondCompareOrder == Parameters.BondTyper) { auto order = bond->getBondType(); if (Bond::AROMATIC == order || Bond::ONEANDAHALF == order) { // ignore Aromatization order = Bond::SINGLE; } else if (Bond::TWOANDAHALF == order) { order = Bond::DOUBLE; } else if (Bond::THREEANDAHALF == order) { order = Bond::TRIPLE; } else if (Bond::FOURANDAHALF == order) { order = Bond::QUADRUPLE; } else if (Bond::FIVEANDAHALF == order) { order = Bond::QUINTUPLE; } QueryBondLabels[aj] = (order + 1) | (ring >> 8); } else { // custom user defined functor QueryBondLabels[aj] = NotSet; for (const auto &label : labels) { if (Parameters.BondTyper( Parameters.BondCompareParameters, *QueryMolecule, label.ItemIndex, *QueryMolecule, aj, Parameters .CompareFunctionsUserData)) { // equal bonds + ring ... QueryBondLabels[aj] = label.Value; break; } } if (NotSet == QueryBondLabels.at(aj)) { // not found -> create new label QueryBondLabels[aj] = ++currentLabelValue; labels.emplace_back(aj, currentLabelValue); } } } #endif Targets.resize(Molecules.size() - 1); size_t i = 0; for (auto it = Molecules.begin() + 1; it != Molecules.end(); it++, i++) { Targets[i].Molecule = *it; // build Target Topology ADD ATOMs for (const auto &a : Targets.at(i).Molecule->atoms()) { Targets[i].Topology.addAtom(a->getIdx()); } // build Target Topology ADD BONDs for (const auto &b : Targets.at(i).Molecule->bonds()) { auto ii = b->getBeginAtomIdx(); auto jj = b->getEndAtomIdx(); Targets[i].Topology.addBond(b->getIdx(), ii, jj); } // fill out match tables size_t nq = QueryMolecule->getNumAtoms(); size_t nt = Targets.at(i).Molecule->getNumAtoms(); Targets[i].AtomMatchTable.resize(nq, nt); for (size_t aj = 0; aj < nt; aj++) { for (size_t ai = 0; ai < nq; ai++) { Targets[i].AtomMatchTable.set( ai, aj, Parameters.AtomTyper(Parameters.AtomCompareParameters, *QueryMolecule, ai, *Targets.at(i).Molecule, aj, Parameters.CompareFunctionsUserData)); } } nq = QueryMolecule->getNumBonds(); nt = Targets.at(i).Molecule->getNumBonds(); Targets[i].BondMatchTable.resize(nq, nt); for (size_t aj = 0; aj < nt; aj++) { for (size_t ai = 0; ai < nq; ai++) { Targets[i].BondMatchTable.set( ai, aj, Parameters.BondTyper(Parameters.BondCompareParameters, *QueryMolecule, ai, *Targets.at(i).Molecule, aj, Parameters.CompareFunctionsUserData)); } } } } struct WeightedBond { const Bond *BondPtr{nullptr}; unsigned int Weight{0}; WeightedBond() {} WeightedBond(const Bond *bond) : BondPtr(bond), Weight(0) { const auto ringInfo = bond->getOwningMol().getRingInfo(); // score ((bond.is_in_ring + atom1.is_in_ring + atom2.is_in_ring) if (ringInfo->numBondRings(bond->getIdx())) { ++Weight; } if (ringInfo->numAtomRings(bond->getBeginAtomIdx())) { ++Weight; } if (ringInfo->numAtomRings(bond->getEndAtomIdx())) { ++Weight; } } bool operator<(const WeightedBond &r) { return Weight >= r.Weight; // sort in Z-A order (Rings first) } }; void MaximumCommonSubgraph::makeInitialSeeds() { // build a set of initial seeds as "all" single bonds from query // molecule boost::dynamic_bitset<> excludedBonds(QueryMolecule->getNumBonds()); Seeds.clear(); QueryMoleculeMatchedBonds = 0; QueryMoleculeMatchedAtoms = 0; QueryMoleculeSingleMatchedAtom = nullptr; if (!Parameters.InitialSeed.empty()) { // make user defined seed std::unique_ptr initialSeedMolecule( static_cast(SmartsToMol(Parameters.InitialSeed))); // make a set of of seed as indices and pointers to current query // molecule items based on matching results std::vector matching_substructs; SubstructMatch(*QueryMolecule, *initialSeedMolecule, matching_substructs); // loop throw all fragments of Query matched to initial seed for (const auto &ms : matching_substructs) { Seed seed; seed.setStoreAllDegenerateMCS(Parameters.StoreAll); seed.ExcludedBonds = excludedBonds; seed.MatchResult.resize(Targets.size()); #ifdef VERBOSE_STATISTICS_ON { ++VerboseStatistics.Seed; ++VerboseStatistics.InitialSeed; } #endif // add all matched atoms of the matched query fragment std::map initialSeedToQueryAtom; for (const auto &msb : ms) { unsigned int qai = msb.second; unsigned int sai = msb.first; seed.addAtom(QueryMolecule->getAtomWithIdx(qai)); initialSeedToQueryAtom[sai] = qai; } // add all bonds (existed in initial seed !!!) between all matched // atoms in query for (const auto &msb : ms) { const auto atom = initialSeedMolecule->getAtomWithIdx(msb.first); for (const auto &nbri : boost::make_iterator_range( initialSeedMolecule->getAtomBonds(atom))) { const auto initialBond = (*initialSeedMolecule)[nbri]; unsigned int qai1 = initialSeedToQueryAtom.at(initialBond->getBeginAtomIdx()); unsigned int qai2 = initialSeedToQueryAtom.at(initialBond->getEndAtomIdx()); const auto b = QueryMolecule->getBondBetweenAtoms(qai1, qai2); CHECK_INVARIANT(b, "bond must not be NULL"); if (!seed.ExcludedBonds.test(b->getIdx())) { seed.addBond(b); seed.ExcludedBonds.set(b->getIdx()); } } } seed.computeRemainingSize(*QueryMolecule); if (checkIfMatchAndAppend(seed)) { QueryMoleculeMatchedBonds = seed.getNumBonds(); } } if (Seeds.empty()) { BOOST_LOG(rdWarningLog) << "The provided InitialSeed is not an MCS and will be ignored" << std::endl; } } if (Seeds.empty()) { // create a set of seeds from each query bond // R1 additional performance OPTIMISATION // if(Parameters.BondCompareParameters.CompleteRingsOnly) // disable all mismatched rings, and do not generate initial seeds // from such disabled bonds // for( rings .....) for(i......) // if(mismatched) excludedBonds[i.......] = true; std::vector wbVec; wbVec.reserve(QueryMolecule->getNumBonds()); for (const auto &bond : QueryMolecule->bonds()) { wbVec.emplace_back(bond); } for (const auto &wb : wbVec) { // R1 additional performance OPTIMISATION // if(excludedBonds[(*bi)->getIdx()]) // continue; Seed seed; seed.setStoreAllDegenerateMCS(Parameters.StoreAll); seed.MatchResult.resize(Targets.size()); #ifdef VERBOSE_STATISTICS_ON { ++VerboseStatistics.Seed; ++VerboseStatistics.InitialSeed; } #endif seed.addAtom(wb.BondPtr->getBeginAtom()); seed.addAtom(wb.BondPtr->getEndAtom()); seed.ExcludedBonds = excludedBonds; // all bonds from first to current seed.addBond(wb.BondPtr); excludedBonds.set(wb.BondPtr->getIdx()); seed.computeRemainingSize(*QueryMolecule); if (checkIfMatchAndAppend(seed)) { ++QueryMoleculeMatchedBonds; } else { // optionally remove all such bonds from all targets TOPOLOGY // where it exists. //.......... // disable (mark as already processed) mismatched bond in all // seeds for (auto &Seed : Seeds) { Seed.ExcludedBonds.set(wb.BondPtr->getIdx()); } #ifdef VERBOSE_STATISTICS_ON ++VerboseStatistics.MismatchedInitialSeed; #endif } } } auto nq = QueryMolecule->getNumAtoms(); MatchVectType singleAtomPairMatch(1); MatchVectType emptyBondPairMatch; for (size_t i = 0; i < nq; i++) { // all query's atoms const auto queryMolAtom = QueryMolecule->getAtomWithIdx(i); bool isQueryMolAtomInRing = queryIsAtomInRing(queryMolAtom); unsigned int matched = 0; const Atom *candQueryMoleculeSingleMatchedAtom = nullptr; for (const auto &tag : Targets) { auto nt = tag.Molecule->getNumAtoms(); for (size_t aj = 0; aj < nt; aj++) { if (tag.AtomMatchTable.at(i, aj)) { const auto targetMolAtom = tag.Molecule->getAtomWithIdx(aj); bool isTargetMolAtomInRing = queryIsAtomInRing(targetMolAtom); ++matched; if (!(Parameters.BondCompareParameters.CompleteRingsOnly && (isQueryMolAtomInRing || isTargetMolAtomInRing))) { bool shouldAccept = !Parameters.ShouldAcceptMCS; if (!shouldAccept) { singleAtomPairMatch[0] = std::make_pair(i, aj); shouldAccept = Parameters.ShouldAcceptMCS( *QueryMolecule, *tag.Molecule, singleAtomPairMatch, emptyBondPairMatch, &Parameters); } if (shouldAccept) { candQueryMoleculeSingleMatchedAtom = queryMolAtom; } } break; } } } if (matched && matched >= ThresholdCount) { ++QueryMoleculeMatchedAtoms; if (candQueryMoleculeSingleMatchedAtom) { if (!QueryMoleculeSingleMatchedAtom) { QueryMoleculeSingleMatchedAtom = candQueryMoleculeSingleMatchedAtom; } else { QueryMoleculeSingleMatchedAtom = (std::max)( candQueryMoleculeSingleMatchedAtom, QueryMoleculeSingleMatchedAtom, [](const Atom *a, const Atom *b) { if (a->getDegree() != b->getDegree()) { return (a->getDegree() < b->getDegree()); } else if (a->getFormalCharge() != b->getFormalCharge()) { return (a->getFormalCharge() < b->getFormalCharge()); } else if (a->getAtomicNum() != b->getAtomicNum()) { return (a->getAtomicNum() < b->getAtomicNum()); } return (a->getIdx() < b->getIdx()); }); } } } } } namespace { bool checkIfRingsAreClosed(const Seed &fs, bool noLoneRingAtoms) { if (fs.MoleculeFragment.Bonds.empty() && fs.MoleculeFragment.Atoms.empty()) { return true; } const auto &om = fs.MoleculeFragment.Atoms.front()->getOwningMol(); const auto ri = om.getRingInfo(); if (!ri->numRings()) { return true; } boost::dynamic_bitset<> mcsBonds(om.getNumBonds()); boost::dynamic_bitset<> mcsNonFusedRings(ri->numRings()); boost::dynamic_bitset<> mcsFusedRings(ri->numRings()); for (const auto &bond : fs.MoleculeFragment.Bonds) { auto bi = bond->getIdx(); mcsBonds.set(bi); if (ri->numBondRings(bi) == 1) { mcsNonFusedRings.set(ri->bondMembers(bi).front()); } } for (unsigned int ringIdx = 0; ringIdx < mcsNonFusedRings.size(); ++ringIdx) { if (!mcsNonFusedRings.test(ringIdx)) { continue; } for (const auto &bi : ri->bondRings().at(ringIdx)) { bool keepBond = false; for (unsigned int memberOf : ri->bondMembers(bi)) { if (memberOf == ringIdx) { keepBond = true; } else if (mcsNonFusedRings.test(memberOf)) { keepBond = false; break; } } if (keepBond && !mcsBonds.test(bi)) { return false; } } } if (noLoneRingAtoms) { for (const auto &atom : fs.MoleculeFragment.Atoms) { auto ai = atom->getIdx(); const auto &ringIndices = ri->atomMembers(ai); if (!ringIndices.empty() && !std::any_of(ringIndices.begin(), ringIndices.end(), [&mcsNonFusedRings](const auto &ringIdx) { return mcsNonFusedRings.test(ringIdx); })) { return false; } } } if (mcsNonFusedRings.none()) { for (const auto &bond : fs.MoleculeFragment.Bonds) { auto bi = bond->getIdx(); if (ri->numBondRings(bi) > 1) { for (auto ringIdx : ri->bondMembers(bi)) { mcsFusedRings.set(ringIdx); } } } } if (mcsFusedRings.any()) { for (unsigned int ringIdx = 0; ringIdx < mcsFusedRings.size(); ++ringIdx) { if (!mcsFusedRings.test(ringIdx)) { continue; } const auto &ringBondIndices = ri->bondRings().at(ringIdx); if (std::all_of( ringBondIndices.begin(), ringBondIndices.end(), [&mcsBonds](const auto &bi) { return mcsBonds.test(bi); })) { return true; } } return false; } return true; } bool checkIfShouldAcceptMCS(const FMCS::MolFragment &f, const ROMol &query, const std::vector &targets, const MCSParameters &p) { if (!p.ShouldAcceptMCS || f.Bonds.empty()) { return true; } Seed seed; // result MCS seed.ExcludedBonds.resize(query.getNumBonds(), false); for (const auto &atom : f.Atoms) { seed.addAtom(atom); } for (const auto &bond : f.Bonds) { seed.addBond(bond); } for (const auto &target : targets) { match_V_t match; bool targetMatched = SubstructMatchCustomTable( target.Topology, *target.Molecule, seed.Topology, query, target.AtomMatchTable, target.BondMatchTable, &p, &match); // it is OK to skip non-matches here as threshold could be < 1.0; // we only want to triage matches if (!targetMatched) { continue; } MatchVectType atomPairMatch; atomPairMatch.reserve(match.size()); std::vector queryToTargetAtomIdx(query.getNumAtoms(), NotSet); for (const auto &m : match) { auto queryAtomIdx = seed.Topology[m.first]; auto targetAtomIdx = target.Topology[m.second]; atomPairMatch.emplace_back(queryAtomIdx, targetAtomIdx); queryToTargetAtomIdx[queryAtomIdx] = targetAtomIdx; } MatchVectType bondPairMatch; auto numMcsBonds = boost::num_edges(seed.Topology); bondPairMatch.reserve(numMcsBonds); for (const auto &it : boost::make_iterator_range(boost::edges(seed.Topology))) { int queryBeginAtomIdx = seed.Topology[boost::source(it, seed.Topology)]; int queryEndAtomIdx = seed.Topology[boost::target(it, seed.Topology)]; const auto queryBond = query.getBondBetweenAtoms(queryBeginAtomIdx, queryEndAtomIdx); CHECK_INVARIANT(queryBond, ""); const auto targetBeginAtomIdx = queryToTargetAtomIdx.at(queryBeginAtomIdx); CHECK_INVARIANT(targetBeginAtomIdx != NotSet, ""); const auto targetEndAtomIdx = queryToTargetAtomIdx.at(queryEndAtomIdx); CHECK_INVARIANT(targetEndAtomIdx != NotSet, ""); const auto targetBond = target.Molecule->getBondBetweenAtoms( targetBeginAtomIdx, targetEndAtomIdx); CHECK_INVARIANT(targetBond, ""); bondPairMatch.emplace_back(queryBond->getIdx(), targetBond->getIdx()); } if (!p.ShouldAcceptMCS(query, *target.Molecule, atomPairMatch, bondPairMatch, &p)) { return false; } } return true; } } // namespace bool MaximumCommonSubgraph::growSeeds() { bool mcsFound = false; bool canceled = false; // Find MCS -- SDF Seed growing OPTIMISATION (it works in 3 times // faster) while (!Seeds.empty()) { if (getMaxNumberBonds() == QueryMoleculeMatchedBonds) { // MCS == Query break; } #ifdef VERBOSE_STATISTICS_ON VerboseStatistics.TotalSteps++; #endif auto si = Seeds.begin(); si->grow(*this); { const Seed &fs = Seeds.front(); // bigger substructure found if (fs.CopyComplete) { bool possibleMCS = false; if (!Parameters.MaximizeBonds) { possibleMCS = (fs.getNumAtoms() > getMaxNumberAtoms() || (fs.getNumAtoms() == getMaxNumberAtoms() && fs.getNumBonds() > getMaxNumberBonds())); } else { possibleMCS = (fs.getNumBonds() > getMaxNumberBonds() || (fs.getNumBonds() == getMaxNumberBonds() && fs.getNumAtoms() > getMaxNumberAtoms())); } bool isDegenerateMCS = (fs.getNumBonds() == getMaxNumberBonds() && fs.getNumAtoms() == getMaxNumberAtoms()); if (!possibleMCS && Parameters.StoreAll) { possibleMCS = isDegenerateMCS; } // #945: test here to see if the MCS actually has all rings closed if (possibleMCS && Parameters.BondCompareParameters.CompleteRingsOnly) { possibleMCS = checkIfRingsAreClosed( fs, Parameters.AtomCompareParameters.CompleteRingsOnly); } if (possibleMCS) { possibleMCS = checkIfShouldAcceptMCS( fs.MoleculeFragment, *QueryMolecule, Targets, Parameters); } if (possibleMCS) { mcsFound = true; #ifdef VERBOSE_STATISTICS_ON VerboseStatistics.MCSFoundStep = VerboseStatistics.TotalSteps; VerboseStatistics.MCSFoundTime = nanoClock(); #endif McsIdx.Atoms = fs.MoleculeFragment.Atoms; McsIdx.Bonds = fs.MoleculeFragment.Bonds; if (Parameters.Verbose) { std::cout << VerboseStatistics.TotalSteps << " Seeds:" << Seeds.size() << " MCS " << McsIdx.Atoms.size() << " atoms, " << McsIdx.Bonds.size() << " bonds"; printf(" for %.4lf seconds. bond[0]=%u\n", double(VerboseStatistics.MCSFoundTime - To) / 1000000., McsIdx.Bonds.front()->getIdx()); } if (Parameters.StoreAll) { if (!isDegenerateMCS) { DegenerateMcsMap.clear(); } std::vector key(McsIdx.Bonds.size()); std::transform(McsIdx.Bonds.begin(), McsIdx.Bonds.end(), key.begin(), [](const auto bond) { return bond->getIdx(); }); std::sort(key.begin(), key.end()); MCS value(McsIdx); value.QueryMolecule = QueryMolecule; value.Targets = Targets; DegenerateMcsMap[key] = value; } } } } if (NotSet == si->GrowingStage) { // finished Seeds.erase(si); } if (Parameters.ProgressCallback) { Stat.NumAtoms = getMaxNumberAtoms(); Stat.NumBonds = getMaxNumberBonds(); if (!Parameters.ProgressCallback(Stat, Parameters, Parameters.ProgressCallbackUserData)) { canceled = true; break; } } } if (mcsFound) { // postponed copy of current set of molecules for // threshold < 1. McsIdx.QueryMolecule = QueryMolecule; McsIdx.Targets = Targets; } return !canceled; } // namespace FMCS struct AtomMatch { // for each seed atom (matched) unsigned int QueryAtomIdx; unsigned int TargetAtomIdx; AtomMatch() : QueryAtomIdx(NotSet), TargetAtomIdx(NotSet) {} }; typedef std::vector AtomMatchSet; std::pair MaximumCommonSubgraph::generateResultSMARTSAndQueryMol( const MCS &mcsIdx) const { // match the result MCS with all targets to check if it is exact match // or template Seed seed; // result MCS seed.setStoreAllDegenerateMCS(Parameters.StoreAll); seed.ExcludedBonds.resize(mcsIdx.QueryMolecule->getNumBonds(), false); std::vector atomMatchResult(mcsIdx.Targets.size()); std::vector atomIdxMap(mcsIdx.QueryMolecule->getNumAtoms()); std::vector> bondMatchSet( mcsIdx.Bonds.size()); // key is unique BondType std::vector> atomMatchSet( mcsIdx.Atoms.size()); // key is unique atomic number for (const auto &atom : mcsIdx.Atoms) { atomIdxMap[atom->getIdx()] = seed.getNumAtoms(); seed.addAtom(atom); } for (const auto &bond : mcsIdx.Bonds) { seed.addBond(bond); } std::vector matchedTargetIndices; if (!mcsIdx.Bonds.empty()) { for (const auto &tag : mcsIdx.Targets) { match_V_t match; // THERE IS NO Bonds match INFO !!!! bool target_matched = SubstructMatchCustomTable( tag.Topology, *tag.Molecule, seed.Topology, *QueryMolecule, tag.AtomMatchTable, tag.BondMatchTable, &Parameters, &match); if (!target_matched) { continue; } unsigned int itarget = &tag - &mcsIdx.Targets.front(); matchedTargetIndices.push_back(itarget); atomMatchResult.at(itarget).resize(seed.getNumAtoms()); for (const auto &m : match) { const auto ai = m.first; // SeedAtomIdx atomMatchResult.at(itarget).at(ai).QueryAtomIdx = seed.Topology[m.first]; atomMatchResult.at(itarget).at(ai).TargetAtomIdx = tag.Topology[m.second]; const auto ta = tag.Molecule->getAtomWithIdx(tag.Topology[m.second]); if (ta->getAtomicNum() != seed.MoleculeFragment.Atoms.at(ai)->getAtomicNum()) { atomMatchSet[ai][ta->getAtomicNum()] = ta; // add } } // AND BUILD BOND MATCH INFO for (const auto &bond : mcsIdx.Bonds) { const auto bi = &bond - &mcsIdx.Bonds.front(); const auto i = atomIdxMap.at(bond->getBeginAtomIdx()); const auto j = atomIdxMap.at(bond->getEndAtomIdx()); const auto ti = atomMatchResult.at(itarget).at(i).TargetAtomIdx; const auto tj = atomMatchResult.at(itarget).at(j).TargetAtomIdx; const auto tb = tag.Molecule->getBondBetweenAtoms(ti, tj); if (tb && bond->getBondType() != tb->getBondType()) { bondMatchSet[bi][tb->getBondType()] = tb; // add } } } } // Generate result's SMARTS // create molecule from MCS for MolToSmarts() auto mol = new RWMol(); ROMOL_SPTR molSptr(mol); const auto ri = mcsIdx.QueryMolecule->getRingInfo(); boost::dynamic_bitset<> mcsRingIsComplete; bool needAtomRingQueries = (Parameters.AtomCompareParameters.RingMatchesRingOnly || Parameters.BondCompareParameters.MatchFusedRingsStrict); if (needAtomRingQueries) { mcsRingIsComplete.resize(ri->numRings(), true); boost::dynamic_bitset<> queryBondInMcs(mcsIdx.QueryMolecule->getNumBonds()); for (const auto &bond : mcsIdx.Bonds) { queryBondInMcs.set(bond->getIdx()); } const auto &bondRings = ri->bondRings(); for (const auto &bondRing : bondRings) { auto ringIdx = &bondRing - &bondRings.front(); for (const auto &bondIdx : bondRing) { if (!queryBondInMcs.test(bondIdx)) { mcsRingIsComplete.reset(ringIdx); break; } } } } for (const auto &atom : mcsIdx.Atoms) { auto queryAtomIdx = atom->getIdx(); auto numAtomRings = ri->numAtomRings(queryAtomIdx); QueryAtom a; const auto ai = &atom - &mcsIdx.Atoms.front(); if (Parameters.AtomTyper == MCSAtomCompareIsotopes || Parameters.AtomCompareParameters .MatchIsotope) { // do '[0*]-[0*]-[13*]' for CC[13NH2] a.setQuery(makeAtomIsotopeQuery(static_cast(atom->getIsotope()))); } else { // generate [#6] instead of C or c ! a.setQuery(makeAtomNumQuery(atom->getAtomicNum())); // for all atomMatchSet[ai] items add atom query to template like // [#6,#17,#9, ... ] for (const auto &am : atomMatchSet.at(ai)) { a.expandQuery(makeAtomNumQuery(am.second->getAtomicNum()), Queries::COMPOSITE_OR); if (Parameters.AtomCompareParameters.MatchChiralTag && (am.second->getChiralTag() == Atom::CHI_TETRAHEDRAL_CW || am.second->getChiralTag() == Atom::CHI_TETRAHEDRAL_CCW)) { a.setChiralTag(am.second->getChiralTag()); } } } if (needAtomRingQueries) { const auto &ringIndicesAtomIsMemberOf = ri->atomMembers(queryAtomIdx); auto numCompleteRings = std::count_if( ringIndicesAtomIsMemberOf.begin(), ringIndicesAtomIsMemberOf.end(), [&mcsRingIsComplete](const auto &ringIdx) { return mcsRingIsComplete.test(ringIdx); }); if (Parameters.AtomCompareParameters.RingMatchesRingOnly && !numCompleteRings) { auto q = makeAtomInRingQuery(); q->setNegation(!numAtomRings); a.expandQuery(q, Queries::COMPOSITE_AND, true); } else if (Parameters.BondCompareParameters.MatchFusedRingsStrict && numAtomRings == 1 && numCompleteRings == 1) { auto ringSize = ri->atomRings().at(ringIndicesAtomIsMemberOf.front()).size(); auto q = new ATOM_OR_QUERY; q->setDescription("AtomOr"); q->addChild(QueryAtom::QUERYATOM_QUERY::CHILD_TYPE( makeAtomMinRingSizeQuery(ringSize))); auto q2 = makeAtomInNRingsQuery(1); q2->setNegation(true); q->addChild(QueryAtom::QUERYATOM_QUERY::CHILD_TYPE(q2)); a.expandQuery(q, Queries::COMPOSITE_AND, true); } } mol->addAtom(&a, true, false); } for (const auto &bond : mcsIdx.Bonds) { QueryBond b; const auto bi = &bond - &mcsIdx.Bonds.front(); const auto beginAtomIdx = atomIdxMap.at(bond->getBeginAtomIdx()); const auto endAtomIdx = atomIdxMap.at(bond->getEndAtomIdx()); b.setBeginAtomIdx(beginAtomIdx); b.setEndAtomIdx(endAtomIdx); b.setQuery(makeBondOrderEqualsQuery(bond->getBondType())); // add OR template if need for (const auto &bm : bondMatchSet.at(bi)) { b.expandQuery(makeBondOrderEqualsQuery(bm.second->getBondType()), Queries::COMPOSITE_OR); if (Parameters.BondCompareParameters.MatchStereo && bm.second->getStereo() > Bond::STEREOANY) { b.setStereo(bm.second->getStereo()); } } if (Parameters.BondCompareParameters.RingMatchesRingOnly || Parameters.BondCompareParameters.MatchFusedRingsStrict) { const auto numBondRings = ri->numBondRings(bond->getIdx()); auto q = makeBondIsInRingQuery(); q->setNegation(!numBondRings); b.expandQuery(q, Queries::COMPOSITE_AND, true); } mol->addBond(&b, false); } return std::make_pair(MolToSmarts(*mol, true), molSptr); } bool MaximumCommonSubgraph::createSeedFromMCS(size_t newQueryTarget, Seed &newSeed) { Seed mcs; mcs.setStoreAllDegenerateMCS(Parameters.StoreAll); mcs.ExcludedBonds.resize(McsIdx.QueryMolecule->getNumBonds(), false); std::vector mcsAtomIdxMap(McsIdx.QueryMolecule->getNumAtoms()); for (const auto &atom : McsIdx.Atoms) { mcsAtomIdxMap[atom->getIdx()] = mcs.addAtom(atom); } for (const auto &bond : McsIdx.Bonds) { mcs.addBond(bond); } const Target &newQuery = McsIdx.Targets.at(newQueryTarget); match_V_t match; bool target_matched = SubstructMatchCustomTable( newQuery.Topology, *newQuery.Molecule, mcs.Topology, *McsIdx.QueryMolecule, newQuery.AtomMatchTable, newQuery.BondMatchTable, &Parameters, &match); if (!target_matched) { return false; } AtomMatchSet atomMatchResult(mcs.getNumAtoms()); newSeed.ExcludedBonds.resize(newQuery.Molecule->getNumBonds(), false); for (const auto &m : match) { unsigned int ai = m.first; // SeedAtomIdx in mcs seed atomMatchResult[ai].QueryAtomIdx = mcs.Topology[m.first]; atomMatchResult[ai].TargetAtomIdx = newQuery.Topology[m.second]; const auto ta = newQuery.Molecule->getAtomWithIdx(newQuery.Topology[m.second]); newSeed.addAtom(ta); } for (const auto &bond : McsIdx.Bonds) { unsigned int i = mcsAtomIdxMap.at(bond->getBeginAtomIdx()); unsigned int j = mcsAtomIdxMap.at(bond->getEndAtomIdx()); unsigned int ti = atomMatchResult.at(i).TargetAtomIdx; unsigned int tj = atomMatchResult.at(j).TargetAtomIdx; const auto tb = newQuery.Molecule->getBondBetweenAtoms(ti, tj); CHECK_INVARIANT(tb, "tb most not be NULL"); newSeed.addBond(tb); } newSeed.computeRemainingSize(*newQuery.Molecule); return true; } MCSResult MaximumCommonSubgraph::find(const std::vector &src_mols) { clear(); MCSResult res; if (src_mols.size() < 2) { throw std::runtime_error( "FMCS. Invalid argument. mols.size() must be at least 2"); } if (Parameters.Threshold > 1.0) { throw std::runtime_error( "FMCS. Invalid argument. Parameter Threshold must be 1.0 or " "less."); } // minimal required number of matched targets: // at least one target, max all targets ThresholdCount = static_cast(std::min( static_cast(src_mols.size()) - 1, std::max(1, static_cast(ceil(static_cast(src_mols.size()) * Parameters.Threshold)) - 1))); // AtomCompareParameters.CompleteRingsOnly implies // BondCompareParameters.CompleteRingsOnly if (Parameters.AtomCompareParameters.CompleteRingsOnly) { Parameters.BondCompareParameters.CompleteRingsOnly = true; } // Selecting CompleteRingsOnly option also enables // --ring-matches-ring-only. ring--ring and chain bonds only match chain // bonds. if (Parameters.BondCompareParameters.CompleteRingsOnly) { Parameters.BondCompareParameters.RingMatchesRingOnly = true; } if (Parameters.AtomCompareParameters.CompleteRingsOnly) { Parameters.AtomCompareParameters.RingMatchesRingOnly = true; } unsigned int i = 0; boost::dynamic_bitset<> faked_ring_info(src_mols.size()); for (const auto &src_mol : src_mols) { Molecules.push_back(src_mol.get()); if (!Molecules.back()->getRingInfo()->isInitialized()) { Molecules.back()->getRingInfo()->initialize(); // but do not fill out !!! faked_ring_info.set(i); } ++i; } // sort source set of molecules by their 'size' and assume the smallest // molecule as a query std::stable_sort(Molecules.begin(), Molecules.end(), molPtr_NumBondLess); size_t startIdx = 0; size_t endIdx = Molecules.size() - ThresholdCount; while (startIdx < endIdx && !Molecules.at(startIdx)->getNumAtoms()) { ++startIdx; } bool areSeedsEmpty = false; for (size_t i = startIdx; i < endIdx && !areSeedsEmpty && !res.Canceled; ++i) { init(startIdx); if (Targets.empty()) { break; } MCSFinalMatchCheckFunction tff = Parameters.FinalMatchChecker; // skip final match check for initial seed to allow future growing Parameters.FinalMatchChecker = nullptr; makeInitialSeeds(); Parameters.FinalMatchChecker = tff; // restore final functor if (Parameters.Verbose) { std::cout << "Query " << MolToSmiles(*QueryMolecule) << " " << QueryMolecule->getNumAtoms() << "(" << QueryMoleculeMatchedAtoms << ") atoms, " << QueryMolecule->getNumBonds() << "(" << QueryMoleculeMatchedBonds << ") bonds\n"; } areSeedsEmpty = Seeds.empty(); res.Canceled = !(areSeedsEmpty || growSeeds()); // verify what MCS is equal to one of initial seed for chirality match if (getMaxNumberBonds() == 0) { McsIdx = MCS(); // clear makeInitialSeeds(); // check all possible initial seeds if (!areSeedsEmpty) { const Seed &fs = Seeds.front(); if ((1 == getMaxNumberBonds() || !(Parameters.BondCompareParameters.CompleteRingsOnly && fs.MoleculeFragment.Bonds.size() == 1 && queryIsBondInRing(fs.MoleculeFragment.Bonds.front()))) && checkIfShouldAcceptMCS(fs.MoleculeFragment, *QueryMolecule, Targets, Parameters)) { McsIdx.QueryMolecule = QueryMolecule; McsIdx.Targets = Targets; McsIdx.Atoms = fs.MoleculeFragment.Atoms; McsIdx.Bonds = fs.MoleculeFragment.Bonds; } } if (!McsIdx.QueryMolecule && QueryMoleculeSingleMatchedAtom) { McsIdx.QueryMolecule = QueryMolecule; McsIdx.Targets = Targets; McsIdx.Atoms = std::vector{QueryMoleculeSingleMatchedAtom}; McsIdx.Bonds = std::vector(); } } else if (i + 1 < endIdx) { Seed seed; if (createSeedFromMCS(i, seed)) { // MCS is matched with new query Seeds.push_back(seed); } std::swap( Molecules.at(startIdx), Molecules.at(i + 1)); // change query molecule for threshold < 1. } } res.NumAtoms = getMaxNumberAtoms(); if (!res.NumAtoms && QueryMoleculeSingleMatchedAtom) { res.NumAtoms = 1; } res.NumBonds = getMaxNumberBonds(); if (res.NumBonds > 0 || QueryMoleculeSingleMatchedAtom) { if (!Parameters.StoreAll) { auto smartsQueryMolPair = generateResultSMARTSAndQueryMol(McsIdx); res.SmartsString = std::move(smartsQueryMolPair.first); res.QueryMol = std::move(smartsQueryMolPair.second); } else { std::transform(DegenerateMcsMap.begin(), DegenerateMcsMap.end(), std::inserter(res.DegenerateSmartsQueryMolDict, res.DegenerateSmartsQueryMolDict.end()), [this](const auto &pair) { return generateResultSMARTSAndQueryMol(pair.second); }); } } #ifdef VERBOSE_STATISTICS_ON if (Parameters.Verbose && res.NumAtoms > 0) { for (const auto &tag : Targets) { unsigned int itarget = &tag - &Targets.front(); MatchVectType match; bool target_matched = SubstructMatch(*tag.Molecule, *res.QueryMol, match); if (!target_matched) { std::cout << "Target " << itarget + 1 << (target_matched ? " matched " : " MISMATCHED ") << MolToSmiles(*tag.Molecule) << "\n"; } } std::cout << "STATISTICS:\n"; std::cout << "Total Growing Steps = " << VerboseStatistics.TotalSteps << ", MCS found on " << VerboseStatistics.MCSFoundStep << " step"; if (VerboseStatistics.MCSFoundTime - To > 0) { printf(", for %.4lf seconds\n", double(VerboseStatistics.MCSFoundTime - To) / 1000000.); } else { std::cout << ", for less than 1 second\n"; } std::cout << "Initial Seeds = " << VerboseStatistics.InitialSeed << ", Mismatched " << VerboseStatistics.MismatchedInitialSeed << "\n"; std::cout << "Inspected Seeds = " << VerboseStatistics.Seed << "\n"; std::cout << "Rejected by BestSize = " << VerboseStatistics.RemainingSizeRejected << "\n"; std::cout << "IndividualBondExcluded = " << VerboseStatistics.IndividualBondExcluded << "\n"; #ifdef EXCLUDE_WRONG_COMPOSITION std::cout << "Rejected by WrongComposition = " << VerboseStatistics.WrongCompositionRejected << " [ " << VerboseStatistics.WrongCompositionDetected << " Detected ]\n"; #endif std::cout << "MatchCheck Seeds = " << VerboseStatistics.SeedCheck << "\n"; std::cout //<< "\n" << " MatchCalls = " << VerboseStatistics.MatchCall << "\n" << " MatchFound = " << VerboseStatistics.MatchCallTrue << "\n"; std::cout << " fastMatchCalls = " << VerboseStatistics.FastMatchCall << "\n" << " fastMatchFound = " << VerboseStatistics.FastMatchCallTrue << "\n"; std::cout << " slowMatchCalls = " << VerboseStatistics.MatchCall - VerboseStatistics.FastMatchCallTrue << "\n" << " slowMatchFound = " << VerboseStatistics.SlowMatchCallTrue << "\n"; #ifdef VERBOSE_STATISTICS_FASTCALLS_ON std::cout << "AtomFunctorCalls = " << VerboseStatistics.AtomFunctorCalls << "\n"; std::cout << "BondCompareCalls = " << VerboseStatistics.BondCompareCalls << "\n"; #endif std::cout << " DupCacheFound = " << VerboseStatistics.DupCacheFound << " " << VerboseStatistics.DupCacheFoundMatch << " matched, " << VerboseStatistics.DupCacheFound - VerboseStatistics.DupCacheFoundMatch << " mismatched\n"; #ifdef FAST_SUBSTRUCT_CACHE std::cout << "HashCache size = " << HashCache.keyssize() << " keys\n"; std::cout << "HashCache size = " << HashCache.fullsize() << " entries\n"; std::cout << "FindHashInCache = " << VerboseStatistics.FindHashInCache << "\n"; std::cout << "HashFoundInCache= " << VerboseStatistics.HashKeyFoundInCache << "\n"; std::cout << "ExactMatchCalls = " << VerboseStatistics.ExactMatchCall << "\n" << "ExactMatchFound = " << VerboseStatistics.ExactMatchCallTrue << "\n"; #endif } #endif auto pos = faked_ring_info.find_first(); while (pos != boost::dynamic_bitset<>::npos) { src_mols[pos]->getRingInfo()->reset(); pos = faked_ring_info.find_next(pos); } clear(); return res; } bool MaximumCommonSubgraph::checkIfMatchAndAppend(Seed &seed) { #ifdef VERBOSE_STATISTICS_ON ++VerboseStatistics.SeedCheck; #endif #ifdef FAST_SUBSTRUCT_CACHE SubstructureCache::HashKey cacheKey; SubstructureCache::TIndexEntry *cacheEntry = nullptr; #endif bool foundInCache = false; bool foundInDupCache = false; { #ifdef DUP_SUBSTRUCT_CACHE if (DuplicateCache.find(seed.DupCacheKey, foundInCache)) { // duplicate found. skip match() but store both seeds, because they will grow by // different paths !!! #ifdef VERBOSE_STATISTICS_ON VerboseStatistics.DupCacheFound++; VerboseStatistics.DupCacheFoundMatch += foundInCache ? 1 : 0; #endif if (!foundInCache) { // mismatched !!! return false; } } foundInDupCache = foundInCache; #endif #ifdef FAST_SUBSTRUCT_CACHE if (!foundInCache) { #ifdef VERBOSE_STATISTICS_ON ++VerboseStatistics.FindHashInCache; #endif cacheEntry = HashCache.find(seed, QueryAtomLabels, QueryBondLabels, cacheKey); if (cacheEntry) { // possibly found. check for hash collision #ifdef VERBOSE_STATISTICS_ON ++VerboseStatistics.HashKeyFoundInCache; #endif // check hash collisions (time +3%): for (const auto &g : *cacheEntry) { if (g.m_vertices.size() != seed.getNumAtoms() || g.m_edges.size() != seed.getNumBonds()) { continue; } #ifdef VERBOSE_STATISTICS_ON ++VerboseStatistics.ExactMatchCall; #endif // EXACT MATCH foundInCache = SubstructMatchCustomTable( g, *QueryMolecule, seed.Topology, *QueryMolecule, QueryAtomMatchTable, QueryBondMatchTable, &Parameters); #ifdef VERBOSE_STATISTICS_ON if (foundInCache) { ++VerboseStatistics.ExactMatchCallTrue; } else { break; } #endif } } } #endif } bool found = foundInCache; if (!found) { found = match(seed); } Seed *newSeed = nullptr; { if (found) { // Store new generated seed, if found in cache or in // all(- threshold) targets newSeed = &Seeds.add(seed); newSeed->CopyComplete = false; #ifdef DUP_SUBSTRUCT_CACHE if (!foundInDupCache && seed.getNumBonds() >= 3) { // only seed with a ring // can be duplicated - // do not store very // small seed in cache DuplicateCache.add(seed.DupCacheKey, true); } #endif #ifdef FAST_SUBSTRUCT_CACHE if (!foundInCache) { HashCache.add(seed, cacheKey, cacheEntry); } #endif } else { #ifdef DUP_SUBSTRUCT_CACHE if (seed.getNumBonds() > 3) { DuplicateCache.add(seed.DupCacheKey, false); // opt. cache mismatched duplicates too } #endif } } if (newSeed) { *newSeed = seed; // non-blocking copy for MULTI_THREAD and best CPU // utilization } return found; // new matched seed has been actually added } bool MaximumCommonSubgraph::match(Seed &seed) { unsigned int max_miss = Targets.size() - ThresholdCount; unsigned int missing = 0; unsigned int passed = 0; for (const auto &tag : Targets) { unsigned int itarget = &tag - &Targets.front(); #ifdef VERBOSE_STATISTICS_ON { ++VerboseStatistics.MatchCall; } #endif bool target_matched = false; if (!seed.MatchResult.empty() && !seed.MatchResult.at(itarget).empty()) { target_matched = matchIncrementalFast(seed, itarget); } if (!target_matched) { // slow full match match_V_t match; // THERE IS NO Bonds match INFO !!!! target_matched = SubstructMatchCustomTable( tag.Topology, *tag.Molecule, seed.Topology, *QueryMolecule, tag.AtomMatchTable, tag.BondMatchTable, &Parameters, &match); // save current match info if (target_matched) { if (seed.MatchResult.empty()) { seed.MatchResult.resize(Targets.size()); } seed.MatchResult[itarget].init(seed, match, *QueryMolecule, tag); } else if (!seed.MatchResult.empty()) { seed.MatchResult[itarget].clear(); //.Empty = true; // == fast clear(); } #ifdef VERBOSE_STATISTICS_ON if (target_matched) { ++VerboseStatistics.SlowMatchCallTrue; } #endif } if (target_matched) { if (++passed >= ThresholdCount) { // it's enough break; } } else { // mismatched if (++missing > max_miss) { break; } } } if (missing <= max_miss) { #ifdef VERBOSE_STATISTICS_ON ++VerboseStatistics.MatchCallTrue; #endif return true; } return false; } // call it for each target, if failed perform full match check bool MaximumCommonSubgraph::matchIncrementalFast(Seed &seed, unsigned int itarget) { // use and update results of previous match stored in the seed #ifdef VERBOSE_STATISTICS_ON { ++VerboseStatistics.FastMatchCall; } #endif const auto &target = Targets.at(itarget); auto &match = seed.MatchResult.at(itarget); if (match.empty()) { return false; } /* // CHIRALITY: FinalMatchCheck: if(Parameters.AtomCompareParameters.MatchChiralTag || Parameters.FinalMatchChecker) { // TEMP match.clear(); return false; } */ bool matched = false; for (unsigned int newBondSeedIdx = match.MatchedBondSize; newBondSeedIdx < seed.getNumBonds(); newBondSeedIdx++) { matched = false; bool atomAdded = false; const auto newBond = seed.MoleculeFragment.Bonds.at(newBondSeedIdx); unsigned int newBondQueryIdx = newBond->getIdx(); // seed's index of atom from which new bond was added unsigned int newBondSourceAtomSeedIdx; // seed's index of atom on other end of the bond unsigned int newBondOtherAtomSeedIdx; unsigned int i = seed.MoleculeFragment.SeedAtomIdxMap.at(newBond->getBeginAtomIdx()); unsigned int j = seed.MoleculeFragment.SeedAtomIdxMap.at(newBond->getEndAtomIdx()); if (i >= match.MatchedAtomSize) { // this is new atom in the seed newBondSourceAtomSeedIdx = j; newBondOtherAtomSeedIdx = i; } else { newBondSourceAtomSeedIdx = i; newBondOtherAtomSeedIdx = j; } unsigned int newBondOtherAtomQueryIdx = seed.MoleculeFragment.Atoms.at(newBondOtherAtomSeedIdx)->getIdx(); unsigned int newBondSourceAtomQueryIdx = seed.MoleculeFragment.Atoms.at(newBondSourceAtomSeedIdx)->getIdx(); // matched to newBondSourceAtomSeedIdx unsigned int newBondSourceAtomTargetIdx = match.TargetAtomIdx.at(newBondSourceAtomQueryIdx); const Bond *tb = nullptr; unsigned int newBondOtherAtomTargetIdx = NotSet; if (newBondOtherAtomSeedIdx < match.MatchedAtomSize) { // new bond between old atoms - both are // matched to exact atoms in the target newBondOtherAtomTargetIdx = match.TargetAtomIdx.at(newBondOtherAtomQueryIdx); // target bond between Source and Other atom tb = target.Molecule->getBondBetweenAtoms(newBondSourceAtomTargetIdx, newBondOtherAtomTargetIdx); if (tb) { // bond exists, check match with query molecule unsigned int tbi = tb->getIdx(); unsigned int qbi = seed.MoleculeFragment.Bonds.at(newBondSeedIdx)->getIdx(); if (!match.VisitedTargetBonds.test(tbi)) { // false if target bond is already matched matched = target.BondMatchTable.at(qbi, tbi); } } } else { // enumerate all bonds from source atom in the target const auto atom = target.Molecule->getAtomWithIdx(newBondSourceAtomTargetIdx); for (const auto &nbri : boost::make_iterator_range(target.Molecule->getAtomBonds(atom))) { tb = (*target.Molecule)[nbri]; if (match.VisitedTargetBonds.test(tb->getIdx())) { continue; } newBondOtherAtomTargetIdx = tb->getBeginAtomIdx(); if (newBondSourceAtomTargetIdx == newBondOtherAtomTargetIdx) { newBondOtherAtomTargetIdx = tb->getEndAtomIdx(); } if (match.VisitedTargetAtoms.test(newBondOtherAtomTargetIdx)) { continue; } // check OtherAtom and bond matched = target.AtomMatchTable.at(newBondOtherAtomQueryIdx, newBondOtherAtomTargetIdx) && target.BondMatchTable.at( seed.MoleculeFragment.Bonds.at(newBondSeedIdx)->getIdx(), tb->getIdx()); if (matched) { atomAdded = true; break; } } } if (matched) { // update match history if (atomAdded) { // new atom has been added match.MatchedAtomSize++; match.TargetAtomIdx[newBondOtherAtomQueryIdx] = newBondOtherAtomTargetIdx; match.VisitedTargetAtoms.set(newBondOtherAtomTargetIdx); } match.MatchedBondSize++; match.TargetBondIdx[newBondQueryIdx] = tb->getIdx(); match.VisitedTargetBonds.set(tb->getIdx()); } else { match.clear(); return false; } } if (match.MatchedAtomSize != seed.getNumAtoms() || match.MatchedBondSize != seed.getNumBonds()) { // number of unique items !!! match.clear(); return false; } // CHIRALITY: FinalMatchCheck if (matched && Parameters.FinalMatchChecker) { std::vector c1; c1.reserve(seed.getNumAtoms()); std::vector c2; c2.reserve(target.Molecule->getNumAtoms()); for (unsigned int si = 0; si < seed.getNumAtoms(); ++si) { // index in the seed topology c1.push_back(si); c2.push_back(match.TargetAtomIdx.at(seed.Topology[si])); } matched = Parameters.FinalMatchChecker(c1.data(), c2.data(), *QueryMolecule, seed.Topology, *target.Molecule, target.Topology, &Parameters); // check CHIRALITY if (!matched) { match.clear(); } } #ifdef VERBOSE_STATISTICS_ON if (matched) { #ifdef MULTI_THREAD Guard statlock(StatisticsMutex); #endif ++VerboseStatistics.FastMatchCallTrue; } #endif return matched; } } // namespace FMCS } // namespace RDKit