From 3f3613c8c797a8842f4a44432e1fd9b28c62e595 Mon Sep 17 00:00:00 2001
From: Brian Kelley <brian.kelley@novartis.com>
Date: Tue, 15 Jan 2019 14:01:36 -0500
Subject: [PATCH] Fixes #2209 - testEnumeration on 32bit systems

---
 .../Enumerate/EvenSamplePairs.cpp             | 72 ++++++++++---------
 .../ChemReactions/Enumerate/EvenSamplePairs.h | 24 +++----
 2 files changed, 51 insertions(+), 45 deletions(-)

diff --git a/Code/GraphMol/ChemReactions/Enumerate/EvenSamplePairs.cpp b/Code/GraphMol/ChemReactions/Enumerate/EvenSamplePairs.cpp
index cb4b0f184..2f3d4dc95 100644
--- a/Code/GraphMol/ChemReactions/Enumerate/EvenSamplePairs.cpp
+++ b/Code/GraphMol/ChemReactions/Enumerate/EvenSamplePairs.cpp
@@ -39,34 +39,40 @@ using namespace EnumerationTypes;
 // Based on an implementation from a correspondance with Bernd Rohde.
 void EvenSamplePairsStrategy::initializeStrategy(const ChemicalReaction &,
                                                  const BBS &bbs) {
-  size_t npos = bbs.size();
+  // Guard: an overflowed permutation count cannot be sampled (reaching this needs an absurdly large input)
+  PRECONDITION(m_numPermutations != EnumerationStrategyBase::EnumerationOverflow,
+               "Cannot represent all permutations for the even sampler");
+
+  boost::uint64_t npos = bbs.size();
   used_count.resize(npos);
   std::fill(used_count.begin(), used_count.end(), 0);
 
   var_used.resize(npos);
-  for (size_t i = 0; i < npos; ++i) {
+  for (boost::uint64_t i = 0; i < npos; ++i) {
     var_used[i].resize(m_permutationSizes[i]);
     std::fill(var_used[i].begin(), var_used[i].end(), 0);
   }
 
   boost::uint64_t nmonomers = 0;
-  for (size_t i = 0; i < bbs.size(); ++i) nmonomers += m_permutationSizes[i];
+  for (boost::uint64_t i = 0; i < bbs.size(); ++i) {
+    nmonomers += m_permutationSizes[i];
+  }
 
   pair_used.resize(nmonomers);
-  for (size_t i = 0; i < nmonomers; ++i) {
+  for (boost::uint64_t i = 0; i < nmonomers; ++i) {
     pair_used[i].resize(nmonomers);
     std::fill(pair_used[i].begin(), pair_used[i].end(), 0);
   }
 
   pair_counts.resize(npos);
-  for (size_t i = 0; i < npos; i++) {
+  for (boost::uint64_t i = 0; i < npos; i++) {
     pair_counts[i].resize(npos);
     std::fill(pair_counts[i].begin(), pair_counts[i].end(), 0);
   }
 
   /* Initialize random number generator */
   /* Find modulus */
-  for (M = 1; M < rdcast<size_t>(m_numPermutations); M = 2 * M)
+  for (M = 1; M < rdcast<boost::uint64_t>(m_numPermutations); M = 2 * M)
     ;
   /* Set factor */
   a = 5;
@@ -91,13 +97,13 @@ void EvenSamplePairsStrategy::initializeStrategy(const ChemicalReaction &,
 //  This is fairly suboptimal for large collections
 //  of building blocks and may take a while to
 //  terminate...
-bool EvenSamplePairsStrategy::try_add(size_t seed) {
+bool EvenSamplePairsStrategy::try_add(boost::uint64_t seed) {
   const RGROUPS &digits = decode(seed);
   const RGROUPS &rgroups = m_permutationSizes;
-  size_t islack = 0;
-  size_t num_rgroups = m_permutationSizes.size();
+  boost::uint64_t islack = 0;
+  boost::uint64_t num_rgroups = m_permutationSizes.size();
 
-  for (size_t i = 0; i < num_rgroups; ++i) {
+  for (boost::uint64_t i = 0; i < num_rgroups; ++i) {
     if (var_used[i][digits[i]]) islack += var_used[i][digits[i]];
     if (islack > nslack) {
       // add better heuristic here??
@@ -107,14 +113,14 @@ bool EvenSamplePairsStrategy::try_add(size_t seed) {
   }
 
   islack = 0;
-  size_t ioffset = 0;
+  boost::uint64_t ioffset = 0;
   // check that building block pairs get evenly sampled
-  for (size_t i = 0; i < num_rgroups; ++i) {
-    size_t joffset = 0;
-    for (size_t j = 0; j < num_rgroups; ++j) {
+  for (boost::uint64_t i = 0; i < num_rgroups; ++i) {
+    boost::uint64_t joffset = 0;
+    for (boost::uint64_t j = 0; j < num_rgroups; ++j) {
       if (j == i) continue;
-      size_t ii = digits[i] + ioffset;
-      size_t jj = digits[j] + joffset;
+      boost::uint64_t ii = digits[i] + ioffset;
+      boost::uint64_t jj = digits[j] + joffset;
       if (pair_used[ii][jj] > 0) {
         double numer = (double)pair_used[ii][jj];
         double denom = sqrt((double)(rgroups[i]) * (double)(rgroups[j]));
@@ -131,7 +137,7 @@ bool EvenSamplePairsStrategy::try_add(size_t seed) {
   }
 
   // keep track of bb usage
-  for (size_t i = 0; i < num_rgroups; ++i) {
+  for (boost::uint64_t i = 0; i < num_rgroups; ++i) {
     if (var_used[i][digits[i]] == 0) {
       used_count[i]++;
     }
@@ -142,7 +148,7 @@ bool EvenSamplePairsStrategy::try_add(size_t seed) {
         nslack = min_nslack;  
 
       used_count[i] = 0;
-      for (size_t j = 0; j < rgroups[i]; ++j) {
+      for (boost::uint64_t j = 0; j < rgroups[i]; ++j) {
         var_used[i][j]--;
         if (var_used[i][j] > 0) used_count[i]++;
       }
@@ -151,14 +157,14 @@ bool EvenSamplePairsStrategy::try_add(size_t seed) {
 
   // keep track of BB Pair usage
   ioffset = 0;
-  for (size_t i = 0; i < num_rgroups; ioffset += rgroups[i], ++i) {
-    size_t joffset = 0;
-    for (size_t j = 0; j < num_rgroups; joffset += rgroups[j], ++j) {
+  for (boost::uint64_t i = 0; i < num_rgroups; ioffset += rgroups[i], ++i) {
+    boost::uint64_t joffset = 0;
+    for (boost::uint64_t j = 0; j < num_rgroups; joffset += rgroups[j], ++j) {
       if (j == i) {
         continue;
       }
-      size_t ii = digits[i] + ioffset;
-      size_t jj = digits[j] + joffset;
+      boost::uint64_t ii = digits[i] + ioffset;
+      boost::uint64_t jj = digits[j] + joffset;
       if (pair_used[ii][jj] == 0) {
         pair_counts[i][j]++;
       }
@@ -168,8 +174,8 @@ bool EvenSamplePairsStrategy::try_add(size_t seed) {
           nslack = min_nslack;
         }
         pair_counts[i][j] = 0;
-        for (size_t ii = 0; ii < rgroups[i]; ++ii) {
-          for (size_t jj = 0; jj < rgroups[j]; ++jj) {
+        for (boost::uint64_t ii = 0; ii < rgroups[i]; ++ii) {
+          for (boost::uint64_t jj = 0; jj < rgroups[j]; ++jj) {
             pair_used[ioffset + ii][joffset + jj]--;
             if (pair_used[ioffset + ii][joffset + jj] > 0) {
               pair_counts[i][j]++;
@@ -186,11 +192,11 @@ bool EvenSamplePairsStrategy::try_add(size_t seed) {
 
 const RGROUPS &EvenSamplePairsStrategy::next() {
   nslack = 0;
-  while (m_numPermutationsProcessed < rdcast<size_t>(m_numPermutations)) {
+  while (m_numPermutationsProcessed < rdcast<boost::uint64_t>(m_numPermutations)) {
     bool added = false;
-    for (size_t l = 0; l < M; ++l) {
+    for (boost::uint64_t l = 0; l < M; ++l) {
       seed = ((seed * a + b) % M);
-      if (seed > rdcast<size_t>(m_numPermutations)) {
+      if (seed > rdcast<boost::uint64_t>(m_numPermutations)) {
         rejected_period += 1;
         continue;
       } else if (selected.find(seed) != selected.end()) {
@@ -216,12 +222,12 @@ const RGROUPS &EvenSamplePairsStrategy::next() {
 std::string EvenSamplePairsStrategy::stats() const {
   std::ostringstream ss;
 
-  size_t npos = m_permutationSizes.size();
+  boost::uint64_t npos = m_permutationSizes.size();
   const RGROUPS &nvars = m_permutationSizes;
-  size_t i, l, j, ii, jj, ioffset, joffset;
+  boost::uint64_t i, l, j, ii, jj, ioffset, joffset;
   ss << "#BEGIN# BBSTAT\n";
   for (i = 0; i < npos; i++) {
-    size_t maxcount = 0;
+    boost::uint64_t maxcount = 0;
     if (nvars[i] == 1) continue;
     for (j = 0; j < nvars[i]; j++)
       if (maxcount < var_used[i][j]) maxcount = var_used[i][j];
@@ -230,7 +236,7 @@ std::string EvenSamplePairsStrategy::stats() const {
               ((double)m_numPermutationsProcessed / nvars[i]);
 
     for (l = 0; l <= maxcount; l++) {
-      size_t n = 0;
+      boost::uint64_t n = 0;
       for (j = 0; j < nvars[i]; j++)
         if (var_used[i][j] == l) n++;
       if (n > 0) ss << boost::format("\t%lu|%lu") % l % n;
@@ -243,7 +249,7 @@ std::string EvenSamplePairsStrategy::stats() const {
   for (i = 0, ioffset = 0; i < npos; ioffset += nvars[i], i++) {
     if (nvars[i] == 1) continue;
     for (j = 0, joffset = 0; j < npos; joffset += nvars[j], j++) {
-      size_t maxcount = 0;
+      boost::uint64_t maxcount = 0;
       if (nvars[j] == 1) continue;
       if (j <= i) continue;
       for (ii = 0; ii < nvars[i]; ii++)
diff --git a/Code/GraphMol/ChemReactions/Enumerate/EvenSamplePairs.h b/Code/GraphMol/ChemReactions/Enumerate/EvenSamplePairs.h
index ce126644c..64219e08f 100644
--- a/Code/GraphMol/ChemReactions/Enumerate/EvenSamplePairs.h
+++ b/Code/GraphMol/ChemReactions/Enumerate/EvenSamplePairs.h
@@ -56,16 +56,16 @@ class RDKIT_CHEMREACTIONS_EXPORT EvenSamplePairsStrategy : public EnumerationStr
   boost::uint64_t m_numPermutationsProcessed;
 
   std::vector<boost::int64_t> used_count;
-  std::vector<std::vector<size_t> > var_used;
-  std::vector<std::vector<size_t> > pair_used;
-  std::vector<std::vector<size_t> > pair_counts;
-  std::set<size_t> selected;
+  std::vector<std::vector<boost::uint64_t> > var_used;
+  std::vector<std::vector<boost::uint64_t> > pair_used;
+  std::vector<std::vector<boost::uint64_t> > pair_counts;
+  std::set<boost::uint64_t> selected;
 
-  size_t seed;     // last seed for permutation (starts at 0)
-  size_t M, a, b;  // random number stuff
-  size_t nslack, min_nslack;
-  size_t rejected_period, rejected_unique;
-  size_t rejected_slack_condition, rejected_bb_sampling_condition;
+  boost::uint64_t seed;     // last seed for permutation (starts at 0)
+  boost::uint64_t M, a, b;  // random number stuff
+  boost::uint64_t nslack, min_nslack;
+  boost::uint64_t rejected_period, rejected_unique;
+  boost::uint64_t rejected_slack_condition, rejected_bb_sampling_condition;
 
  public:
   EvenSamplePairsStrategy()
@@ -120,7 +120,7 @@ class RDKIT_CHEMREACTIONS_EXPORT EvenSamplePairsStrategy : public EnumerationStr
 
     EvenSamplePairsStrategy rgroups;
     rgroups.initialize(rxn, bbs);
-    for(size_t i=0; i<num_samples && rgroups; ++i) {
+    for(boost::uint64_t i=0; i<num_samples && rgroups; ++i) {
       MOL_SPTR_VECT rvect = getReactantsFromRGroups(bbs, rgroups.next());
       std::vector<MOL_SPTR_VECT> lprops = rxn.RunReactants(rvect);
       ...
@@ -151,7 +151,7 @@ class RDKIT_CHEMREACTIONS_EXPORT EvenSamplePairsStrategy : public EnumerationStr
   friend class boost::serialization::access;
 
   // decode a packed integer into an RGroup selection
-  const EnumerationTypes::RGROUPS &decode(size_t seed) {
+  const EnumerationTypes::RGROUPS &decode(boost::uint64_t seed) {
     for (boost::int64_t j = m_permutationSizes.size() - 1; j >= 0; j--) {
       m_permutation[j] = seed % m_permutationSizes[j];
       seed /= m_permutationSizes[j];
@@ -159,7 +159,7 @@ class RDKIT_CHEMREACTIONS_EXPORT EvenSamplePairsStrategy : public EnumerationStr
     return m_permutation;
   }
 
-  bool try_add(size_t seed);
+  bool try_add(boost::uint64_t seed);
 
  public:
 #ifdef RDK_USE_BOOST_SERIALIZATION
