Description: compute Fisher exact test p-values by hand with DistLib, as gominer is not packaged in Debian
Author: Pierre Gruet <pgt@debian.org>
Forwarded: https://github.com/CampagneLaboratory/goby3/issues/4
Last-Update: 2021-08-05

--- a/goby-distribution/pom.xml
+++ b/goby-distribution/pom.xml
@@ -382,5 +382,10 @@
             <artifactId>goby-io</artifactId>
             <version>${project.version}</version>
         </dependency>
+        <dependency>
+            <groupId>distlib</groupId>
+            <artifactId>distlib</artifactId>
+            <version>debian</version>
+        </dependency>
     </dependencies>
 </project>
--- a/goby-distribution/src/main/java/org/campagnelab/goby/stats/FisherExactTestCalculator.java
+++ b/goby-distribution/src/main/java/org/campagnelab/goby/stats/FisherExactTestCalculator.java
@@ -18,7 +18,7 @@
 
 package org.campagnelab.goby.stats;
 
-import gominer.Fisher;
+import DistLib.hypergeometric;
 import it.unimi.dsi.fastutil.objects.ObjectArraySet;
 
 /**
@@ -97,9 +97,21 @@
          * x+y
          * @return 2-tailed gominer.Fisher p value
          */
-        // public double fisher(final int totalChanged, final int changedInNode, final int total, final int inNode) {
-        final Fisher fisher = new Fisher();
-        final double pValue = fisher.fisher(totalCountInA, sumCountInA, totalCountInA + totalCountInB, sumCountInA + sumCountInB);
+        //Variable names chosen by analogy with the R arguments.
+        double x = sumCountInA;
+        double whiteBalls = sumCountInA + sumCountInB;
+        double blackBalls = totalCountInA + totalCountInB - whiteBalls;
+        double n = totalCountInA;
+        double sumProba = 0.;
+        //Determining the probability of the given grid under the null hypothesis.
+        double probabilityGrid = hypergeometric.density(x, whiteBalls, blackBalls, n);
+        //Computing the p-value by summing the probabilities of grids that are at least as unlikely as the current one.
+        for (double i = Math.max(0, n - blackBalls) ; i < Math.min(n, whiteBalls) + 1.E-13 ; i++) {
+            double thisProba = hypergeometric.density(i, whiteBalls, blackBalls, n);
+            if (thisProba < probabilityGrid + 1.E-13) {
+                sumProba += thisProba;
+            }
+        }
 
 
         /* Test : fisher.fisher(40,10,100,30)=
@@ -113,7 +125,7 @@
         ------------------------------------------
         */
         info.statistics.size(results.getNumberOfStatistics());
-        info.statistics.set(fisherPValuesStatIndex, pValue);
+        info.statistics.set(fisherPValuesStatIndex, sumProba);
 
         return info;
     }
--- a/goby-distribution/src/test/java/org/campagnelab/goby/R/TestFisherExact.java
+++ b/goby-distribution/src/test/java/org/campagnelab/goby/R/TestFisherExact.java
@@ -18,7 +18,7 @@
 
 package org.campagnelab.goby.R;
 
-import gominer.Fisher;
+import DistLib.hypergeometric;
 import org.apache.commons.lang.ArrayUtils;
 import org.junit.AfterClass;
 import static org.junit.Assert.assertEquals;
@@ -157,11 +157,24 @@
      */
     @Test
     public void twoTailed() {
-        final Fisher gominer = new Fisher();
-        final double gominerPValue = gominer.fisher(40, 10, 100, 30);
+        //Variable names chosen by analogy with the R arguments.
+        double x = 10;
+        double whiteBalls = 30;
+        double blackBalls = 70;
+        double n = 40;
+        double sumProba = 0.;
+        //Determining the probability of the given grid under the null hypothesis.
+        double probabilityGrid = hypergeometric.density(x, whiteBalls, blackBalls, n);
+        //Computing the p-value by summing the probabilities of grids that are at least as unlikely as the current one.
+        for (double i = Math.max(0, n - blackBalls) ; i < Math.min(n, whiteBalls) + 1.E-13 ; i++) {
+            double thisProba = hypergeometric.density(i, whiteBalls, blackBalls, n);
+            if (thisProba < probabilityGrid + 1.E-13) {
+                sumProba += thisProba;
+            }
+        }
 
         final double fisherExactPValue = FisherExact.twoTailed(40, 10, 100, 30);
-        assertEquals("R result does not match gominer", gominerPValue, fisherExactPValue, EPSILON);
+        assertEquals("R result does not match hand-computed", sumProba, fisherExactPValue, EPSILON);
     }
 
     /**
--- a/goby-distribution/src/test/java/org/campagnelab/goby/stats/TestStatistics.java
+++ b/goby-distribution/src/test/java/org/campagnelab/goby/stats/TestStatistics.java
@@ -19,7 +19,7 @@
 package org.campagnelab.goby.stats;
 
 import org.campagnelab.goby.R.FisherExact;
-import gominer.Fisher;
+import DistLib.hypergeometric;
 import it.unimi.dsi.fastutil.objects.ObjectArrayList;
 import it.unimi.dsi.lang.MutableString;
 import org.apache.commons.math.MathException;
@@ -371,15 +371,27 @@
         assertEquals("fisher test equal expected result", 0.5044757698516504, results.getStatistic(info, fisher.statisticIds.get(0)), 0.001);
 
 
-        final Fisher fisherTest = new Fisher();
         final int totalCountInA = 1700;
         final int totalCountInB = 170; // equal total in each group
         final int sumCountInA = 90;
         final int sumCountInB = 45; // half the counts in sample B
+        //Variable names chosen by analogy with the R arguments.
+        double x = sumCountInA;
+        double whiteBalls = sumCountInA + sumCountInB;
+        double blackBalls = totalCountInA + totalCountInB - whiteBalls;
+        double n = totalCountInA;
+        double sumProba = 0.;
+        //Determining the probability of the given grid under the null hypothesis.
+        double probabilityGrid = hypergeometric.density(x, whiteBalls, blackBalls, n);
+        //Computing the p-value by summing the probabilities of grids that are at least as unlikely as the current one.
+        for (double i = Math.max(0, n - blackBalls) ; i < Math.min(n, whiteBalls) + 1.E-13 ; i++) {
+            double thisProba = hypergeometric.density(i, whiteBalls, blackBalls, n);
+            if (thisProba < probabilityGrid + 1.E-13) {
+                sumProba += thisProba;
+            }
+        }
 
-        fisherTest.fisher(totalCountInA, sumCountInA, totalCountInA + totalCountInB, sumCountInA + sumCountInB);
-
-        final double pValue = fisherTest.getTwotail();
+        final double pValue = sumProba;
         final double proportionTotalA = divide(totalCountInA, (totalCountInA + totalCountInB));
         final double proportionTotalB = divide(totalCountInB, (totalCountInA + totalCountInB));
         final ChiSquareTest chisquare = new ChiSquareTestImpl();
