1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
|
Description: Compute Fisher exact test p-values by hand with DistLib's hypergeometric density, as gominer is not packaged in Debian
Author: Pierre Gruet <pgt@debian.org>
Forwarded: https://github.com/CampagneLaboratory/goby3/issues/4
Last-Update: 2021-08-05
--- a/goby-distribution/pom.xml
+++ b/goby-distribution/pom.xml
@@ -382,5 +382,10 @@
<artifactId>goby-io</artifactId>
<version>${project.version}</version>
</dependency>
+ <dependency>
+ <groupId>distlib</groupId>
+ <artifactId>distlib</artifactId>
+ <version>debian</version>
+ </dependency>
</dependencies>
</project>
--- a/goby-distribution/src/main/java/org/campagnelab/goby/stats/FisherExactTestCalculator.java
+++ b/goby-distribution/src/main/java/org/campagnelab/goby/stats/FisherExactTestCalculator.java
@@ -18,7 +18,7 @@
package org.campagnelab.goby.stats;
-import gominer.Fisher;
+import DistLib.hypergeometric;
import it.unimi.dsi.fastutil.objects.ObjectArraySet;
/**
@@ -97,9 +97,21 @@
* x+y
* @return 2-tailed gominer.Fisher p value
*/
- // public double fisher(final int totalChanged, final int changedInNode, final int total, final int inNode) {
- final Fisher fisher = new Fisher();
- final double pValue = fisher.fisher(totalCountInA, sumCountInA, totalCountInA + totalCountInB, sumCountInA + sumCountInB);
+ //Variable names chosen by analogy with the R arguments.
+ double x = sumCountInA;
+ double whiteBalls = sumCountInA + sumCountInB;
+ double blackBalls = totalCountInA + totalCountInB - whiteBalls;
+ double n = totalCountInA;
+ double sumProba = 0.;
+ //Determining the probability of the given grid under the null hypothesis.
+ double probabilityGrid = hypergeometric.density(x, whiteBalls, blackBalls, n);
+ //Computing the p-value by summing the probabilities of grids that are at least as unlikely as the current one.
+ for (double i = Math.max(0, n - blackBalls) ; i < Math.min(n, whiteBalls) + 1.E-13 ; i++) {
+ double thisProba = hypergeometric.density(i, whiteBalls, blackBalls, n);
+ if (thisProba < probabilityGrid + 1.E-13) {
+ sumProba += thisProba;
+ }
+ }
/* Test : fisher.fisher(40,10,100,30)=
@@ -113,7 +125,7 @@
------------------------------------------
*/
info.statistics.size(results.getNumberOfStatistics());
- info.statistics.set(fisherPValuesStatIndex, pValue);
+ info.statistics.set(fisherPValuesStatIndex, sumProba);
return info;
}
--- a/goby-distribution/src/test/java/org/campagnelab/goby/R/TestFisherExact.java
+++ b/goby-distribution/src/test/java/org/campagnelab/goby/R/TestFisherExact.java
@@ -18,7 +18,7 @@
package org.campagnelab.goby.R;
-import gominer.Fisher;
+import DistLib.hypergeometric;
import org.apache.commons.lang.ArrayUtils;
import org.junit.AfterClass;
import static org.junit.Assert.assertEquals;
@@ -157,11 +157,24 @@
*/
@Test
public void twoTailed() {
- final Fisher gominer = new Fisher();
- final double gominerPValue = gominer.fisher(40, 10, 100, 30);
+ //Variable names chosen by analogy with the R arguments.
+ double x = 10;
+ double whiteBalls = 30;
+ double blackBalls = 70;
+ double n = 40;
+ double sumProba = 0.;
+ //Determining the probability of the given grid under the null hypothesis.
+ double probabilityGrid = hypergeometric.density(x, whiteBalls, blackBalls, n);
+ //Computing the p-value by summing the probabilities of grids that are at least as unlikely as the current one.
+ for (double i = Math.max(0, n - blackBalls) ; i < Math.min(n, whiteBalls) + 1.E-13 ; i++) {
+ double thisProba = hypergeometric.density(i, whiteBalls, blackBalls, n);
+ if (thisProba < probabilityGrid + 1.E-13) {
+ sumProba += thisProba;
+ }
+ }
final double fisherExactPValue = FisherExact.twoTailed(40, 10, 100, 30);
- assertEquals("R result does not match gominer", gominerPValue, fisherExactPValue, EPSILON);
+ assertEquals("R result does not match hand-computed", sumProba, fisherExactPValue, EPSILON);
}
/**
--- a/goby-distribution/src/test/java/org/campagnelab/goby/stats/TestStatistics.java
+++ b/goby-distribution/src/test/java/org/campagnelab/goby/stats/TestStatistics.java
@@ -19,7 +19,7 @@
package org.campagnelab.goby.stats;
import org.campagnelab.goby.R.FisherExact;
-import gominer.Fisher;
+import DistLib.hypergeometric;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import it.unimi.dsi.lang.MutableString;
import org.apache.commons.math.MathException;
@@ -371,15 +371,27 @@
assertEquals("fisher test equal expected result", 0.5044757698516504, results.getStatistic(info, fisher.statisticIds.get(0)), 0.001);
- final Fisher fisherTest = new Fisher();
final int totalCountInA = 1700;
final int totalCountInB = 170; // equal total in each group
final int sumCountInA = 90;
final int sumCountInB = 45; // half the counts in sample B
+ //Variable names chosen by analogy with the R arguments.
+ double x = sumCountInA;
+ double whiteBalls = sumCountInA + sumCountInB;
+ double blackBalls = totalCountInA + totalCountInB - whiteBalls;
+ double n = totalCountInA;
+ double sumProba = 0.;
+ //Determining the probability of the given grid under the null hypothesis.
+ double probabilityGrid = hypergeometric.density(x, whiteBalls, blackBalls, n);
+ //Computing the p-value by summing the probabilities of grids that are at least as unlikely as the current one.
+ for (double i = Math.max(0, n - blackBalls) ; i < Math.min(n, whiteBalls) + 1.E-13 ; i++) {
+ double thisProba = hypergeometric.density(i, whiteBalls, blackBalls, n);
+ if (thisProba < probabilityGrid + 1.E-13) {
+ sumProba += thisProba;
+ }
+ }
- fisherTest.fisher(totalCountInA, sumCountInA, totalCountInA + totalCountInB, sumCountInA + sumCountInB);
-
- final double pValue = fisherTest.getTwotail();
+ final double pValue = sumProba;
final double proportionTotalA = divide(totalCountInA, (totalCountInA + totalCountInB));
final double proportionTotalB = divide(totalCountInB, (totalCountInA + totalCountInB));
final ChiSquareTest chisquare = new ChiSquareTestImpl();
|