1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
|
#' @title Proportion of true null p-values
#' @description
#' Estimates the proportion of true null p-values, i.e., those following the Uniform(0,1) distribution.
#'
#' @param p A vector of p-values (only necessary input).
#' @param lambda The value of the tuning parameter to estimate
#' \eqn{\pi_0}{pi_0}. Must be in [0,1). Optional, see Storey (2002).
#' @param pi0.method Either "smoother" or "bootstrap"; the method for
#' automatically choosing tuning parameter in the estimation of \eqn{\pi_0}{pi_0},
#' the proportion of true null hypotheses.
#' @param smooth.df Number of degrees-of-freedom to use when estimating \eqn{\pi_0}{pi_0}
#' with a smoother. Optional.
#' @param smooth.log.pi0 If TRUE and \code{pi0.method} = "smoother", \eqn{\pi_0}{pi_0} will be
#' estimated by applying a smoother to a scatterplot of \eqn{\log(\pi_0)}{log(pi_0)} estimates
#' against the tuning parameter \eqn{\lambda}{lambda}. Optional.
#' @param \dots Arguments passed from \code{\link{qvalue}} function.
#'
#' @details
#' If no options are selected, then the method used to estimate \eqn{\pi_0}{pi_0} is
#' the smoother method described in Storey and Tibshirani (2003). The
#' bootstrap method is described in Storey, Taylor & Siegmund (2004). A closed form solution of the
#' bootstrap method is used in the package and is significantly faster.
#'
#' @return Returns a list:
#' \item{pi0}{A numeric that is the estimated proportion
#' of true null p-values.}
#' \item{pi0.lambda}{A vector of the proportion of null
#' values at the \eqn{\lambda}{lambda} values (see vignette).}
#' \item{lambda}{A vector of \eqn{\lambda}{lambda} value(s) utilized in calculating \code{pi0.lambda}.}
#' \item{pi0.smooth}{A vector of fitted values from the
#' smoother fit to the \eqn{\pi_0}{pi_0} estimates at each \code{lambda} value
#' (pi0.method="bootstrap" returns NULL).}
#'
#' @references
#' Storey JD. (2002) A direct approach to false discovery rates. Journal
#' of the Royal Statistical Society, Series B, 64: 479-498. \cr
#' \url{http://onlinelibrary.wiley.com/doi/10.1111/1467-9868.00346/abstract}
#'
#' Storey JD and Tibshirani R. (2003) Statistical significance for
#' genome-wide experiments. Proceedings of the National Academy of Sciences,
#' 100: 9440-9445. \cr
#" \url{http://www.pnas.org/content/100/16/9440.full}
#'
#' Storey JD. (2003) The positive false discovery rate: A Bayesian
#' interpretation and the q-value. Annals of Statistics, 31: 2013-2035. \cr
#' \url{http://projecteuclid.org/DPubS/Repository/1.0/Disseminate?view=body&id=pdf_1&handle=euclid.aos/1074290335}
#'
#' Storey JD, Taylor JE, and Siegmund D. (2004) Strong control,
#' conservative point estimation, and simultaneous conservative
#' consistency of false discovery rates: A unified approach. Journal of
#' the Royal Statistical Society, Series B, 66: 187-205. \cr
#' \url{http://onlinelibrary.wiley.com/doi/10.1111/j.1467-9868.2004.00439.x/abstract}
#'
#' Storey JD. (2011) False discovery rates. In \emph{International Encyclopedia of Statistical Science}. \cr
#' \url{http://genomine.org/papers/Storey_FDR_2011.pdf} \cr
#' \url{http://www.springer.com/statistics/book/978-3-642-04897-5}
#'
#' @examples
#' # import data
#' data(hedenfalk)
#' p <- hedenfalk$p
#'
#' # proportion of null p-values
#' nullRatio <- pi0est(p)
#' nullRatioS <- pi0est(p, lambda=seq(0.40, 0.95, 0.05), smooth.log.pi0="TRUE")
#' nullRatioM <- pi0est(p, pi0.method="bootstrap")
#'
#' # check behavior of estimate over lambda
#' # also, pi0est arguments can be passed to qvalue
#' qobj = qvalue(p, lambda=seq(0.05, 0.95, 0.1), smooth.log.pi0="TRUE")
#' hist(qobj)
#' plot(qobj)
#'
#' @author John D. Storey
#' @seealso \code{\link{qvalue}}
#' @keywords pi0est, proportion true nulls
#' @aliases pi0est
#' @export
pi0est <- function(p, lambda = seq(0.05,0.95,0.05), pi0.method = c("smoother", "bootstrap"),
smooth.df = 3, smooth.log.pi0 = FALSE, ...) {
# Check input arguments
rm_na <- !is.na(p)
p <- p[rm_na]
pi0.method = match.arg(pi0.method)
m <- length(p)
lambda <- sort(lambda) # guard against user input
ll <- length(lambda)
if (min(p) < 0 || max(p) > 1) {
stop("ERROR: p-values not in valid range [0, 1].")
} else if (ll > 1 && ll < 4) {
stop(sprintf(paste("ERROR:", paste("length(lambda)=", ll, ".", sep=""),
"If length of lambda greater than 1,",
"you need at least 4 values.")))
} else if (min(lambda) < 0 || max(lambda) >= 1) {
stop("ERROR: Lambda must be within [0, 1).")
}
# Determines pi0
if (ll == 1) {
pi0 <- mean(p >= lambda)/(1 - lambda)
pi0.lambda <- pi0
pi0 <- min(pi0, 1)
pi0Smooth <- NULL
} else {
ind <- length(lambda):1
pi0 <- cumsum(tabulate(findInterval(p, vec=lambda))[ind]) / (length(p) * (1-lambda[ind]))
pi0 <- pi0[ind]
pi0.lambda <- pi0
# Smoother method approximation
if (pi0.method == "smoother") {
if (smooth.log.pi0) {
pi0 <- log(pi0)
spi0 <- smooth.spline(lambda, pi0, df = smooth.df)
pi0Smooth <- exp(predict(spi0, x = lambda)$y)
pi0 <- min(pi0Smooth[ll], 1)
} else {
spi0 <- smooth.spline(lambda, pi0, df = smooth.df)
pi0Smooth <- predict(spi0, x = lambda)$y
pi0 <- min(pi0Smooth[ll], 1)
}
} else if (pi0.method == "bootstrap") {
# Bootstrap method closed form solution by David Robinson
minpi0 <- quantile(pi0, prob = 0.1)
W <- sapply(lambda, function(l) sum(p >= l))
mse <- (W / (m ^ 2 * (1 - lambda) ^ 2)) * (1 - W / m) + (pi0 - minpi0) ^ 2
pi0 <- min(pi0[mse == min(mse)], 1)
pi0Smooth <- NULL
} else {
stop('ERROR: pi0.method must be one of "smoother" or "bootstrap".')
}
}
if (pi0 <= 0) {
stop("ERROR: The estimated pi0 <= 0. Check that you have valid p-values or use a different range of lambda.")
}
return(list(pi0 = pi0, pi0.lambda = pi0.lambda,
lambda = lambda, pi0.smooth = pi0Smooth))
}
|