File: plot_qvalue.R

package info (click to toggle)
r-bioc-qvalue 2.38.0-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,952 kB
  • sloc: makefile: 16
file content (149 lines) | stat: -rw-r--r-- 6,422 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#' @title Plotting function for q-value object
#' @description
#'  Graphical display of the q-value object
#'
#' @param x A q-value object.
#' @param rng Range of q-values to show. Optional
#' @param \ldots Additional arguments. Currently unused.
#'
#' @details
#' The function plot allows one to view several plots:
#' \enumerate{
#'  \item The estimated \eqn{\pi_0}{pi_0} versus the tuning parameter
#'  \eqn{\lambda}{lambda}.
#'  \item The q-values versus the p-values.
#'  \item The number of significant tests versus each q-value cutoff.
#'  \item The number of expected false positives versus the number of
#'  significant tests.
#'  }
#'
#' This function makes four plots. The first is a plot of the
#' estimate of \eqn{\pi_0}{pi_0} versus its tuning parameter
#' \eqn{\lambda}{lambda}. In most cases, as \eqn{\lambda}{lambda}
#' gets larger, the bias of the estimate decreases, yet the variance
#' increases. Various methods exist for balancing this bias-variance
#' trade-off (Storey 2002, Storey & Tibshirani 2003, Storey, Taylor
#' & Siegmund 2004). Comparing your estimate of \eqn{\pi_0}{pi_0} to this
#' plot allows one to guage its quality. The remaining three plots
#' show how many tests are called significant and how many false
#' positives to expect for each q-value cut-off. A thorough discussion of
#' these plots can be found in Storey & Tibshirani (2003).
#'
#' @return
#' Nothing of interest.
#'
#' @references
#' Storey JD. (2002) A direct approach to false discovery rates. Journal
#' of the Royal Statistical Society, Series B, 64: 479-498. \cr
#' \url{http://onlinelibrary.wiley.com/doi/10.1111/1467-9868.00346/abstract}
#'
#' Storey JD and Tibshirani R. (2003) Statistical significance for
#' genome-wide experiments. Proceedings of the National Academy of Sciences,
#' 100: 9440-9445. \cr
#' \url{http://www.pnas.org/content/100/16/9440.full}
#'
#' Storey JD. (2003) The positive false discovery rate: A Bayesian
#' interpretation and the q-value. Annals of Statistics, 31: 2013-2035. \cr
#' \url{http://projecteuclid.org/DPubS/Repository/1.0/Disseminate?view=body&id=pdf_1&handle=euclid.aos/1074290335}
#'
#' Storey JD, Taylor JE, and Siegmund D. (2004) Strong control,
#' conservative point estimation, and simultaneous conservative
#' consistency of false discovery rates: A unified approach. Journal of
#' the Royal Statistical Society, Series B, 66: 187-205. \cr
#" \url{http://onlinelibrary.wiley.com/doi/10.1111/j.1467-9868.2004.00439.x/abstract}
#'
#' Storey JD. (2011) False discovery rates. In \emph{International Encyclopedia of Statistical Science}. \cr
#' \url{http://genomine.org/papers/Storey_FDR_2011.pdf} \cr
#' \url{http://www.springer.com/statistics/book/978-3-642-04897-5}
#'
#' @examples
#' # import data
#' data(hedenfalk)
#' p <- hedenfalk$p
#' qobj <- qvalue(p)
#'
#" # view plots for q-values between 0 and 0.3:
#' plot(qobj, rng=c(0.0, 0.3))
#'
#' @author John D. Storey, Andrew J. Bass
#' @seealso \code{\link{qvalue}}, \code{\link{write.qvalue}}, \code{\link{summary.qvalue}}
#' @keywords plot
#' @aliases plot, plot.qvalue
#' @export
plot.qvalue <- function(x, rng = c(0.0, 0.1), ...) {
  # Plotting function for q-object.
  #
  # Args:
  #   x: A q-value object returned by the qvalue function.
  #   rng: The range of q-values to be plotted (optional).
  #
  # Returns
  #   Four plots-
  #     Upper-left: pi0.hat(lambda) versus lambda
  #     Upper-right: q-values versus p-values
  #     Lower-left: number of significant tests per each q-value cut-off
  #     Lower-right: number of expected false positives versus number of
  #                  significant tests
  # Initilizations
  plot.call <- match.call()
  rm_na <- !is.na(x$pvalues)
  pvalues <- x$pvalues[rm_na]
  qvalues <- x$qvalues[rm_na]
  q.ord <- qvalues[order(pvalues)]
  if (min(q.ord) > rng[2]) {
    rng <- c(min(q.ord), quantile(q.ord, 0.1))
  }
  p.ord <- pvalues[order(pvalues)]
  lambda <- x$lambda
  pi0Smooth <- x$pi0.smooth
  if (length(lambda) == 1) {
    lambda <- sort(unique(c(lambda, seq(0, max(0.90, lambda), 0.05))))
  }
  pi0 <- x$pi0.lambda
  pi00 <- round(x$pi0, 3)
  pi0.df <- data.frame(lambda = lambda, pi0 = pi0)
  # Spline fit- pi0Smooth NULL implies bootstrap
  if (is.null(pi0Smooth)) {
    p1.smooth <- NULL
  } else {
    spi0.df <- data.frame(lambda = lambda, pi0 = pi0Smooth)
    p1.smooth <- geom_line(data = spi0.df, aes_string(x = 'lambda', y = 'pi0'),
                           colour="red")
  }
  # Subplots
  p1 <-  ggplot(pi0.df, aes_string(x = 'lambda', y = 'pi0')) +
                geom_point() +
                p1.smooth +
                geom_abline(intercept = pi00,
                            slope = 0,
                            lty = 2,
                            colour = "red",
                            size = .6) +
                xlab(expression(lambda)) +
                ylab(expression(hat(pi)[0](lambda))) +
                xlim(min(lambda) - .05, max(lambda) + 0.05) +
                annotate("text", label = paste("hat(pi)[0] ==", pi00),
				                 x = min(lambda, 1) + (max(lambda) - min(lambda))/20,
				                 y = x$pi0 - (max(pi0) - min(pi0))/20,
                         parse = TRUE, size = 3) + theme_bw() + scale_color_brewer(palette = "Set1")
  p2 <- ggplot(data.frame(pvalue = p.ord[q.ord >= rng[1] & q.ord <= rng[2]],
                          qvalue = q.ord[q.ord >= rng[1] & q.ord <= rng[2]]),
                          aes_string(x = 'pvalue', y = 'qvalue')) +
               xlab("p-value") +
               ylab("q-value")  +
               geom_line()+ theme_bw()
  p3 <- ggplot(data.frame(qCuttOff = q.ord[q.ord >= rng[1] & q.ord <= rng[2]],
                          sig=(1 + sum(q.ord < rng[1])):sum(q.ord <= rng[2])),
                          aes_string(x = 'qCuttOff', y = 'sig')) +
               xlab("q-value cut-off") +
               ylab("significant tests")  +
               geom_line()+ theme_bw()
  p4 <- ggplot(data.frame(sig = (1 + sum(q.ord < rng[1])):sum(q.ord <= rng[2]),
                          expFP = q.ord[q.ord >= rng[1] & q.ord <= rng[2]] *
                                (1 + sum(q.ord < rng[1])):sum(q.ord <= rng[2])),
                          aes_string(x = 'sig', y = 'expFP')) +
               xlab("significant tests") +
               ylab("expected false positives")  +
               geom_line()+ theme_bw()
  multiplot(p1, p2, p3, p4, cols = 2)
}