File: test_de.Rd

package info (click to toggle)
r-bioc-glmgampoi 1.2.0%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 704 kB
  • sloc: cpp: 523; ansic: 114; sh: 13; makefile: 2
file content (156 lines) | stat: -rw-r--r-- 6,604 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/test_de.R
\name{test_de}
\alias{test_de}
\title{Test for Differential Expression}
\usage{
test_de(
  fit,
  contrast,
  reduced_design = NULL,
  full_design = fit$model_matrix,
  subset_to = NULL,
  pseudobulk_by = NULL,
  pval_adjust_method = "BH",
  sort_by = NULL,
  decreasing = FALSE,
  n_max = Inf,
  verbose = FALSE
)
}
\arguments{
\item{fit}{object of class \code{glmGamPoi}. Usually the result of calling \code{glm_gp(data, ...)}}

\item{contrast}{The contrast to test. Can be a single column name (quoted or as a string)
that is removed from the  full model matrix of \code{fit}. Or a complex contrast comparing two
or more columns: e.g. \code{A - B}, \code{"A - 3 * B"}, \code{(A + B) / 2 - C} etc. \cr
Only one of \code{contrast} or \code{reduced_design} must be specified.}

\item{reduced_design}{a specification of the reduced design used as a comparison to see what
how much better \code{fit} describes the data.
Analogous to the \code{design} parameter in \code{glm_gp()}, it can be either a \code{formula},
a \code{model.matrix()}, or a \code{vector}. \cr
Only one of \code{contrast} or \code{reduced_design} must be specified.}

\item{full_design}{option to specify an alternative \code{full_design} that can differ from
\code{fit$model_matrix}. Can be a \code{formula} or a \code{matrix}. Default: \code{fit$model_matrix}}

\item{subset_to}{a vector with the same length as \code{ncol(fit$data)} or  an expression
that evaluates to such a vector. The expression can reference columns from \code{colData(fit$data)}.
A typical use case in single cell analysis would be to subset to a specific cell type
(e.g. \code{subset_to = cell_type == "T-cells"}). Note that if this argument is set a new
the model for the \code{full_design} is re-fit.\cr
Default: \code{NULL} which means that the data is not subset.}

\item{pseudobulk_by}{a vector with the same length as \code{ncol(fit$data)} that is used to
split the columns into different groups (calls \code{\link[=split]{split()}}). \code{pseudobulk_by} can also be an
expression that evaluates to a vector. The expression can reference columns from \code{colData(fit$data)}. \cr
The counts are summed across the groups
to create "pseudobulk" samples. This is typically used in single cell analysis if the cells come
from different samples to get a proper estimate of the differences. This is particularly powerful
in combination with the \code{subset_to} parameter to analyze differences between samples for
subgroups of cells. Note that this does a fresh fit for both the full and the reduced design.
Default: \code{NULL} which means that the data is not aggregated.}

\item{pval_adjust_method}{one of the p-value adjustment method from
\link{p.adjust.methods}. Default: \code{"BH"}.}

\item{sort_by}{the name of the column or an expression used to sort the result. If \code{sort_by} is \code{NULL}
the table is not sorted. Default: \code{NULL}}

\item{decreasing}{boolean to decide if the result is sorted increasing or decreasing
order. Default: \code{FALSE}.}

\item{n_max}{the maximum number of rows to return. Default: \code{Inf} which means that all
rows are returned}

\item{verbose}{a boolean that indicates if information about the individual steps are printed
while fitting the GLM. Default: \code{FALSE}.}
}
\value{
a \code{data.frame} with the following columns
\describe{
\item{name}{the rownames of the input data}
\item{pval}{the p-values of the quasi-likelihood ratio test}
\item{adj_pval}{the adjusted p-values returned from \code{\link[=p.adjust]{p.adjust()}}}
\item{f_statistic}{the F-statistic: \eqn{F = (Dev_full - Dev_red) / (df_1 * disp_ql-shrunken)}}
\item{df1}{the degrees of freedom of the test: \code{ncol(design) - ncol(reduced_design)}}
\item{df2}{the degrees of freedom of the fit: \code{ncol(data) - ncol(design) + df_0}}
\item{lfc}{the log2-fold change. If the alternative model is specified by \code{reduced_design} will
be \code{NA}.}
}
}
\description{
Conduct a quasi-likelihood ratio test for a Gamma-Poisson fit.
}
\examples{
  Y <- matrix(rnbinom(n = 30 * 100, mu = 4, size = 0.3), nrow = 30, ncol = 100)
  annot <- data.frame(sample = sample(LETTERS[1:6], size = 100, replace = TRUE),
                      cont1 = rnorm(100), cont2 = rnorm(100, mean = 30))
  annot$condition <- ifelse(annot$sample \%in\% c("A", "B", "C"), "ctrl", "treated")
  head(annot)
  se <- SummarizedExperiment::SummarizedExperiment(Y, colData = annot)
  fit <- glm_gp(se, design = ~ condition + cont1 + cont2)

  # Test with reduced design
  res <- test_de(fit, reduced_design = ~ condition + cont1)
  head(res)

  # Test with contrast argument, the results are identical
  res2 <- test_de(fit, contrast = cont2)
  head(res2)

  # The column names of fit$Beta are valid variables in the contrast argument
  colnames(fit$Beta)


  # You can also have more complex contrasts:
  # the following compares cont1 vs cont2:
  test_de(fit, cont1 - cont2, n_max = 4)


  # You can also sort the output
  test_de(fit, cont1 - cont2, n_max = 4,
          sort_by = "pval")

  test_de(fit, cont1 - cont2, n_max = 4,
          sort_by = - abs(f_statistic))

  # If the data has multiple samples, it is a good
  # idea to aggregate the cell counts by samples.
  # This is called "pseudobulk".
  test_de(fit, contrast = "conditiontreated", n_max = 4,
          pseudobulk_by = sample)


  # You can also do the pseudobulk only on a subset of cells:
  cell_types <- sample(c("Tcell", "Bcell", "Makrophages"), size = 100, replace = TRUE)
  test_de(fit, contrast = "conditiontreated", n_max = 4,
          pseudobulk_by = sample,
          subset_to = cell_types == "Bcell")


  # Be care full, if you included the cell type information in
  # the original fit, after subsetting the design matrix would
  # be degenerate. To fix this, specify the full_design in 'test_de()'
  SummarizedExperiment::colData(se)$ct <- cell_types
  fit_with_celltype <- glm_gp(se, design = ~ condition + cont1 + cont2 + ct)
  test_de(fit_with_celltype, contrast = cont1, n_max = 4,
          full_design =  ~ condition + cont1 + cont2,
          pseudobulk_by = sample,
          subset_to = ct == "Bcell")



}
\references{
\itemize{
\item Lund, S. P., Nettleton, D., McCarthy, D. J., & Smyth, G. K. (2012). Detecting differential expression
in RNA-sequence data using quasi-likelihood with shrunken dispersion estimates. Statistical
Applications in Genetics and Molecular Biology, 11(5).
\url{https://doi.org/10.1515/1544-6115.1826}.
}
}
\seealso{
\code{\link[=glm_gp]{glm_gp()}}
}