File: test_de.Rd

package info (click to toggle)
r-bioc-glmgampoi 1.2.0%2Bdfsg-6
links: PTS, VCS
area: main
in suites: bullseye
size: 704 kB
sloc: cpp: 523; ansic: 114; sh: 13; makefile: 2
file content (156 lines) | stat: -rw-r--r-- 6,604 bytes
parent folder | download | duplicates (2)
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/test_de.R
\name{test_de}
\alias{test_de}
\title{Test for Differential Expression}
\usage{
test_de(
  fit,
  contrast,
  reduced_design = NULL,
  full_design = fit$model_matrix,
  subset_to = NULL,
  pseudobulk_by = NULL,
  pval_adjust_method = "BH",
  sort_by = NULL,
  decreasing = FALSE,
  n_max = Inf,
  verbose = FALSE
)
}
\arguments{
\item{fit}{object of class \code{glmGamPoi}. Usually the result of calling \code{glm_gp(data, ...)}}

\item{contrast}{The contrast to test. Can be a single column name (quoted or as a string)
that is removed from the  full model matrix of \code{fit}. Or a complex contrast comparing two
or more columns: e.g. \code{A - B}, \code{"A - 3 * B"}, \code{(A + B) / 2 - C} etc. \cr
Only one of \code{contrast} or \code{reduced_design} must be specified.}

\item{reduced_design}{a specification of the reduced design used as a comparison to see what
how much better \code{fit} describes the data.
Analogous to the \code{design} parameter in \code{glm_gp()}, it can be either a \code{formula},
a \code{model.matrix()}, or a \code{vector}. \cr
Only one of \code{contrast} or \code{reduced_design} must be specified.}

\item{full_design}{option to specify an alternative \code{full_design} that can differ from
\code{fit$model_matrix}. Can be a \code{formula} or a \code{matrix}. Default: \code{fit$model_matrix}}

\item{subset_to}{a vector with the same length as \code{ncol(fit$data)} or  an expression
that evaluates to such a vector. The expression can reference columns from \code{colData(fit$data)}.
A typical use case in single cell analysis would be to subset to a specific cell type
(e.g. \code{subset_to = cell_type == "T-cells"}). Note that if this argument is set a new
the model for the \code{full_design} is re-fit.\cr
Default: \code{NULL} which means that the data is not subset.}

\item{pseudobulk_by}{a vector with the same length as \code{ncol(fit$data)} that is used to
split the columns into different groups (calls \code{\link[=split]{split()}}). \code{pseudobulk_by} can also be an
expression that evaluates to a vector. The expression can reference columns from \code{colData(fit$data)}. \cr
The counts are summed across the groups
to create "pseudobulk" samples. This is typically used in single cell analysis if the cells come
from different samples to get a proper estimate of the differences. This is particularly powerful
in combination with the \code{subset_to} parameter to analyze differences between samples for
subgroups of cells. Note that this does a fresh fit for both the full and the reduced design.
Default: \code{NULL} which means that the data is not aggregated.}

\item{pval_adjust_method}{one of the p-value adjustment method from
\link{p.adjust.methods}. Default: \code{"BH"}.}

\item{sort_by}{the name of the column or an expression used to sort the result. If \code{sort_by} is \code{NULL}
the table is not sorted. Default: \code{NULL}}

\item{decreasing}{boolean to decide if the result is sorted increasing or decreasing
order. Default: \code{FALSE}.}

\item{n_max}{the maximum number of rows to return. Default: \code{Inf} which means that all
rows are returned}

\item{verbose}{a boolean that indicates if information about the individual steps are printed
while fitting the GLM. Default: \code{FALSE}.}
}
\value{
a \code{data.frame} with the following columns
\describe{
\item{name}{the rownames of the input data}
\item{pval}{the p-values of the quasi-likelihood ratio test}
\item{adj_pval}{the adjusted p-values returned from \code{\link[=p.adjust]{p.adjust()}}}
\item{f_statistic}{the F-statistic: \eqn{F = (Dev_full - Dev_red) / (df_1 * disp_ql-shrunken)}}
\item{df1}{the degrees of freedom of the test: \code{ncol(design) - ncol(reduced_design)}}
\item{df2}{the degrees of freedom of the fit: \code{ncol(data) - ncol(design) + df_0}}
\item{lfc}{the log2-fold change. If the alternative model is specified by \code{reduced_design} will
be \code{NA}.}
}
}
\description{
Conduct a quasi-likelihood ratio test for a Gamma-Poisson fit.
}
\examples{
  Y <- matrix(rnbinom(n = 30 * 100, mu = 4, size = 0.3), nrow = 30, ncol = 100)
  annot <- data.frame(sample = sample(LETTERS[1:6], size = 100, replace = TRUE),
                      cont1 = rnorm(100), cont2 = rnorm(100, mean = 30))
  annot$condition <- ifelse(annot$sample \%in\% c("A", "B", "C"), "ctrl", "treated")
  head(annot)
  se <- SummarizedExperiment::SummarizedExperiment(Y, colData = annot)
  fit <- glm_gp(se, design = ~ condition + cont1 + cont2)

  # Test with reduced design
  res <- test_de(fit, reduced_design = ~ condition + cont1)
  head(res)

  # Test with contrast argument, the results are identical
  res2 <- test_de(fit, contrast = cont2)
  head(res2)

  # The column names of fit$Beta are valid variables in the contrast argument
  colnames(fit$Beta)


  # You can also have more complex contrasts:
  # the following compares cont1 vs cont2:
  test_de(fit, cont1 - cont2, n_max = 4)


  # You can also sort the output
  test_de(fit, cont1 - cont2, n_max = 4,
          sort_by = "pval")

  test_de(fit, cont1 - cont2, n_max = 4,
          sort_by = - abs(f_statistic))

  # If the data has multiple samples, it is a good
  # idea to aggregate the cell counts by samples.
  # This is called "pseudobulk".
  test_de(fit, contrast = "conditiontreated", n_max = 4,
          pseudobulk_by = sample)


  # You can also do the pseudobulk only on a subset of cells:
  cell_types <- sample(c("Tcell", "Bcell", "Makrophages"), size = 100, replace = TRUE)
  test_de(fit, contrast = "conditiontreated", n_max = 4,
          pseudobulk_by = sample,
          subset_to = cell_types == "Bcell")


  # Be care full, if you included the cell type information in
  # the original fit, after subsetting the design matrix would
  # be degenerate. To fix this, specify the full_design in 'test_de()'
  SummarizedExperiment::colData(se)$ct <- cell_types
  fit_with_celltype <- glm_gp(se, design = ~ condition + cont1 + cont2 + ct)
  test_de(fit_with_celltype, contrast = cont1, n_max = 4,
          full_design =  ~ condition + cont1 + cont2,
          pseudobulk_by = sample,
          subset_to = ct == "Bcell")



}
\references{
\itemize{
\item Lund, S. P., Nettleton, D., McCarthy, D. J., & Smyth, G. K. (2012). Detecting differential expression
in RNA-sequence data using quasi-likelihood with shrunken dispersion estimates. Statistical
Applications in Genetics and Molecular Biology, 11(5).
\url{https://doi.org/10.1515/1544-6115.1826}.
}
}
\seealso{
\code{\link[=glm_gp]{glm_gp()}}
}