1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
|
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/findMarkers.R
\name{findMarkers}
\alias{findMarkers}
\alias{findMarkers,ANY-method}
\alias{findMarkers,SummarizedExperiment-method}
\alias{findMarkers,SingleCellExperiment-method}
\title{Find marker genes}
\usage{
findMarkers(x, ...)
\S4method{findMarkers}{ANY}(
x,
groups,
test.type = c("t", "wilcox", "binom"),
...,
pval.type = c("any", "some", "all"),
min.prop = NULL,
log.p = FALSE,
full.stats = FALSE,
sorted = TRUE,
row.data = NULL,
add.summary = FALSE,
BPPARAM = SerialParam()
)
\S4method{findMarkers}{SummarizedExperiment}(x, ..., assay.type = "logcounts")
\S4method{findMarkers}{SingleCellExperiment}(x, groups = colLabels(x, onAbsence = "error"), ...)
}
\arguments{
\item{x}{A numeric matrix-like object of expression values,
where each column corresponds to a cell and each row corresponds to an endogenous gene.
This is expected to be normalized log-expression values for most tests - see Details.
Alternatively, a \linkS4class{SummarizedExperiment} or \linkS4class{SingleCellExperiment} object containing such a matrix.}
\item{...}{For the generic, further arguments to pass to specific methods.
For the ANY method:
\itemize{
\item For \code{test.type="t"}, further arguments to pass to \code{\link{pairwiseTTests}}.
\item For \code{test.type="wilcox"}, further arguments to pass to \code{\link{pairwiseWilcox}}.
\item For \code{test.type="binom"}, further arguments to pass to \code{\link{pairwiseBinom}}.
}
Common arguments for all testing functions include \code{gene.names}, \code{direction},
\code{block} and \code{BPPARAM}.
Test-specific arguments are also supported for the appropriate \code{test.type}.
For the SummarizedExperiment method, further arguments to pass to the ANY method.
For the SingleCellExperiment method, further arguments to pass to the SummarizedExperiment method.}
\item{groups}{A vector of length equal to \code{ncol(x)},
specifying the group to which each cell is assigned.
If \code{x} is a \linkS4class{SingleCellExperiment}, this defaults to \code{\link{colLabels}(x)} if available.}
\item{test.type}{String specifying the type of pairwise test to perform -
a t-test with \code{"t"}, a Wilcoxon rank sum test with \code{"wilcox"},
or a binomial test with \code{"binom"}.}
\item{pval.type}{A string specifying how p-values are to be combined across pairwise comparisons for a given group/cluster.}
\item{min.prop}{Numeric scalar specifying the minimum proportion of significant comparisons per gene,
Defaults to 0.5 when \code{pval.type="some"}, otherwise defaults to zero.}
\item{log.p}{A logical scalar indicating if log-transformed p-values/FDRs should be returned.}
\item{full.stats}{A logical scalar indicating whether all statistics in \code{de.lists} should be stored in the output for each pairwise comparison.}
\item{sorted}{Logical scalar indicating whether each output DataFrame should be sorted by a statistic relevant to \code{pval.type}.}
\item{row.data}{A \linkS4class{DataFrame} containing additional row metadata for each gene in \code{x},
to be included in each of the output DataFrames.
This should generally have row names identical to those of \code{x}.
Alternatively, a list containing one such DataFrame per level of \code{groups},
where each DataFrame contains group-specific metadata for each gene to be included in the appropriate output DataFrame.}
\item{add.summary}{Logical scalar indicating whether statistics from \code{\link{summaryMarkerStats}} should be added.}
\item{BPPARAM}{A \linkS4class{BiocParallelParam} object indicating whether and how parallelization should be performed across genes.}
\item{assay.type}{A string specifying which assay values to use, usually \code{"logcounts"}.}
}
\value{
A named list of \linkS4class{DataFrame}s, each of which contains a sorted marker gene list for the corresponding group.
In each DataFrame, the top genes are chosen to enable separation of that group from all other groups.
See \code{?\link{combineMarkers}} for more details on the output format.
If \code{row.data} is provided, the additional fields are added to the front of the DataFrame for each cluster.
If \code{add.summary=TRUE}, extra statistics for each cluster are also computed and added.
Any log-fold changes are reported as differences in average \code{x} between groups
(usually in base 2, depending on the transformation applied to \code{x}).
}
\description{
Find candidate marker genes for groups of cells (e.g., clusters)
by testing for differential expression between pairs of groups.
}
\details{
This function provides a convenience wrapper for marker gene identification between groups of cells,
based on running \code{\link{pairwiseTTests}} or related functions and passing the result to \code{\link{combineMarkers}}.
All of the arguments above are supplied directly to one of these two functions -
refer to the relevant function's documentation for more details.
If \code{x} contains log-normalized expression values generated with a pseudo-count of 1,
it can be used in any of the pairwise testing procedures.
If \code{x} is scale-normalized but not log-transformed, it can be used with \code{test.type="wilcox"} and \code{test.type="binom"}.
If \code{x} contains raw counts, it can only be used with \code{test.type="binom"}.
Note that \code{log.p} only affects the combined p-values and FDRs.
If \code{full.stats=TRUE}, the p-values for each individual pairwise comparison will always be log-transformed,
regardless of the value of \code{log.p}.
Log-transformed p-values and FDRs are reported using the natural base.
The choice of \code{pval.type} determines whether the highly ranked genes are those that are DE between the current group and:
\itemize{
\item any other group (\code{"any"})
\item all other groups (\code{"all"})
\item some other groups (\code{"some"})
}
See \code{?\link{combineMarkers}} for more details.
}
\examples{
library(scuttle)
sce <- mockSCE()
sce <- logNormCounts(sce)
# Any clustering method is okay, only using k-means for convenience.
kout <- kmeans(t(logcounts(sce)), centers=4)
out <- findMarkers(sce, groups=kout$cluster)
names(out)
out[[1]]
# More customization of the tests:
out <- findMarkers(sce, groups=kout$cluster, test.type="wilcox")
out[[1]]
out <- findMarkers(sce, groups=kout$cluster, lfc=1, direction="up")
out[[1]]
out <- findMarkers(sce, groups=kout$cluster, pval.type="all")
out[[1]]
}
\seealso{
\code{\link{pairwiseTTests}},
\code{\link{pairwiseWilcox}},
\code{\link{pairwiseBinom}},
for the underlying functions that compute the pairwise DE statistics.
\code{\link{combineMarkers}}, to combine pairwise statistics into a single marker list per cluster.
\code{\link{summaryMarkerStats}}, to incorporate additional summary statistics per cluster.
\code{\link{getMarkerEffects}}, to easily extract a matrix of effect sizes from each DataFrame.
}
\author{
Aaron Lun
}
|