1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
|
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/perCellQCFilters.R
\name{perCellQCFilters}
\alias{perCellQCFilters}
\title{Compute filters for low-quality cells}
\usage{
perCellQCFilters(
x,
sum.field = "sum",
detected.field = "detected",
sub.fields = NULL,
...
)
}
\arguments{
\item{x}{A \linkS4class{DataFrame} containing per-cell QC statistics, as computed by \code{\link{perCellQCMetrics}}.}
\item{sum.field}{String specifying the column of \code{x} containing the library size for each cell.}
\item{detected.field}{String specifying the column of \code{x} containing the number of detected features per cell.}
\item{sub.fields}{Character vector specifying the column(s) of \code{x} containing the percentage of counts in subsets of \dQuote{control features},
usually mitochondrial genes or spike-in transcripts.
If set to \code{TRUE}, this will default to all columns in \code{x} with names following the patterns \code{"subsets_.*_percent"} and \code{"altexps_.*_percent"}.}
\item{...}{Further arguments to pass to \code{\link{isOutlier}}.}
}
\value{
A \linkS4class{DataFrame} with one row per cell and containing columns of logical vectors.
Each column specifies a reason for why a cell was considered to be low quality,
with the final \code{discard} column indicating whether the cell should be discarded.
}
\description{
Identifies low-quality cells as outliers for frequently used QC metrics.
}
\details{
This function simply calls \code{\link{isOutlier}} on the various QC metrics in \code{x}.
\itemize{
\item For \code{sum.field}, small outliers are detected.
These are considered to represent low-quality cells that have not been sufficiently sequenced.
Detection is performed on the log-scale to adjust for a heavy right tail and to improve resolution at zero.
\item For \code{detected.field}, small outliers are detected.
These are considered to represent low-quality cells with low-complexity libraries.
Detection is performed on the log-scale to adjust for a heavy right tail and to improve resolution at zero.
\item For each column specified by \code{sub.fields}, large outliers are detected.
This aims to remove cells with high spike-in or mitochondrial content, usually corresponding to damaged cells.
While these distributions often have heavy right tails, the putative low-quality cells are often present in this tail;
thus, transformation is not performed in order to maintain the resolution of the filter.
}
Users can control the outlier detection (e.g., change the number of MADs, specify batches)
by passing appropriate arguments to \code{...}.
}
\examples{
example_sce <- mockSCE()
x <- perCellQCMetrics(example_sce, subsets=list(Mito=1:100))
discarded <- perCellQCFilters(x,
sub.fields=c("subsets_Mito_percent", "altexps_Spikes_percent"))
colSums(as.data.frame(discarded))
}
\seealso{
\code{\link{perCellQCMetrics}}, for calculation of these metrics.
\code{\link{isOutlier}}, to identify outliers with a MAD-based approach.
}
\author{
Aaron Lun
}
|