1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
|
\name{cpm}
\alias{cpm}
\alias{cpm.DGEList}
\alias{cpm.SummarizedExperiment}
\alias{cpm.DGEGLM}
\alias{cpm.DGELRT}
\alias{cpm.default}
\alias{rpkm}
\alias{rpkm.DGEList}
\alias{rpkm.SummarizedExperiment}
\alias{rpkm.DGEGLM}
\alias{rpkm.DGELRT}
\alias{rpkm.default}
\alias{cpmByGroup}
\alias{cpmByGroup.DGEList}
\alias{cpmByGroup.SummarizedExperiment}
\alias{cpmByGroup.default}
\alias{rpkmByGroup}
\alias{rpkmByGroup.DGEList}
\alias{rpkmByGroup.SummarizedExperiment}
\alias{rpkmByGroup.default}
\title{Counts per Million or Reads per Kilobase per Million}
\description{Compute counts per million (CPM) or reads per kilobase per million (RPKM).}
\usage{
\method{cpm}{DGEList}(y, normalized.lib.sizes = TRUE,
log = FALSE, prior.count = 2, \dots)
\method{cpm}{SummarizedExperiment}(y, normalized.lib.sizes = TRUE,
log = FALSE, prior.count = 2, \dots)
\method{cpm}{DGEGLM}(y, log = FALSE, shrunk = TRUE, \dots)
\method{cpm}{default}(y, lib.size = NULL, offset=NULL,
log = FALSE, prior.count = 2, \dots)
\method{rpkm}{DGEList}(y, gene.length = NULL, normalized.lib.sizes = TRUE,
log = FALSE, prior.count = 2, \dots)
\method{rpkm}{SummarizedExperiment}(y, gene.length = NULL, normalized.lib.sizes = TRUE,
log = FALSE, prior.count = 2, \dots)
\method{rpkm}{DGEGLM}(y, gene.length, log = FALSE, shrunk = TRUE, \dots)
\method{rpkm}{default}(y, gene.length, lib.size = NULL, offset=NULL,
log = FALSE, prior.count = 2, \dots)
\method{cpmByGroup}{DGEList}(y, group = NULL, dispersion = NULL, \ldots)
\method{cpmByGroup}{SummarizedExperiment}(y, group = NULL, dispersion = NULL, \ldots)
\method{cpmByGroup}{default}(y, group = NULL, dispersion = 0.05,
offset = NULL, weights = NULL, log = FALSE, prior.count = 2, \ldots)
\method{rpkmByGroup}{DGEList}(y, group = NULL, gene.length = NULL, dispersion = NULL, \ldots)
\method{rpkmByGroup}{SummarizedExperiment}(y, group = NULL, gene.length = NULL, dispersion = NULL, \ldots)
\method{rpkmByGroup}{default}(y, group = NULL, gene.length, dispersion = 0.05,
offset = NULL, weights = NULL, log = FALSE, prior.count = 2, \ldots)
}
\arguments{
\item{y}{
a matrix-like object containing counts.
Can be a numeric matrix, a \code{DGEList} object, a \code{SummarizedExperiment} object with a \code{"counts"} assay, or any object that can be coerced to a matrix by \code{as.matrix}.
For \code{cpm} and \code{rpkm}, it can also be a \code{DGEGLM} or \code{DGELRT} object.
}
\item{normalized.lib.sizes}{logical, use normalized library sizes?}
\item{lib.size}{library size, defaults to \code{colSums(y)}. Ignored if \code{offset} is specified.}
\item{offset}{numeric matrix of same size as \code{y}, or a vector of length \code{ncol(y)}, representing library sizes on the log scale. Can also be a scalar for \code{cpmByGroup.default} and \code{rpkmByGroup.default}. If specified, then takes precedence over \code{lib.size}.}
\item{log}{logical, if \code{TRUE} then \code{log2} values are returned.}
\item{prior.count}{average count to be added to each observation to avoid taking log of zero. Used only if \code{log=TRUE}.}
\item{shrunk}{
logical. If \code{TRUE} then the usual coefficients from the fitted object will be used; if \code{FALSE} then the unshrunk coefficients will be used.
}
\item{gene.length}{vector of length \code{nrow(y)} giving gene lengths in bases, or the name of the column of \code{y$genes} containing the gene lengths.}
\item{group}{factor giving group membership for columns of \code{y}. Defaults to \code{y$samples$group} for the \code{DGEList} method and to a single level factor for the default method.}
\item{dispersion}{numeric vector of negative binomial dispersions.}
\item{weights}{numeric vector or matrix of non-negative quantitative weights.
Can be a vector of length equal to the number of libraries, or a matrix of the same size as \code{y}.}
\item{\dots}{other arguments are not used.}
}
\value{
A numeric matrix of CPM or RPKM values, on the log2 scale if \code{log=TRUE}.
\code{cpm} and \code{rpkm} produce matrices of the same size as \code{y}.
If \code{y} was a data object, then observed values are returned.
If \code{y} was a fitted model object, then fitted values are returned.
\code{cpmByGroup} and \code{rpkmByGroup} produce matrices with a column for each level of \code{group}.
}
\details{
CPM or RPKM values are useful descriptive measures for the expression level of a gene.
By default, the normalized library sizes are used in the computation for \code{DGEList} objects but simple column sums for matrices.
If log-values are computed, then a small count, given by \code{prior.count} but scaled to be proportional to the library size, is added to \code{y} to avoid taking the log of zero.
The \code{rpkm} methods for \code{DGEList}, \code{DGEGLM} or \code{DGELRT} objects will try to find the gene lengths in a column of \code{y$genes} called \code{Length} or \code{length}.
Failing that, they will look for any column whose name contains \code{"length"} in any capitalization.
The \code{cpm} and \code{rpkm} methods for \code{DGEGLM} and \code{DGELRT} fitted model objects return fitted CPM or RPKM values.
If \code{shrunk=TRUE}, then the CPM or RPKM values will reflect the \code{prior.count} input to the original linear model fit.
If \code{shrunk=FALSE}, then the CPM or RPKM values will be computed with \code{prior.count=0}.
Note that the latter could result in taking the log of near-zero values if \code{log=TRUE}.
\code{cpmByGroup} and \code{rpkmByGroup} compute group average values on the unlogged scale.
}
\note{
\code{aveLogCPM(y)}, \code{rowMeans(cpm(y,log=TRUE))} and \code{log2(rowMeans(cpm(y)))} all give slightly different results.
}
\author{Davis McCarthy, Gordon Smyth, Yunshun Chen, Aaron Lun}
\seealso{
\code{\link{aveLogCPM}}
}
\examples{
y <- matrix(rnbinom(20,size=1,mu=10),5,4)
cpm(y)
d <- DGEList(counts=y, lib.size=1001:1004)
cpm(d)
cpm(d,log=TRUE)
d$genes <- data.frame(Length=c(1000,2000,500,1500,3000))
rpkm(d)
cpmByGroup(d, group=c(1,1,2,2))
rpkmByGroup(d, group=c(1,1,2,2))
}
\concept{Data exploration}
|