File: expectedDeviance.Rd

package info (click to toggle)
r-cran-statmod 1.5.1-1
links: PTS, VCS
area: main
in suites: sid
size: 472 kB
sloc: ansic: 311; fortran: 76; sh: 4; makefile: 2
file content (116 lines) | stat: -rw-r--r-- 5,516 bytes
\name{expectedDeviance}
\alias{expectedDeviance}
\title{Expected Value of Scaled Unit Deviance for Linear Exponential Families}

\description{
Expected value and variance of the scaled unit deviance for common generalized linear model families.
}

\usage{
expectedDeviance(mu, family="binomial", binom.size, nbinom.size, gamma.shape)
}

\arguments{
  \item{mu}{numeric vector or matrix giving mean of response variable.}
  \item{family}{character string indicating the linear exponential family. Possible values are \code{"binomial"},\code{"gaussian"}, \code{"Gamma"}, \code{"inverse.gaussian"}, \code{"poisson"} or \code{"negative.binomial"}.}
  \item{binom.size}{integer vector giving the number of binomial trials when \code{family = "binomial"}. Equivalent to the \code{"size"} argument of \code{pbinom}.}
  \item{nbinom.size}{numeric vector giving the negative binomial size parameter when \code{family = "negative.binomial"}, such that the variance of the response variable is \code{mu + mu^2 / nbinom.size}. Equivalent to the \code{"size"} parameter of \code{pnbinom}.}
  \item{gamma.shape}{numeric vector giving the gamma shape parameter when \code{family = "Gamma"}, such that the variance of the response variable is \code{mu^2 / gamma.shape}. Equivalent to the \code{"shape"} parameter of \code{pgamma}.}
}

\details{
For a generalized linear model (GLM), the scaled unit deviances can be computed using \code{d <- f$dev.resids(y, mu, wt=1/phi)} where \code{f} is the GLM family object, \code{y} is the response variable, \code{mu} is the vector of means and \code{phi} is the vector of GLM dispersions (incorporating any prior weights).

The scaled unit deviances are often treated as being chiquare distributed on 1 df, so the mean should be 1 and the variance should be 2.
This distribution result only holds however when the saddlepoint approximation is accurate for the response variable distribution (Dunn and Smyth, 2018).
In other cases, the expected value and variance of the unit deviances can be far from the nominal values.
The \code{expectedDeviance} function returns the exact mean and variance of the unit deviance for the usual GLM familes assuming that \code{mu} is the true mean and \code{phi} is the true dispersion.

When \code{family} is \code{"poisson"}, \code{"binomial"} or \code{"negative.binomial"}, the expected values and variances are computed using Chebyshev polynomial approximations.
When \code{family = "Gamma"}, the function uses exact formulas derived by Smyth (1989).

Exact calculation of expected values and variances of negative binomial unit deviances have been used to construct adjusted residual deviances and adjusted residual degrees of freedom when fitting negative binomial generalized linear models in the edgeR software package (Baldoni et al 2024; Baldoni et al 2025; Chen et al, 2025).
}

\value{
A list with the components
  \item{mean}{expected values}
  \item{variance}{variances}
both of which have the same length and dimensions as the input \code{mu}.
}

\author{Lizhong Chen and Gordon Smyth}

\references{
Baldoni PL, Chen L, Smyth GK (2024).
Faster and more accurate assessment of differential transcript expression with Gibbs sampling and edgeR v4.
\emph{NAR Genomics and Bioinformatics} 6(4), lqae151.
\doi{10.1093/nargab/lqae151}.

Baldoni PL, Chen L, Li M, Chen Y, Smyth GK (2025).
Dividing out quantification uncertainty enables assessment of differential transcript usage with limma and edgeR.
\emph{bioRxiv}
\doi{10.1101/2025.04.07.647659}.

Chen Y, Chen L, Lun ATL, Baldoni PL, Smyth GK (2025).
edgeR v4: powerful differential analysis of sequencing data with expanded functionality and improved support for small counts and larger datasets.
\emph{Nucleic Acids Research} 53(2), gkaf018.
\doi{10.1093/nar/gkaf018}

Dunn PK, Smyth GK (2018).
\emph{Generalized linear models with examples in R}.
Springer, New York, NY.
\doi{10.1007/978-1-4419-0118-7}

Smyth, G. K. (1989).
Generalized linear models with varying dispersion.
\emph{J. R. Statist. Soc. B}, \bold{51}, 47-61.
\doi{10.1111/j.2517-6161.1989.tb01747.x}
}

\examples{
# Poisson example
lambda <- 3
nsim <- 1e4
y <- rpois(nsim, lambda=lambda)
d <- poisson()$dev.resids(y=y, mu=rep(lambda,nsim), wt=1)
c(mean=mean(d), variance=var(d))
unlist(expectedDeviance(mu=lambda, family="poisson"))

# binomial example
n <- 10
p <- 0.01
y <- rbinom(nsim, prob=p, size=n)
d <- binomial()$dev.resids(y=y/n, mu=rep(p,nsim), wt=n)
c(mean=mean(d), variance=var(d))
unlist(expectedDeviance(mu=p, family="binomial", binom.size=n))

# gamma example
alpha <- 5
beta <- 2
y <- beta * rgamma(1e4, shape=alpha)
d <- Gamma()$dev.resids(y=y, mu=rep(alpha*beta,n), wt=alpha)
c(mean=mean(d), variance=var(d))
unlist(expectedDeviance(mu=alpha*beta, family="Gamma", gamma.shape=alpha))

# negative binomial example
library(MASS)
mu <- 10
phi <- 0.2
y <- rnbinom(nsim, mu=mu, size=1/phi)
f <- MASS::negative.binomial(theta=1/phi)
d <- f$dev.resids(y=y, mu=rep(mu,nsim), wt=1)
c(mean=mean(d), variance=var(d))
unlist(expectedDeviance(mu=mu, family="negative.binomial", nbinom.size=1/phi))

# binomial expected deviance tends to zero for p small:
p <- seq(from=0.001,to=0.11,len=200)
ed <- expectedDeviance(mu=p,family="binomial",binom.size=10)
plot(p,ed$mean,type="l")
}

\seealso{
\code{\link{family}}, \code{\link{meanval.digamma}}, \code{\link{d2cumulant.digamma}}.
}

\keyword{distributions}