1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129
|
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/E_loo.R
\name{E_loo}
\alias{E_loo}
\alias{E_loo.default}
\alias{E_loo.matrix}
\title{Compute weighted expectations}
\usage{
E_loo(x, psis_object, ...)
\method{E_loo}{default}(
x,
psis_object,
...,
type = c("mean", "variance", "sd", "quantile"),
probs = NULL,
log_ratios = NULL
)
\method{E_loo}{matrix}(
x,
psis_object,
...,
type = c("mean", "variance", "sd", "quantile"),
probs = NULL,
log_ratios = NULL
)
}
\arguments{
\item{x}{A numeric vector or matrix.}
\item{psis_object}{An object returned by \code{\link[=psis]{psis()}}.}
\item{...}{Arguments passed to individual methods.}
\item{type}{The type of expectation to compute. The options are
\code{"mean"}, \code{"variance"}, \code{"sd"}, and \code{"quantile"}.}
\item{probs}{For computing quantiles, a vector of probabilities.}
\item{log_ratios}{Optionally, a vector or matrix (the same dimensions as \code{x})
of raw (not smoothed) log ratios. If working with log-likelihood values,
the log ratios are the \strong{negative} of those values. If \code{log_ratios} is
specified we are able to compute more accurate \link[=pareto-k-diagnostic]{Pareto k}
diagnostics specific to \code{E_loo()}.}
}
\value{
A named list with the following components:
\describe{
\item{\code{value}}{
The result of the computation.
For the matrix method, \code{value} is a vector with \code{ncol(x)}
elements, with one exception: when \code{type="quantile"} and
multiple values are specified in \code{probs} the \code{value} component of
the returned object is a \code{length(probs)} by \code{ncol(x)} matrix.
For the default/vector method the \code{value} component is scalar, with
one exception: when \code{type="quantile"} and multiple values
are specified in \code{probs} the \code{value} component is a vector with
\code{length(probs)} elements.
}
\item{\code{pareto_k}}{
Function-specific diagnostic.
For the matrix method it will be a vector of length \code{ncol(x)}
containing estimates of the shape parameter \eqn{k} of the
generalized Pareto distribution. For the default/vector method,
the estimate is a scalar. If \code{log_ratios} is not specified when
calling \code{E_loo()}, the smoothed log-weights are used to estimate
Pareto-k's, which may produce optimistic estimates.
For \code{type="mean"}, \code{type="var"}, and \code{type="sd"}, the returned Pareto-k is
usually the maximum of the Pareto-k's for the left and right tail of \eqn{hr}
and the right tail of \eqn{r}, where \eqn{r} is the importance ratio and
\eqn{h=x} for \code{type="mean"} and \eqn{h=x^2} for \code{type="var"} and \code{type="sd"}.
If \eqn{h} is binary, constant, or not finite, or if \code{type="quantile"}, the
returned Pareto-k is the Pareto-k for the right tail of \eqn{r}.
}
}
}
\description{
The \code{E_loo()} function computes weighted expectations (means, variances,
quantiles) using the importance weights obtained from the \link[=psis]{PSIS}
smoothing procedure. The expectations estimated by the \code{E_loo()} function
assume that the PSIS approximation is working well.
\strong{A small \link[=pareto-k-diagnostic]{Pareto k} estimate is necessary,
but not sufficient, for \code{E_loo()} to give reliable estimates}. If the
\code{log_ratios} argument is provided, \code{E_loo()} also computes a function
specific Pareto k diagnostic, which must also be small for a reliable
estimate. See more details below.
}
\examples{
\donttest{
if (requireNamespace("rstanarm", quietly = TRUE)) {
# Use rstanarm package to quickly fit a model and get both a log-likelihood
# matrix and draws from the posterior predictive distribution
library("rstanarm")
# data from help("lm")
ctl <- c(4.17,5.58,5.18,6.11,4.50,4.61,5.17,4.53,5.33,5.14)
trt <- c(4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69)
d <- data.frame(
weight = c(ctl, trt),
group = gl(2, 10, 20, labels = c("Ctl","Trt"))
)
fit <- stan_glm(weight ~ group, data = d, refresh = 0)
yrep <- posterior_predict(fit)
dim(yrep)
log_ratios <- -1 * log_lik(fit)
dim(log_ratios)
r_eff <- relative_eff(exp(-log_ratios), chain_id = rep(1:4, each = 1000))
psis_object <- psis(log_ratios, r_eff = r_eff, cores = 2)
E_loo(yrep, psis_object, type = "mean")
E_loo(yrep, psis_object, type = "var")
E_loo(yrep, psis_object, type = "sd")
E_loo(yrep, psis_object, type = "quantile", probs = 0.5) # median
E_loo(yrep, psis_object, type = "quantile", probs = c(0.1, 0.9))
# We can get more accurate Pareto k diagnostic if we also provide
# the log_ratios argument
E_loo(yrep, psis_object, type = "mean", log_ratios = log_ratios)
}
}
}
|