File: PPC-errors.Rd

package info (click to toggle)
r-cran-bayesplot 1.11.1-1
links: PTS, VCS
area: main
in suites: forky, sid, trixie
size: 7,080 kB
sloc: sh: 13; makefile: 2
file content (225 lines) | stat: -rw-r--r-- 7,526 bytes
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ppc-errors.R
\name{PPC-errors}
\alias{PPC-errors}
\alias{ppc_error_hist}
\alias{ppc_error_hist_grouped}
\alias{ppc_error_scatter}
\alias{ppc_error_scatter_avg}
\alias{ppc_error_scatter_avg_grouped}
\alias{ppc_error_scatter_avg_vs_x}
\alias{ppc_error_binned}
\alias{ppc_error_data}
\title{PPC errors}
\usage{
ppc_error_hist(
  y,
  yrep,
  ...,
  facet_args = list(),
  binwidth = NULL,
  bins = NULL,
  breaks = NULL,
  freq = TRUE
)

ppc_error_hist_grouped(
  y,
  yrep,
  group,
  ...,
  facet_args = list(),
  binwidth = NULL,
  bins = NULL,
  breaks = NULL,
  freq = TRUE
)

ppc_error_scatter(y, yrep, ..., facet_args = list(), size = 2.5, alpha = 0.8)

ppc_error_scatter_avg(y, yrep, ..., size = 2.5, alpha = 0.8)

ppc_error_scatter_avg_grouped(
  y,
  yrep,
  group,
  ...,
  facet_args = list(),
  size = 2.5,
  alpha = 0.8
)

ppc_error_scatter_avg_vs_x(y, yrep, x, ..., size = 2.5, alpha = 0.8)

ppc_error_binned(
  y,
  yrep,
  ...,
  facet_args = list(),
  bins = NULL,
  size = 1,
  alpha = 0.25
)

ppc_error_data(y, yrep, group = NULL)
}
\arguments{
\item{y}{A vector of observations. See \strong{Details}.}

\item{yrep}{An \code{S} by \code{N} matrix of draws from the posterior (or prior)
predictive distribution. The number of rows, \code{S}, is the size of the
posterior (or prior) sample used to generate \code{yrep}. The number of columns,
\code{N}, is the number of predicted observations (\code{length(y)}). The columns of
\code{yrep} should be in the same order as the data points in \code{y} for the plots
to make sense. See the \strong{Details} and \strong{Plot Descriptions} sections for
additional advice specific to particular plots.}

\item{...}{Currently unused.}

\item{facet_args}{A named list of arguments (other than \code{facets}) passed
to \code{\link[ggplot2:facet_wrap]{ggplot2::facet_wrap()}} or \code{\link[ggplot2:facet_grid]{ggplot2::facet_grid()}}
to control faceting. Note: if \code{scales} is not included in \code{facet_args}
then \strong{bayesplot} may use \code{scales="free"} as the default (depending
on the plot) instead of the \strong{ggplot2} default of \code{scales="fixed"}.}

\item{binwidth}{Passed to \code{\link[ggplot2:geom_histogram]{ggplot2::geom_histogram()}} to override
the default binwidth.}

\item{bins}{For \code{ppc_error_binned()}, the number of bins to use (approximately).}

\item{breaks}{Passed to \code{\link[ggplot2:geom_histogram]{ggplot2::geom_histogram()}} as an
alternative to \code{binwidth}.}

\item{freq}{For histograms, \code{freq=TRUE} (the default) puts count on the
y-axis. Setting \code{freq=FALSE} puts density on the y-axis. (For many
plots the y-axis text is off by default. To view the count or density
labels on the y-axis see the \code{\link[=yaxis_text]{yaxis_text()}} convenience
function.)}

\item{group}{A grouping variable of the same length as \code{y}.
Will be coerced to \link[base:factor]{factor} if not already a factor.
Each value in \code{group} is interpreted as the group level pertaining
to the corresponding observation.}

\item{size, alpha}{For scatterplots, arguments passed to
\code{\link[ggplot2:geom_point]{ggplot2::geom_point()}} to control the appearance of the points. For the
binned error plot, arguments controlling the size of the outline and
opacity of the shaded region indicating the 2-SE bounds.}

\item{x}{A numeric vector of the same length as \code{y} to use as the x-axis
variable.}
}
\value{
A ggplot object that can be further customized using the \strong{ggplot2} package.
}
\description{
Various plots of predictive errors \code{y - yrep}. See the
\strong{Details} and \strong{Plot Descriptions} sections, below.
}
\details{
All of these functions (aside from the \verb{*_scatter_avg} functions)
compute and plot predictive errors for each row of the matrix \code{yrep}, so
it is usually a good idea for \code{yrep} to contain only a small number of
draws (rows). See \strong{Examples}, below.

For binomial and Bernoulli data the \code{ppc_error_binned()} function can be used
to generate binned error plots. Bernoulli data can be input as a vector of 0s
and 1s, whereas for binomial data \code{y} and \code{yrep} should contain "success"
proportions (not counts). See the \strong{Examples} section, below.
}
\section{Plot Descriptions}{

\describe{
\item{\code{ppc_error_hist()}}{
A separate histogram is plotted for the predictive errors computed from
\code{y} and each dataset (row) in \code{yrep}. For this plot \code{yrep} should have
only a small number of rows.
}
\item{\code{ppc_error_hist_grouped()}}{
Like \code{ppc_error_hist()}, except errors are computed within levels of a
grouping variable. The number of histograms is therefore equal to the
product of the number of rows in \code{yrep} and the number of groups
(unique values of \code{group}).
}
\item{\code{ppc_error_scatter()}}{
A separate scatterplot is displayed for \code{y} vs. the predictive errors
computed from \code{y} and each dataset (row) in \code{yrep}. For this plot \code{yrep}
should have only a small number of rows.
}
\item{\code{ppc_error_scatter_avg()}}{
A single scatterplot of \code{y} vs. the average of the errors computed from
\code{y} and each dataset (row) in \code{yrep}. For each individual data point
\code{y[n]} the average error is the average of the errors for \code{y[n]} computed
over the draws from the posterior predictive distribution.
}
\item{\code{ppc_error_scatter_avg_vs_x()}}{
Same as \code{ppc_error_scatter_avg()}, except the average is plotted on the
y-axis and a predictor variable \code{x} is plotted on the x-axis.
}
\item{\code{ppc_error_binned()}}{
Intended for use with binomial data. A separate binned error plot (similar
to \code{arm::binnedplot()}) is generated for each dataset (row) in \code{yrep}. For
this plot \code{y} and \code{yrep} should contain proportions rather than counts,
and \code{yrep} should have only a small number of rows.
}
}
}

\examples{
y <- example_y_data()
yrep <- example_yrep_draws()
ppc_error_hist(y, yrep[1:3, ])

# errors within groups
group <- example_group_data()
(p1 <- ppc_error_hist_grouped(y, yrep[1:3, ], group))
p1 + yaxis_text() # defaults to showing counts on y-axis
\donttest{
table(group) # more obs in GroupB, can set freq=FALSE to show density on y-axis
(p2 <- ppc_error_hist_grouped(y, yrep[1:3, ], group, freq = FALSE))
p2 + yaxis_text()
}

# scatterplots
ppc_error_scatter(y, yrep[10:14, ])
ppc_error_scatter_avg(y, yrep)

x <- example_x_data()
ppc_error_scatter_avg_vs_x(y, yrep, x)

\dontrun{
# binned error plot with binomial model from rstanarm
library(rstanarm)
example("example_model", package = "rstanarm")
formula(example_model)

# get observed proportion of "successes"
y <- example_model$y  # matrix of "success" and "failure" counts
trials <- rowSums(y)
y_prop <- y[, 1] / trials  # proportions

# get predicted success proportions
yrep <- posterior_predict(example_model)
yrep_prop <- sweep(yrep, 2, trials, "/")

ppc_error_binned(y_prop, yrep_prop[1:6, ])
}

}
\references{
Gelman, A., Carlin, J. B., Stern, H. S., Dunson, D. B., Vehtari,
A., and Rubin, D. B. (2013). \emph{Bayesian Data Analysis.} Chapman & Hall/CRC
Press, London, third edition. (Ch. 6)
}
\seealso{
Other PPCs: 
\code{\link{PPC-censoring}},
\code{\link{PPC-discrete}},
\code{\link{PPC-distributions}},
\code{\link{PPC-intervals}},
\code{\link{PPC-loo}},
\code{\link{PPC-overview}},
\code{\link{PPC-scatterplots}},
\code{\link{PPC-test-statistics}}
}
\concept{PPCs}