File: PPC-errors.Rd

package info (click to toggle)
r-cran-bayesplot 1.11.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 7,080 kB
  • sloc: sh: 13; makefile: 2
file content (225 lines) | stat: -rw-r--r-- 7,526 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ppc-errors.R
\name{PPC-errors}
\alias{PPC-errors}
\alias{ppc_error_hist}
\alias{ppc_error_hist_grouped}
\alias{ppc_error_scatter}
\alias{ppc_error_scatter_avg}
\alias{ppc_error_scatter_avg_grouped}
\alias{ppc_error_scatter_avg_vs_x}
\alias{ppc_error_binned}
\alias{ppc_error_data}
\title{PPC errors}
\usage{
ppc_error_hist(
  y,
  yrep,
  ...,
  facet_args = list(),
  binwidth = NULL,
  bins = NULL,
  breaks = NULL,
  freq = TRUE
)

ppc_error_hist_grouped(
  y,
  yrep,
  group,
  ...,
  facet_args = list(),
  binwidth = NULL,
  bins = NULL,
  breaks = NULL,
  freq = TRUE
)

ppc_error_scatter(y, yrep, ..., facet_args = list(), size = 2.5, alpha = 0.8)

ppc_error_scatter_avg(y, yrep, ..., size = 2.5, alpha = 0.8)

ppc_error_scatter_avg_grouped(
  y,
  yrep,
  group,
  ...,
  facet_args = list(),
  size = 2.5,
  alpha = 0.8
)

ppc_error_scatter_avg_vs_x(y, yrep, x, ..., size = 2.5, alpha = 0.8)

ppc_error_binned(
  y,
  yrep,
  ...,
  facet_args = list(),
  bins = NULL,
  size = 1,
  alpha = 0.25
)

ppc_error_data(y, yrep, group = NULL)
}
\arguments{
\item{y}{A vector of observations. See \strong{Details}.}

\item{yrep}{An \code{S} by \code{N} matrix of draws from the posterior (or prior)
predictive distribution. The number of rows, \code{S}, is the size of the
posterior (or prior) sample used to generate \code{yrep}. The number of columns,
\code{N}, is the number of predicted observations (\code{length(y)}). The columns of
\code{yrep} should be in the same order as the data points in \code{y} for the plots
to make sense. See the \strong{Details} and \strong{Plot Descriptions} sections for
additional advice specific to particular plots.}

\item{...}{Currently unused.}

\item{facet_args}{A named list of arguments (other than \code{facets}) passed
to \code{\link[ggplot2:facet_wrap]{ggplot2::facet_wrap()}} or \code{\link[ggplot2:facet_grid]{ggplot2::facet_grid()}}
to control faceting. Note: if \code{scales} is not included in \code{facet_args}
then \strong{bayesplot} may use \code{scales="free"} as the default (depending
on the plot) instead of the \strong{ggplot2} default of \code{scales="fixed"}.}

\item{binwidth}{Passed to \code{\link[ggplot2:geom_histogram]{ggplot2::geom_histogram()}} to override
the default binwidth.}

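\item{bins}{For the histogram plots, passed to
\code{\link[ggplot2:geom_histogram]{ggplot2::geom_histogram()}} to set the number of bins.
For \code{ppc_error_binned()}, the number of bins to use (approximately).}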

\item{breaks}{Passed to \code{\link[ggplot2:geom_histogram]{ggplot2::geom_histogram()}} as an
alternative to \code{binwidth}.}

\item{freq}{For histograms, \code{freq=TRUE} (the default) puts count on the
y-axis. Setting \code{freq=FALSE} puts density on the y-axis. (For many
plots the y-axis text is off by default. To view the count or density
labels on the y-axis see the \code{\link[=yaxis_text]{yaxis_text()}} convenience
function.)}

\item{group}{A grouping variable of the same length as \code{y}.
Will be coerced to \link[base:factor]{factor} if not already a factor.
Each value in \code{group} is interpreted as the group level pertaining
to the corresponding observation.}

\item{size, alpha}{For scatterplots, arguments passed to
\code{\link[ggplot2:geom_point]{ggplot2::geom_point()}} to control the appearance of the points. For the
binned error plot, arguments controlling the size of the outline and
opacity of the shaded region indicating the 2-SE bounds.}

\item{x}{A numeric vector of the same length as \code{y} to use as the x-axis
variable.}
}
\value{
A ggplot object that can be further customized using the \strong{ggplot2} package.
}
\description{
Various plots of predictive errors \code{y - yrep}. See the
\strong{Details} and \strong{Plot Descriptions} sections, below.
}
\details{
All of these functions (aside from the \verb{*_scatter_avg} functions)
compute and plot predictive errors for each row of the matrix \code{yrep}, so
it is usually a good idea for \code{yrep} to contain only a small number of
draws (rows). See \strong{Examples}, below.

For binomial and Bernoulli data the \code{ppc_error_binned()} function can be used
to generate binned error plots. Bernoulli data can be input as a vector of 0s
and 1s, whereas for binomial data \code{y} and \code{yrep} should contain "success"
proportions (not counts). See the \strong{Examples} section, below.
}
\section{Plot Descriptions}{

\describe{
\item{\code{ppc_error_hist()}}{
A separate histogram is plotted for the predictive errors computed from
\code{y} and each dataset (row) in \code{yrep}. For this plot \code{yrep} should have
only a small number of rows.
}
\item{\code{ppc_error_hist_grouped()}}{
Like \code{ppc_error_hist()}, except errors are computed within levels of a
grouping variable. The number of histograms is therefore equal to the
product of the number of rows in \code{yrep} and the number of groups
(unique values of \code{group}).
}
\item{\code{ppc_error_scatter()}}{
A separate scatterplot is displayed for \code{y} vs. the predictive errors
computed from \code{y} and each dataset (row) in \code{yrep}. For this plot \code{yrep}
should have only a small number of rows.
}
\item{\code{ppc_error_scatter_avg()}}{
A single scatterplot of \code{y} vs. the average of the errors computed from
\code{y} and each dataset (row) in \code{yrep}. For each individual data point
\code{y[n]} the average error is the average of the errors for \code{y[n]} computed
over the draws from the posterior predictive distribution.
}
\item{\code{ppc_error_scatter_avg_vs_x()}}{
Same as \code{ppc_error_scatter_avg()}, except the average is plotted on the
y-axis and a predictor variable \code{x} is plotted on the x-axis.
}
\item{\code{ppc_error_binned()}}{
Intended for use with binomial data. A separate binned error plot (similar
to \code{arm::binnedplot()}) is generated for each dataset (row) in \code{yrep}. For
this plot \code{y} and \code{yrep} should contain proportions rather than counts,
and \code{yrep} should have only a small number of rows.
}
}
}

\examples{
y <- example_y_data()
yrep <- example_yrep_draws()
ppc_error_hist(y, yrep[1:3, ])

# errors within groups
group <- example_group_data()
(p1 <- ppc_error_hist_grouped(y, yrep[1:3, ], group))
p1 + yaxis_text() # defaults to showing counts on y-axis
\donttest{
table(group) # more obs in GroupB, can set freq=FALSE to show density on y-axis
(p2 <- ppc_error_hist_grouped(y, yrep[1:3, ], group, freq = FALSE))
p2 + yaxis_text()
}

# scatterplots
ppc_error_scatter(y, yrep[10:14, ])
ppc_error_scatter_avg(y, yrep)

x <- example_x_data()
ppc_error_scatter_avg_vs_x(y, yrep, x)

\dontrun{
# binned error plot with binomial model from rstanarm
library(rstanarm)
example("example_model", package = "rstanarm")
formula(example_model)

# get observed proportion of "successes"
y <- example_model$y  # matrix of "success" and "failure" counts
trials <- rowSums(y)
y_prop <- y[, 1] / trials  # proportions

# get predicted success proportions
yrep <- posterior_predict(example_model)
yrep_prop <- sweep(yrep, 2, trials, "/")

ppc_error_binned(y_prop, yrep_prop[1:6, ])
}

}
\references{
Gelman, A., Carlin, J. B., Stern, H. S., Dunson, D. B., Vehtari,
A., and Rubin, D. B. (2013). \emph{Bayesian Data Analysis.} Chapman & Hall/CRC
Press, London, third edition. (Ch. 6)
}
\seealso{
Other PPCs: 
\code{\link{PPC-censoring}},
\code{\link{PPC-discrete}},
\code{\link{PPC-distributions}},
\code{\link{PPC-intervals}},
\code{\link{PPC-loo}},
\code{\link{PPC-overview}},
\code{\link{PPC-scatterplots}},
\code{\link{PPC-test-statistics}}
}
\concept{PPCs}