File: PPC-censoring.Rd

package info (click to toggle)
r-cran-bayesplot 1.14.0-1
links: PTS, VCS
area: main
in suites: forky, sid
size: 7,288 kB
sloc: sh: 13; makefile: 2
file content (162 lines) | stat: -rw-r--r-- 5,660 bytes
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ppc-censoring.R
\name{PPC-censoring}
\alias{PPC-censoring}
\alias{ppc_km_overlay}
\alias{ppc_km_overlay_grouped}
\title{PPC censoring}
\usage{
ppc_km_overlay(
  y,
  yrep,
  ...,
  status_y,
  left_truncation_y = NULL,
  extrapolation_factor = 1.2,
  size = 0.25,
  alpha = 0.7
)

ppc_km_overlay_grouped(
  y,
  yrep,
  group,
  ...,
  status_y,
  left_truncation_y = NULL,
  extrapolation_factor = 1.2,
  size = 0.25,
  alpha = 0.7
)
}
\arguments{
\item{y}{A vector of observations. See \strong{Plot Descriptions}.}

\item{yrep}{An \code{S} by \code{N} matrix of draws from the posterior (or prior)
predictive distribution. The number of rows, \code{S}, is the size of the
posterior (or prior) sample used to generate \code{yrep}. The number of columns,
\code{N}, is the number of predicted observations (\code{length(y)}). The columns of
\code{yrep} should be in the same order as the data points in \code{y} for the plots
to make sense. See the \strong{Plot Descriptions} section for
additional advice specific to particular plots.}

\item{...}{Currently only used internally.}

\item{status_y}{The status indicator for the observations from \code{y}. This must
be a numeric vector of the same length as \code{y} with values in \{0, 1\} (0 =
right censored, 1 = event).}

\item{left_truncation_y}{Optional parameter that specifies left-truncation
(delayed entry) times for the observations from \code{y}. This must be a numeric
vector of the same length as \code{y}. If \code{NULL} (default), no left-truncation
is assumed.}

\item{extrapolation_factor}{A numeric value (>= 1) that controls how far the
plot is extended beyond the largest observed value in \code{y}. The default
value is 1.2, which corresponds to 20\% extrapolation. Note that not all
posterior predictive draws may be shown by default because of the
controlled extrapolation. To display all posterior predictive draws, set
\code{extrapolation_factor = Inf}.}

\item{size, alpha}{Passed to the appropriate geom to control the appearance of
the \code{yrep} distributions.}

\item{group}{A grouping variable of the same length as \code{y}.
Will be coerced to \link[base:factor]{factor} if not already a factor.
Each value in \code{group} is interpreted as the group level pertaining
to the corresponding observation.}
}
\value{
A ggplot object that can be further customized using the \strong{ggplot2} package.
}
\description{
Compare the empirical distribution of censored data \code{y} to the
distributions of simulated/replicated data \code{yrep} from the posterior
predictive distribution. See the \strong{Plot Descriptions} section, below, for
details.

Although some of the other \pkg{bayesplot} plots can be used with censored
data, \code{ppc_km_overlay()} and \code{ppc_km_overlay_grouped()} are currently
the only plotting functions designed \emph{specifically} for censored data. We
encourage you to suggest or contribute
additional plots at
\href{https://github.com/stan-dev/bayesplot}{github.com/stan-dev/bayesplot}.
}
\section{Plot Descriptions}{

\describe{
\item{\code{ppc_km_overlay()}}{
Empirical CCDF (complementary cumulative distribution function, i.e. survival
function) estimates of each dataset (row) in \code{yrep} are overlaid, with
the Kaplan-Meier estimate (Kaplan and Meier, 1958) for \code{y} itself on top
(and in a darker shade). This is a PPC suitable for right-censored \code{y}.
Note that the replicated data from \code{yrep} is assumed to be uncensored. Left
truncation (delayed entry) times for \code{y} can be specified using
\code{left_truncation_y}.
}
\item{\code{ppc_km_overlay_grouped()}}{
The same as \code{ppc_km_overlay()}, but with separate facets by \code{group}.
}
}
}

\examples{
\donttest{
# Use the "brightblue" color scheme for the plots below
color_scheme_set("brightblue")

# For illustrative purposes, (right-)censor values y > 110:
y <- example_y_data()
# status_y is computed before y is capped, so it reflects the original values:
# 1 = event (y <= 110), 0 = right censored (y > 110)
status_y <- as.numeric(y <= 110)
# Replace every value above 110 by the censoring time 110
y <- pmin(y, 110)

# In reality, the replicated data (yrep) would be obtained from a
# model which takes the censoring of y properly into account. Here,
# for illustrative purposes, we simply use example_yrep_draws():
yrep <- example_yrep_draws()
# Rows are draws, columns are observations (see the yrep argument)
dim(yrep)

# Overlay 25 curves (the first 25 rows of yrep)
ppc_km_overlay(y, yrep[1:25, ], status_y = status_y)

# With extrapolation_factor = 1 (no extrapolation)
ppc_km_overlay(y, yrep[1:25, ], status_y = status_y, extrapolation_factor = 1)

# With extrapolation_factor = Inf (show all posterior predictive draws)
ppc_km_overlay(y, yrep[1:25, ], status_y = status_y, extrapolation_factor = Inf)

# With separate facets by group:
group <- example_group_data()
ppc_km_overlay_grouped(y, yrep[1:25, ], group = group, status_y = status_y)

# With left-truncation (delayed entry) times:
# Per observation, the smaller of y and the minimum over all draws in the
# corresponding column of yrep
min_vals <- pmin(y, apply(yrep, 2, min))
# Start with a truncation time of 0 for every observation
left_truncation_y <- rep(0, length(y))
# Only observations larger than half the mean of y get a nonzero truncation time
condition <- y > mean(y) / 2
# Draw each truncation time as a random fraction (0.6 to 0.99) of y, capped
# slightly below min_vals -- presumably so that it stays below both the
# observation and every replicated value for it (TODO confirm).
# NOTE: runif() is not seeded here, so the resulting plot varies between runs.
left_truncation_y[condition] <- pmin(
  runif(sum(condition), min = 0.6, max = 0.99) * y[condition],
  min_vals[condition] - 0.001
)
ppc_km_overlay(y, yrep[1:25, ], status_y = status_y,
              left_truncation_y = left_truncation_y)
}
}
\references{
Gelman, A., Carlin, J. B., Stern, H. S., Dunson, D. B., Vehtari,
A., and Rubin, D. B. (2013). \emph{Bayesian Data Analysis.} Chapman & Hall/CRC
Press, London, third edition. (Ch. 6)

Kaplan, E. L. and Meier, P. (1958). Nonparametric estimation
from incomplete observations.
\emph{Journal of the American Statistical Association}. 53(282), 457--481.
doi:10.1080/01621459.1958.10501452.
}
\seealso{
Other PPCs: 
\code{\link{PPC-discrete}},
\code{\link{PPC-distributions}},
\code{\link{PPC-errors}},
\code{\link{PPC-intervals}},
\code{\link{PPC-loo}},
\code{\link{PPC-overview}},
\code{\link{PPC-scatterplots}},
\code{\link{PPC-test-statistics}}
}
\concept{PPCs}