% File: int_pctl.Rd
%
% package info (click to toggle)
% r-cran-rsample 1.2.1%2Bdfsg-1
% links: PTS, VCS
% area: main
% in suites: forky, sid
% size: 1,932 kB
% sloc: sh: 13; makefile: 2
% file content (117 lines) | stat: -rw-r--r-- 3,731 bytes
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bootci.R
\name{int_pctl}
\alias{int_pctl}
\alias{int_pctl.bootstraps}
\alias{int_t}
\alias{int_t.bootstraps}
\alias{int_bca}
\alias{int_bca.bootstraps}
\title{Bootstrap confidence intervals}
\usage{
int_pctl(.data, ...)

\method{int_pctl}{bootstraps}(.data, statistics, alpha = 0.05, ...)

int_t(.data, ...)

\method{int_t}{bootstraps}(.data, statistics, alpha = 0.05, ...)

int_bca(.data, ...)

\method{int_bca}{bootstraps}(.data, statistics, alpha = 0.05, .fn, ...)
}
\arguments{
\item{.data}{A data frame containing the bootstrap resamples created using
\code{bootstraps()}. For t- and BCa-intervals, the \code{apparent} argument
should be set to \code{TRUE}. Even if the \code{apparent} argument is set to
\code{TRUE} for the percentile method, the apparent data is never used in calculating
the percentile confidence interval.}

\item{...}{Arguments to pass to \code{.fn} (\code{int_bca()} only).}

\item{statistics}{An unquoted column name or \code{dplyr} selector that identifies
a single column in the data set containing the individual bootstrap
estimates. This must be a list column of tidy tibbles (with columns
\code{term} and \code{estimate}). For t-intervals, a column of
standard errors (usually called \code{std.err}) is also required.
See the examples below.}

\item{alpha}{Level of significance.}

\item{.fn}{A function to calculate the statistic of interest. The
function should take an \code{rsplit} as the first argument, and the
\code{...} argument is required.}
}
\value{
Each function returns a tibble with columns \code{.lower},
\code{.estimate}, \code{.upper}, \code{.alpha}, \code{.method}, and \code{term}.
\code{.method} is the type of interval (e.g., "percentile",
"student-t", or "BCa"). \code{term} is the name of the estimate. Note
that the \code{.estimate} returned from \code{int_pctl()}
is the mean of the estimates from the bootstrap resamples
and not the estimate from the apparent model.
}
\description{
Calculate bootstrap confidence intervals using various methods.
}
\details{
Percentile intervals are the standard method of
obtaining confidence intervals but require thousands of
resamples to be accurate. T-intervals may need fewer
resamples but require a corresponding variance estimate.
Bias-corrected and accelerated intervals require the original function
that was used to create the statistics of interest and are
computationally taxing.
}
\examples{
% The example below also loads the Sacramento data from the modeldata package, so guard on it as well as broom.
\dontshow{if (rlang::is_installed("broom") && rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
\donttest{
library(broom)
library(dplyr)
library(purrr)
library(tibble)

# Fit mpg ~ disp + hp on the analysis portion of one resample and return the
# result of tidy() on that fit. The dots are accepted but not used in the body.
lm_est <- function(split, ...) {
  fit <- lm(mpg ~ disp + hp, data = analysis(split))
  tidy(fit)
}

# fix the RNG seed before drawing the resamples
set.seed(52156)
# 500 bootstrap resamples of mtcars; apparent = TRUE is set because the
# t- and BCa-intervals computed below should have it
car_rs <-
  bootstraps(mtcars, 500, apparent = TRUE) \%>\%
  mutate(results = map(splits, lm_est))

# percentile, t-, and BCa intervals from the same list column of estimates
int_pctl(car_rs, results)
int_t(car_rs, results)
# the BCa method also needs the function that created the statistics
int_bca(car_rs, results, .fn = lm_est)

# putting results into a tidy format
# Spearman correlation between sqft and price on the analysis portion of one
# resample, returned as a tibble with columns term, estimate, and std.err.
rank_corr <- function(split) {
  resampled <- analysis(split)
  rho <- cor(resampled$sqft, resampled$price, method = "spearman")
  # no analytical std.err is known, so t-intervals are not available
  tibble(term = "corr", estimate = rho, std.err = NA_real_)
}

# fix the RNG seed before drawing the resamples
set.seed(69325)
# the Sacramento data set is loaded from the modeldata package
data(Sacramento, package = "modeldata")
# 1000 bootstrap resamples; only the percentile interval is requested because
# rank_corr() returns NA for std.err
bootstraps(Sacramento, 1000, apparent = TRUE) \%>\%
  mutate(correlations = map(splits, rank_corr)) \%>\%
  int_pctl(correlations)
}
\dontshow{\}) # examplesIf}
}
\references{
\url{https://rsample.tidymodels.org/articles/Applications/Intervals.html}

Davison, A., & Hinkley, D. (1997). \emph{Bootstrap Methods and their
Application}. Cambridge: Cambridge University Press.
\doi{10.1017/CBO9780511802843}
}
\seealso{
\code{\link[=reg_intervals]{reg_intervals()}}
}