File: int_pctl.Rd

package info (click to toggle)
r-cran-rsample 1.1.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 1,872 kB
  • sloc: sh: 13; makefile: 2
file content (108 lines) | stat: -rw-r--r-- 3,491 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bootci.R
\name{int_pctl}
\alias{int_pctl}
\alias{int_t}
\alias{int_bca}
\title{Bootstrap confidence intervals}
\usage{
int_pctl(.data, statistics, alpha = 0.05)

int_t(.data, statistics, alpha = 0.05)

int_bca(.data, statistics, alpha = 0.05, .fn, ...)
}
\arguments{
\item{.data}{A data frame containing the bootstrap resamples created using
\code{bootstraps()}. For t- and BCa-intervals, the \code{apparent} argument
should be set to \code{TRUE}. Even if the \code{apparent} argument is set to
\code{TRUE} for the percentile method, the apparent data is never used in calculating
the percentile confidence interval.}

\item{statistics}{An unquoted column name or \code{dplyr} selector that identifies
a single column in the data set containing the individual bootstrap
estimates. This must be a list column of tidy tibbles (with columns
\code{term} and \code{estimate}). For t-intervals, a
standard tidy column holding the standard error of each estimate (usually
called \code{std.err}) is also required.
See the examples below.}

\item{alpha}{Level of significance.}

\item{.fn}{A function to calculate the statistic of interest. The
function should take an \code{rsplit} as the first argument, and the \code{...}
argument is required.}

\item{...}{Arguments to pass to \code{.fn}.}
}
\value{
Each function returns a tibble with columns \code{.lower},
\code{.estimate}, \code{.upper}, \code{.alpha}, \code{.method}, and \code{term}.
\code{.method} is the type of interval (e.g., "percentile",
"student-t", or "BCa"). \code{term} is the name of the estimate. Note that
the \code{.estimate} returned from \code{int_pctl()}
is the mean of the estimates from the bootstrap resamples
and not the estimate from the apparent model.
}
\description{
Calculate bootstrap confidence intervals using various methods.
}
\details{
Percentile intervals are the standard method of
obtaining confidence intervals but require thousands of
resamples to be accurate. T-intervals may need fewer
resamples but require a corresponding variance estimate.
Bias-corrected and accelerated intervals require the original function
that was used to create the statistics of interest and are
computationally taxing.
}
\examples{
% Run the examples only when both suggested packages they rely on are installed:
% broom supplies tidy() and modeldata supplies the Sacramento data set.
\dontshow{if (rlang::is_installed("broom") && rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
\donttest{
library(broom)
library(dplyr)
library(purrr)
library(tibble)

# Fit the linear model mpg ~ disp + hp on the analysis set of one resample
# and return its coefficients as a tidy tibble. The `...` argument is unused
# here but is required of any function passed as `.fn` to int_bca().
lm_est <- function(split, ...) {
  fit <- lm(mpg ~ disp + hp, data = analysis(split))
  tidy(fit)
}

# Fix the RNG seed so the bootstrap resamples are reproducible
set.seed(52156)
# apparent = TRUE is required for the t- and BCa-intervals computed below;
# each resample gets its tidy model results stored in the list column `results`
car_rs <-
  bootstraps(mtcars, 500, apparent = TRUE) \%>\%
  mutate(results = map(splits, lm_est))

# Percentile and t-intervals from the stored estimates
int_pctl(car_rs, results)
int_t(car_rs, results)
# BCa intervals additionally need the function that created the statistics
int_bca(car_rs, results, .fn = lm_est)

# putting results into a tidy format
# Spearman rank correlation between sqft and price on the analysis set of one
# resample, returned as a one-row tibble with the columns the interval
# functions expect.
rank_corr <- function(split) {
  homes <- analysis(split)
  rho <- cor(homes$sqft, homes$price, method = "spearman")
  # no analytical standard error is known for this statistic, so std.err is
  # left missing and t-intervals are not available
  tibble(term = "corr", estimate = rho, std.err = NA_real_)
}

# Fix the RNG seed so the bootstrap resamples are reproducible
set.seed(69325)
# The Sacramento data set is loaded from the modeldata package
data(Sacramento, package = "modeldata")
# Compute the rank correlation on each of 1000 resamples, then summarise the
# estimates in the `correlations` list column with a percentile interval
bootstraps(Sacramento, 1000, apparent = TRUE) \%>\%
  mutate(correlations = map(splits, rank_corr)) \%>\%
  int_pctl(correlations)
}
\dontshow{\}) # examplesIf}
}
\references{
Davison, A., & Hinkley, D. (1997). \emph{Bootstrap Methods and their
Application}. Cambridge: Cambridge University Press.
doi:10.1017/CBO9780511802843

\url{https://rsample.tidymodels.org/articles/Applications/Intervals.html}
}
\seealso{
\code{\link[=reg_intervals]{reg_intervals()}}
}