1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231
|
#' Tidy parameter selection
#'
#' Parameter selection in the style of **dplyr** and other tidyverse packages.
#'
#' @name tidy-params
#'
#' @details
#' As of version `1.7.0`, **bayesplot** allows the `pars` argument for [MCMC
#' plots][bayesplot::MCMC-overview] to use "tidy" variable selection (in the
#' style of the **dplyr** package). The [`vars()`][dplyr::vars] function is
#' re-exported from **dplyr** for this purpose.
#'
#' Features of tidy selection includes direct selection (`vars(alpha, sigma)`),
#' everything-but selection (`vars(-alpha)`), ranged selection
#' (``vars(`beta[1]`:`beta[3]`)``), support for selection functions
#' (`vars(starts_with("beta"))`), and combinations of these features. See the
#' **Examples** section, below.
#'
#' When using `pars` for tidy parameter selection, the `regex_pars` argument is
#' ignored because **bayesplot** supports using [tidyselect helper
#' functions][tidyselect::select_helpers] (`starts_with()`, `contains()`,
#' `num_range()`, etc.) for the same purpose. **bayesplot** also exports some
#' additional helper functions to help with parameter selection:
#'
#' * `param_range()`: like [`num_range()`][tidyselect::num_range] but used
#' when parameter indexes are in brackets (e.g. `beta[2]`).
#'
#' * `param_glue()`: for more complicated parameter names with multiple
#' indexes (including variable names) inside the brackets
#' (e.g., `beta[(Intercept) age_group:3]`).
#'
#' These functions can be used inside of `vars()`, `dplyr::select()`,
#' and similar functions, just like the
#' [tidyselect helper functions][tidyselect::select_helpers].
#'
#' @section Extra Advice:
#'
#' Parameter names in `vars()` are not quoted. When the names contain special
#' characters like brackets, they should be wrapped in backticks, as in
#' ``vars(`beta[1]`)``.
#'
#' To exclude a range of variables, wrap the sequence in parentheses and then
#' negate it. For example, (``vars(-(`beta[1]`:`beta[3]`))``) would exclude
#' `beta[1]`, `beta[2]`, and `beta[3]`.
#'
#' `vars()` is a helper function. It holds onto the names and expressions used
#' to select columns. When selecting variables inside a **bayesplot**
#' function, use `vars(...)`: `mcmc_hist(data, pars = vars(alpha))`. When
#' using `select()` to prepare a dataframe for a **bayesplot** function, do
#' not use `vars()`: `data %>% select(alpha) %>% mcmc_hist()`.
#'
#' Internally, tidy selection works by converting names and expressions
#' into position numbers. As a result, integers will select parameters;
#' `vars(1, 3)` selects the first and third ones. We do not endorse this
#' approach because positions might change as variables are added and
#' removed from models. To select a parameter that happens to be called `1`,
#' use backticks to escape it ``vars(`1`)``.
#'
#' @seealso [glue::glue()]
#'
#' @examples
#' x <- example_mcmc_draws(params = 6)
#' dimnames(x)
#' mcmc_hex(x, pars = vars(alpha, `beta[2]`))
#' mcmc_dens(x, pars = vars(sigma, contains("beta")))
#' mcmc_hist(x, pars = vars(-contains("beta")))
#'
#' # using the param_range() helper
#' mcmc_hist(x, pars = vars(param_range("beta", c(1, 3, 4))))
#'
#' \donttest{
#' #############################
#' ## Examples using rstanarm ##
#' #############################
#' if (requireNamespace("rstanarm", quietly = TRUE)) {
#' # see ?rstanarm::example_model
#' fit <- example("example_model", package = "rstanarm", local=TRUE)$value
#' print(fit)
#' posterior <- as.data.frame(fit)
#' str(posterior)
#'
#' color_scheme_set("brightblue")
#' mcmc_hist(posterior, pars = vars(size, contains("period")))
#'
#' # same as previous but using dplyr::select() and piping
#' library("dplyr")
#' posterior %>%
#' select(size, contains("period")) %>%
#' mcmc_hist()
#'
#' mcmc_intervals(posterior, pars = vars(contains("herd")))
#' mcmc_intervals(posterior, pars = vars(contains("herd"), -contains("Sigma")))
#'
#' bayesplot_theme_set(ggplot2::theme_dark())
#' color_scheme_set("viridisC")
#' mcmc_areas_ridges(posterior, pars = vars(starts_with("b[")))
#'
#' bayesplot_theme_set()
#' color_scheme_set("purple")
#' not_789 <- vars(starts_with("b["), -matches("[7-9]"))
#' mcmc_intervals(posterior, pars = not_789)
#'
#' # using the param_glue() helper
#' just_149 <- vars(param_glue("b[(Intercept) herd:{level}]", level = c(1,4,9)))
#' mcmc_intervals(posterior, pars = just_149)
#'
#' # same but using param_glue() with dplyr::select()
#' # before passing to bayesplot
#' posterior %>%
#' select(param_glue("b[(Intercept) herd:{level}]",
#' level = c(1, 4, 9))) %>%
#' mcmc_intervals()
#' }
#'}
NULL
# re-export vars for tidy parameter selection
#' @importFrom dplyr vars
#' @export
dplyr::vars
#' @rdname tidy-params
#' @export
#' @param vars `NULL` or a character vector of parameter names to choose from.
#' This is only needed for the atypical use case of calling the function as a
#' standalone function outside of `vars()`, `select()`, etc. Typically this is
#' left as `NULL` and will be set automatically for the user.
#' @param prefix,range For `param_range()` only, `prefix` is a string naming a
#' parameter and `range` is an integer vector providing the indices of a
#' subset of elements to select. For example, using
#'
#' param_range("beta", c(1,2,8))
#'
#' would select parameters named `beta[1]`, `beta[2]`, and `beta[8]`.
#' `param_range()` is only designed for the case that the indices are integers
#' surrounded by brackets. If there are no brackets use
#' [num_range()][tidyselect::select_helpers].
#'
param_range <- function(prefix, range, vars = NULL) {
if (!is.null(vars) && !is.character(vars)) {
abort("'vars' must be NULL or a character vector.")
}
nms <- paste0(prefix, "[", range, "]")
param_matches <- match(nms, vars %||% tidyselect::peek_vars())
param_matches[!is.na(param_matches)]
}
#' @rdname tidy-params
#' @export
#' @param pattern,... For `param_glue()` only, `pattern` is a string containing
#' expressions enclosed in braces and `...` should be named arguments
#' providing one character vector per expression in braces in `pattern`. It is
#' easiest to describe how to use these arguments with an example:
#'
#' param_glue("beta_{var}[{level}]",
#' var = c("age", "income"),
#' level = c(3,8))
#'
#' would select parameters with names
#' `"beta_age[3]"`, `"beta_income[3]"`, `"beta_age[8]"`, `"beta_income[8]"`.
#'
#' @examples
#' \dontrun{
#' ###################################
#' ## More examples of param_glue() ##
#' ###################################
#' library(dplyr)
#' posterior <- tibble(
#' b_Intercept = rnorm(1000),
#' sd_condition__Intercept = rexp(1000),
#' sigma = rexp(1000),
#' `r_condition[A,Intercept]` = rnorm(1000),
#' `r_condition[B,Intercept]` = rnorm(1000),
#' `r_condition[C,Intercept]` = rnorm(1000),
#' `r_condition[A,Slope]` = rnorm(1000),
#' `r_condition[B,Slope]` = rnorm(1000)
#' )
#' posterior
#'
#' # using one expression in braces
#' posterior %>%
#' select(
#' param_glue("r_condition[{level},Intercept]", level = c("A", "B"))
#' ) %>%
#' mcmc_hist()
#'
#' # using multiple expressions in braces
#' posterior %>%
#' select(
#' param_glue(
#' "r_condition[{level},{type}]",
#' level = c("A", "B"),
#' type = c("Intercept", "Slope"))
#' ) %>%
#' mcmc_hist()
#'}
param_glue <- function(pattern, ..., vars = NULL) {
if (!is.null(vars) && !is.character(vars)) {
abort("'vars' must be NULL or a character vector.")
}
dots <- as.list(expand.grid(...))
nms <- as.character(glue::glue_data(dots, pattern))
param_matches <- match(nms, vars %||% tidyselect::peek_vars())
param_matches[!is.na(param_matches)]
}
# internal ----------------------------------------------------------------
#' Internal function for tidy parameter selection
#'
#' This function is called internally by `prepare_mcmc_array()` if the user's
#' `pars` argument is a quosure.
#'
#' @noRd
#' @param complete_pars A character vector of *all* parameter names.
#' @param pars_list A list of columns generated by `vars()`.
#' @return Character vector of selected parameter names.
tidyselect_parameters <- function(complete_pars, pars_list) {
# We use the list of helpers so that we don't have to keep track of any
# changes to tidyselect. We use `env_bury()`` so that the definitions of
# selection helpers are available. This pattern is taken from the example code
# in `vars_select_helpers`.
helpers <- tidyselect::vars_select_helpers
pars_list <- lapply(pars_list, rlang::env_bury, !!! helpers)
selected <- tidyselect::vars_select(.vars = complete_pars, !!! pars_list)
if (!length(selected)) {
abort("No parameters were found matching those names.")
}
unname(selected)
}
|