1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194
|
#' Convenience function for computing relative efficiencies
#'
#' `relative_eff()` computes the the MCMC effective sample size divided by
#' the total sample size.
#'
#' @export
#' @param x A vector, matrix, 3-D array, or function. See the **Methods (by
#' class)** section below for details on specifying `x`, but where
#' "log-likelihood" is mentioned replace it with one of the following
#' depending on the use case:
#' * For use with the [loo()] function, the values in `x` (or generated by
#' `x`, if a function) should be **likelihood** values
#' (i.e., `exp(log_lik)`), not on the log scale.
#' * For generic use with [psis()], the values in `x` should be the reciprocal
#' of the importance ratios (i.e., `exp(-log_ratios)`).
#' @param chain_id A vector of length `NROW(x)` containing MCMC chain
#' indexes for each each row of `x` (if a matrix) or each value in
#' `x` (if a vector). No `chain_id` is needed if `x` is a 3-D
#' array. If there are `C` chains then valid chain indexes are values
#' in `1:C`.
#' @param cores The number of cores to use for parallelization.
#' @return A vector of relative effective sample sizes.
#'
#' @examples
#' LLarr <- example_loglik_array()
#' LLmat <- example_loglik_matrix()
#' dim(LLarr)
#' dim(LLmat)
#'
#' rel_n_eff_1 <- relative_eff(exp(LLarr))
#' rel_n_eff_2 <- relative_eff(exp(LLmat), chain_id = rep(1:2, each = 500))
#' all.equal(rel_n_eff_1, rel_n_eff_2)
#'
relative_eff <- function(x, ...) {
UseMethod("relative_eff")
}
#' @export
#' @templateVar fn relative_eff
#' @template vector
#'
relative_eff.default <- function(x, chain_id, ...) {
dim(x) <- c(length(x), 1)
class(x) <- "matrix"
relative_eff.matrix(x, chain_id)
}
#' @export
#' @templateVar fn relative_eff
#' @template matrix
#'
relative_eff.matrix <- function(x, chain_id, ..., cores = getOption("mc.cores", 1)) {
x <- llmatrix_to_array(x, chain_id)
relative_eff.array(x, cores = cores)
}
#' @export
#' @templateVar fn relative_eff
#' @template array
#'
relative_eff.array <- function(x, ..., cores = getOption("mc.cores", 1)) {
stopifnot(length(dim(x)) == 3)
S <- prod(dim(x)[1:2]) # posterior sample size = iter * chains
if (cores == 1) {
n_eff_vec <- apply(x, 3, posterior::ess_mean)
} else {
if (!os_is_windows()) {
n_eff_list <-
parallel::mclapply(
mc.cores = cores,
X = seq_len(dim(x)[3]),
FUN = function(i) posterior::ess_mean(x[, , i, drop = TRUE])
)
} else {
cl <- parallel::makePSOCKcluster(cores)
on.exit(parallel::stopCluster(cl))
n_eff_list <-
parallel::parLapply(
cl = cl,
X = seq_len(dim(x)[3]),
fun = function(i) posterior::ess_mean(x[, , i, drop = TRUE])
)
}
n_eff_vec <- unlist(n_eff_list, use.names = FALSE)
}
return(n_eff_vec / S)
}
#' @export
#' @templateVar fn relative_eff
#' @template function
#' @param data,draws,... Same as for the [loo()] function method.
#'
relative_eff.function <-
function(x,
chain_id,
...,
cores = getOption("mc.cores", 1),
data = NULL,
draws = NULL) {
f_i <- validate_llfun(x) # not really an llfun, should return exp(ll) or exp(-ll)
N <- dim(data)[1]
if (cores == 1) {
n_eff_list <-
lapply(
X = seq_len(N),
FUN = function(i) {
val_i <- f_i(data_i = data[i, , drop = FALSE], draws = draws, ...)
relative_eff.default(as.vector(val_i), chain_id = chain_id, cores = 1)
}
)
} else {
if (!os_is_windows()) {
n_eff_list <-
parallel::mclapply(
X = seq_len(N),
FUN = function(i) {
val_i <- f_i(data_i = data[i, , drop = FALSE], draws = draws, ...)
relative_eff.default(as.vector(val_i), chain_id = chain_id, cores = 1)
},
mc.cores = cores
)
} else {
cl <- parallel::makePSOCKcluster(cores)
parallel::clusterExport(cl=cl, varlist=c("draws", "chain_id", "data"), envir=environment())
on.exit(parallel::stopCluster(cl))
n_eff_list <-
parallel::parLapply(
cl = cl,
X = seq_len(N),
fun = function(i) {
val_i <- f_i(data_i = data[i, , drop = FALSE], draws = draws, ...)
relative_eff.default(as.vector(val_i), chain_id = chain_id, cores = 1)
}
)
}
}
n_eff_vec <- unlist(n_eff_list, use.names = FALSE)
return(n_eff_vec)
}
#' @export
#' @describeIn relative_eff
#' If `x` is an object of class `"psis"`, `relative_eff()` simply returns
#' the `r_eff` attribute of `x`.
relative_eff.importance_sampling <- function(x, ...) {
attr(x, "r_eff")
}
# internal ----------------------------------------------------------------
#' Effective sample size for PSIS
#'
#' @noRd
#' @param w A vector or matrix (one column per observation) of normalized Pareto
#' smoothed weights (not log weights).
#' @param r_eff Relative effective sample size of `exp(log_lik)` or
#' `exp(-log_ratios)`. `r_eff` should be a scalar if `w` is a
#' vector and a vector of length `ncol(w)` if `w` is a matrix.
#' @return A scalar if `w` is a vector. A vector of length `ncol(w)`
#' if `w` is matrix.
#'
psis_n_eff <- function(w, ...) {
UseMethod("psis_n_eff")
}
#' @export
psis_n_eff.default <- function(w, r_eff = NULL, ...) {
ss <- sum(w^2)
if (is.null(r_eff)) {
return(1 / ss)
}
stopifnot(length(r_eff) == 1)
1 / ss * r_eff
}
#' @export
psis_n_eff.matrix <- function(w, r_eff = NULL, ...) {
ss <- colSums(w^2)
if (is.null(r_eff)) {
return(1 / ss)
}
if (length(r_eff) != length(ss) && length(r_eff) != 1) {
stop("r_eff must have length 1 or ncol(w).", call. = FALSE)
}
1 / ss * r_eff
}
|