File: effective_sample_sizes.R

package info (click to toggle)
r-cran-loo 2.9.0-1
links: PTS, VCS
area: main
in suites: forky, sid
size: 4,836 kB
sloc: sh: 15; makefile: 2
file content (194 lines) | stat: -rw-r--r-- 5,876 bytes
#' Convenience function for computing relative efficiencies
#'
#' `relative_eff()` computes the the MCMC effective sample size divided by
#' the total sample size.
#'
#' @export
#' @param x A vector, matrix, 3-D array, or function. See the **Methods (by
#'   class)** section below for details on specifying `x`, but where
#'   "log-likelihood" is mentioned replace it with one of the following
#'   depending on the use case:
#'   * For use with the [loo()] function, the values in `x` (or generated by
#'     `x`, if a function) should be **likelihood** values
#'     (i.e., `exp(log_lik)`), not on the log scale.
#'   * For generic use with [psis()], the values in `x` should be the reciprocal
#'     of the importance ratios (i.e., `exp(-log_ratios)`).
#' @param chain_id A vector of length `NROW(x)` containing MCMC chain
#'   indexes for each each row of `x` (if a matrix) or each value in
#'   `x` (if a vector). No `chain_id` is needed if `x` is a 3-D
#'   array. If there are `C` chains then valid chain indexes are values
#'   in `1:C`.
#' @param cores The number of cores to use for parallelization.
#' @return A vector of relative effective sample sizes.
#'
#' @examples
#' LLarr <- example_loglik_array()
#' LLmat <- example_loglik_matrix()
#' dim(LLarr)
#' dim(LLmat)
#'
#' rel_n_eff_1 <- relative_eff(exp(LLarr))
#' rel_n_eff_2 <- relative_eff(exp(LLmat), chain_id = rep(1:2, each = 500))
#' all.equal(rel_n_eff_1, rel_n_eff_2)
#'
relative_eff <- function(x, ...) {
  UseMethod("relative_eff")
}

#' @export
#' @templateVar fn relative_eff
#' @template vector
#'
relative_eff.default <- function(x, chain_id, ...) {
  dim(x) <- c(length(x), 1)
  class(x) <- "matrix"
  relative_eff.matrix(x, chain_id)
}

#' @export
#' @templateVar fn relative_eff
#' @template matrix
#'
relative_eff.matrix <- function(x, chain_id, ..., cores = getOption("mc.cores", 1)) {
  x <- llmatrix_to_array(x, chain_id)
  relative_eff.array(x, cores = cores)
}

#' @export
#' @templateVar fn relative_eff
#' @template array
#'
relative_eff.array <- function(x, ..., cores = getOption("mc.cores", 1)) {
  stopifnot(length(dim(x)) == 3)
  S <- prod(dim(x)[1:2]) # posterior sample size = iter * chains

  if (cores == 1) {
    n_eff_vec <- apply(x, 3, posterior::ess_mean)
  } else {
    if (!os_is_windows()) {
      n_eff_list <-
        parallel::mclapply(
          mc.cores = cores,
          X = seq_len(dim(x)[3]),
          FUN = function(i) posterior::ess_mean(x[, , i, drop = TRUE])
        )
    } else {
      cl <- parallel::makePSOCKcluster(cores)
      on.exit(parallel::stopCluster(cl))
      n_eff_list <-
        parallel::parLapply(
          cl = cl,
          X = seq_len(dim(x)[3]),
          fun = function(i) posterior::ess_mean(x[, , i, drop = TRUE])
        )
    }
    n_eff_vec <- unlist(n_eff_list, use.names = FALSE)
  }

  return(n_eff_vec / S)
}

#' @export
#' @templateVar fn relative_eff
#' @template function
#' @param data,draws,... Same as for the [loo()] function method.
#'
relative_eff.function <-
  function(x,
           chain_id,
           ...,
           cores = getOption("mc.cores", 1),
           data = NULL,
           draws = NULL) {

    f_i <- validate_llfun(x) # not really an llfun, should return exp(ll) or exp(-ll)
    N <- dim(data)[1]

    if (cores == 1) {
      n_eff_list <-
        lapply(
          X = seq_len(N),
          FUN = function(i) {
            val_i <- f_i(data_i = data[i, , drop = FALSE], draws = draws, ...)
            relative_eff.default(as.vector(val_i), chain_id = chain_id, cores = 1)
          }
        )
    } else {
      if (!os_is_windows()) {
        n_eff_list <-
          parallel::mclapply(
            X = seq_len(N),
            FUN = function(i) {
              val_i <- f_i(data_i = data[i, , drop = FALSE], draws = draws, ...)
              relative_eff.default(as.vector(val_i), chain_id = chain_id, cores = 1)
            },
            mc.cores = cores
          )
      } else {
        cl <- parallel::makePSOCKcluster(cores)
        parallel::clusterExport(cl=cl, varlist=c("draws", "chain_id", "data"), envir=environment())
        on.exit(parallel::stopCluster(cl))
        n_eff_list <-
          parallel::parLapply(
            cl = cl,
            X = seq_len(N),
            fun = function(i) {
              val_i <- f_i(data_i = data[i, , drop = FALSE], draws = draws, ...)
              relative_eff.default(as.vector(val_i), chain_id = chain_id, cores = 1)
            }
          )
      }
    }

    n_eff_vec <- unlist(n_eff_list, use.names = FALSE)
    return(n_eff_vec)
  }

#' @export
#' @describeIn relative_eff
#'   If `x` is an object of class `"psis"`, `relative_eff()` simply returns
#'   the `r_eff` attribute of `x`.
relative_eff.importance_sampling <- function(x, ...) {
  attr(x, "r_eff")
}


# internal ----------------------------------------------------------------


#' Effective sample size for PSIS
#'
#' @noRd
#' @param w A vector or matrix (one column per observation) of normalized Pareto
#'   smoothed weights (not log weights).
#' @param r_eff Relative effective sample size of `exp(log_lik)` or
#'   `exp(-log_ratios)`. `r_eff` should be a scalar if `w` is a
#'   vector and a vector of length `ncol(w)` if `w` is a matrix.
#' @return A scalar if `w` is a vector. A vector of length `ncol(w)`
#'   if `w` is matrix.
#'
psis_n_eff <- function(w, ...) {
  UseMethod("psis_n_eff")
}

#' @export
psis_n_eff.default <- function(w, r_eff = NULL, ...) {
  ss <- sum(w^2)
  if (is.null(r_eff)) {
    return(1 / ss)
  }
  stopifnot(length(r_eff) == 1)
  1 / ss * r_eff
}

#' @export
psis_n_eff.matrix <- function(w, r_eff = NULL, ...) {
  ss <- colSums(w^2)
  if (is.null(r_eff)) {
    return(1 / ss)
  }
  if (length(r_eff) != length(ss) && length(r_eff) != 1) {
    stop("r_eff must have length 1 or ncol(w).", call. = FALSE)
  }
  1 / ss * r_eff
}