1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138
|
#' Model comparison (deprecated, old version)
#'
#' **This function is deprecated**. Please use the new [loo_compare()] function
#' instead.
#'
#' @export
#' @param ... At least two objects returned by [loo()] (or [waic()]).
#' @param x A list of at least two objects returned by [loo()] (or
#' [waic()]). This argument can be used as an alternative to
#' specifying the objects in `...`.
#'
#' @return A vector or matrix with class `'compare.loo'` that has its own
#' print method. If exactly two objects are provided in `...` or
#' `x`, then the difference in expected predictive accuracy and the
#' standard error of the difference are returned. If more than two objects are
#' provided then a matrix of summary information is returned (see **Details**).
#'
#' @details
#' When comparing two fitted models, we can estimate the difference in their
#' expected predictive accuracy by the difference in `elpd_loo` or
#' `elpd_waic` (or multiplied by -2, if desired, to be on the
#' deviance scale).
#'
#' *When that difference, `elpd_diff`, is positive then the expected
#' predictive accuracy for the second model is higher. A negative
#' `elpd_diff` favors the first model.*
#'
#' When using `compare()` with more than two models, the values in the
#' `elpd_diff` and `se_diff` columns of the returned matrix are
#' computed by making pairwise comparisons between each model and the model
#' with the best ELPD (i.e., the model in the first row).
#' Although the `elpd_diff` column is equal to the difference in
#' `elpd_loo`, do not expect the `se_diff` column to be equal to the
#' the difference in `se_elpd_loo`.
#'
#' To compute the standard error of the difference in ELPD we use a
#' paired estimate to take advantage of the fact that the same set of _N_
#' data points was used to fit both models. These calculations should be most
#' useful when _N_ is large, because then non-normality of the
#' distribution is not such an issue when estimating the uncertainty in these
#' sums. These standard errors, for all their flaws, should give a better
#' sense of uncertainty than what is obtained using the current standard
#' approach of comparing differences of deviances to a Chi-squared
#' distribution, a practice derived for Gaussian linear models or
#' asymptotically, and which only applies to nested models in any case.
#'
#' @template loo-and-psis-references
#'
#' @examples
#' \dontrun{
#' loo1 <- loo(log_lik1)
#' loo2 <- loo(log_lik2)
#' print(compare(loo1, loo2), digits = 3)
#' print(compare(x = list(loo1, loo2)))
#'
#' waic1 <- waic(log_lik1)
#' waic2 <- waic(log_lik2)
#' compare(waic1, waic2)
#' }
#'
compare <- function(..., x = list()) {
.Deprecated("loo_compare")
dots <- list(...)
if (length(dots)) {
if (length(x)) {
stop("If 'x' is specified then '...' should not be specified.",
call. = FALSE)
}
nms <- as.character(match.call(expand.dots = TRUE))[-1L]
} else {
if (!is.list(x) || !length(x)) {
stop("'x' must be a list.", call. = FALSE)
}
dots <- x
nms <- names(dots)
if (!length(nms)) {
nms <- paste0("model", seq_along(dots))
}
}
if (!all(sapply(dots, is.loo))) {
stop("All inputs should have class 'loo'.")
}
if (length(dots) <= 1L) {
stop("'compare' requires at least two models.")
} else if (length(dots) == 2L) {
loo1 <- dots[[1]]
loo2 <- dots[[2]]
comp <- compare_two_models(loo1, loo2)
class(comp) <- c(class(comp), "old_compare.loo")
return(comp)
} else {
Ns <- sapply(dots, function(x) nrow(x$pointwise))
if (!all(Ns == Ns[1L])) {
stop("Not all models have the same number of data points.", call. = FALSE)
}
x <- sapply(dots, function(x) {
est <- x$estimates
setNames(c(est), nm = c(rownames(est), paste0("se_", rownames(est))) )
})
colnames(x) <- nms
rnms <- rownames(x)
comp <- x
ord <- order(x[grep("^elpd", rnms), ], decreasing = TRUE)
comp <- t(comp)[ord, ]
patts <- c("elpd", "p_", "^waic$|^looic$", "^se_waic$|^se_looic$")
col_ord <- unlist(sapply(patts, function(p) grep(p, colnames(comp))),
use.names = FALSE)
comp <- comp[, col_ord]
# compute elpd_diff and se_elpd_diff relative to best model
rnms <- rownames(comp)
diffs <- mapply(elpd_diffs, dots[ord[1]], dots[ord])
elpd_diff <- apply(diffs, 2, sum)
se_diff <- apply(diffs, 2, se_elpd_diff)
comp <- cbind(elpd_diff = elpd_diff, se_diff = se_diff, comp)
rownames(comp) <- rnms
class(comp) <- c("compare.loo", class(comp), "old_compare.loo")
comp
}
}
# internal ----------------------------------------------------------------
compare_two_models <- function(loo_a, loo_b, return = c("elpd_diff", "se"), check_dims = TRUE) {
if (check_dims) {
if (dim(loo_a$pointwise)[1] != dim(loo_b$pointwise)[1]) {
stop(paste("Models don't have the same number of data points.",
"\nFound N_1 =", dim(loo_a$pointwise)[1], "and N_2 =", dim(loo_b$pointwise)[1]), call. = FALSE)
}
}
diffs <- elpd_diffs(loo_a, loo_b)
comp <- c(elpd_diff = sum(diffs), se = se_elpd_diff(diffs))
structure(comp, class = "compare.loo")
}
|