File: summary.vsel.Rd

package info (click to toggle)
r-cran-projpred 2.3.0%2Bdfsg-1
links: PTS, VCS
area: main
in suites: bookworm
size: 1,180 kB
sloc: cpp: 296; sh: 14; makefile: 5
file content (104 lines) | stat: -rw-r--r-- 4,988 bytes
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/methods.R
\name{summary.vsel}
\alias{summary.vsel}
\title{Summary statistics of a variable selection}
\usage{
\method{summary}{vsel}(
  object,
  nterms_max = NULL,
  stats = "elpd",
  type = c("mean", "se", "diff", "diff.se"),
  deltas = FALSE,
  alpha = 2 * pnorm(-1),
  baseline = if (!inherits(object$refmodel, "datafit")) "ref" else "best",
  ...
)
}
\arguments{
\item{object}{An object of class \code{vsel} (returned by \code{\link[=varsel]{varsel()}} or
\code{\link[=cv_varsel]{cv_varsel()}}).}

\item{nterms_max}{Maximum submodel size for which the statistics are
calculated. Using \code{NULL} is effectively the same as using
\code{length(solution_terms(object))}. Note that \code{nterms_max} does not count the
intercept, so use \code{nterms_max = 0} for the intercept-only model. For
\code{\link[=plot.vsel]{plot.vsel()}}, \code{nterms_max} must be at least \code{1}.}

\item{stats}{One or more character strings determining which performance
statistics (i.e., utilities or losses) to calculate. Available statistics
are:
\itemize{
\item \code{"elpd"}: (expected) sum of log predictive densities.
\item \code{"mlpd"}: mean log predictive density, that is, \code{"elpd"} divided by the
number of observations.
\item \code{"mse"}: mean squared error.
\item \code{"rmse"}: root mean squared error. For the corresponding standard error
and lower and upper confidence interval bounds, bootstrapping is used.
\item \code{"acc"} (or its alias, \code{"pctcorr"}): classification accuracy
(\code{\link[=binomial]{binomial()}} family only).
\item \code{"auc"}: area under the ROC curve (\code{\link[=binomial]{binomial()}} family only). For the
corresponding standard error and lower and upper confidence interval
bounds, bootstrapping is used.
}}

\item{type}{One or more items from \code{"mean"}, \code{"se"}, \code{"lower"}, \code{"upper"},
\code{"diff"}, and \code{"diff.se"} indicating which of these to compute for each
item from \code{stats} (mean, standard error, lower and upper confidence
interval bounds, mean difference to the corresponding statistic of the
reference model, and standard error of this difference, respectively). The
confidence interval bounds belong to normal-approximation (or bootstrap;
see argument \code{stats}) confidence intervals with (nominal) coverage \code{1 - alpha}. Items \code{"diff"} and \code{"diff.se"} are only supported if \code{deltas} is
\code{FALSE}.}

\item{deltas}{If \code{TRUE}, the submodel statistics are estimated as differences
from the baseline model (see argument \code{baseline}). By a "difference
\emph{from} the baseline model", we mean the submodel statistic minus
the baseline model statistic (not the other way round).}

\item{alpha}{A number determining the (nominal) coverage \code{1 - alpha} of the
normal-approximation (or bootstrap; see argument \code{stats}) confidence
intervals. For example, in case of the normal approximation,
\code{alpha = 2 * pnorm(-1)} corresponds to a confidence interval extending one
standard error on either side of the point estimate.}

\item{baseline}{For \code{\link[=summary.vsel]{summary.vsel()}}: Only relevant if \code{deltas} is \code{TRUE}.
For \code{\link[=plot.vsel]{plot.vsel()}}: Always relevant. Either \code{"ref"} or \code{"best"}, indicating
whether the baseline is the reference model or the best submodel found (in
terms of \code{stats[1]}), respectively.}

\item{...}{Arguments passed to the internal function which is used for
bootstrapping (if applicable; see argument \code{stats}). Currently, relevant
arguments are \code{B} (the number of bootstrap samples, defaulting to \code{2000})
and \code{seed} (see \code{\link[=set.seed]{set.seed()}}, defaulting to
\code{sample.int(.Machine$integer.max, 1)}, but can also be \code{NA} to not call
\code{\link[=set.seed]{set.seed()}} at all).}
}
\value{
An object of class \code{vselsummary}.
}
\description{
This is the \code{\link[=summary]{summary()}} method for \code{vsel} objects (returned by \code{\link[=varsel]{varsel()}} or
\code{\link[=cv_varsel]{cv_varsel()}}).
}
\examples{
if (requireNamespace("rstanarm", quietly = TRUE)) {
  # Data:
  dat_gauss <- data.frame(y = df_gaussian$y, df_gaussian$x)

  # The "stanreg" fit which will be used as the reference model (with small
  # values for `chains` and `iter`, but only for technical reasons in this
  # example; this is not recommended in general):
  fit <- rstanarm::stan_glm(
    y ~ X1 + X2 + X3 + X4 + X5, family = gaussian(), data = dat_gauss,
    QR = TRUE, chains = 2, iter = 500, refresh = 0, seed = 9876
  )

  # Variable selection (here without cross-validation and with small values
  # for `nterms_max`, `nclusters`, and `nclusters_pred`, but only for the
  # sake of speed in this example; this is not recommended in general):
  vs <- varsel(fit, nterms_max = 3, nclusters = 5, nclusters_pred = 10,
               seed = 5555)
  print(summary(vs))
}

}