File: proj-pred.Rd

package info (click to toggle)
r-cran-projpred 2.0.2%2Bdfsg-1
links: PTS, VCS
area: main
in suites: bullseye
size: 740 kB
sloc: cpp: 355; sh: 14; makefile: 2
file content (113 lines) | stat: -rw-r--r-- 4,073 bytes
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/methods.R
\name{proj-pred}
\alias{proj-pred}
\alias{proj_linpred}
\alias{proj_predict}
\title{Extract draws of the linear predictor and draw from the predictive
distribution of the projected submodel}
\usage{
proj_linpred(
  object,
  newdata,
  offsetnew = NULL,
  weightsnew = NULL,
  nterms = NULL,
  transform = FALSE,
  integrated = FALSE,
  seed = NULL,
  ...
)

proj_predict(
  object,
  newdata,
  offsetnew = NULL,
  weightsnew = NULL,
  nterms = NULL,
  ndraws = 1000,
  seed = NULL,
  ...
)
}
\arguments{
\item{object}{Either an object returned by \link[=varsel]{varsel},
\link[=cv_varsel]{cv_varsel} or \link[=init_refmodel]{init_refmodel}, or
alternatively any object that can be converted to a reference model.}

\item{newdata}{The predictor values used in the prediction. If
\code{solution_terms} is specified, then \code{newdata} should either be a
dataframe containing column names that correspond to \code{solution_terms}
or a matrix with the number and order of columns corresponding to
\code{solution_terms}. If \code{solution_terms} is unspecified, then
\code{newdata} must either be a dataframe containing all the column names
as in the original data or a matrix with the same columns at the same
positions as in the original data.}

\item{offsetnew}{Offsets for the new observations. Either \code{NULL} or a
right-hand side formula. By default, the offsets are taken from
\code{newdata} as in the original model; if there are none, a vector of
zeros is used.}

\item{weightsnew}{Weights for the new observations. For a binomial model,
this corresponds to the number of trials per observation. For
\code{proj_linpred}, this argument matters only if \code{newdata} is
specified. By default, the weights are taken from \code{newdata} as in the
original model. Either \code{NULL} or a right-hand side formula.}

\item{nterms}{Number of terms in the submodel (the variable combination is
taken from the variable selection information). If a vector with several
values, then results for all specified model sizes are returned. Ignored if
\code{solution_terms} is specified. By default use the automatically
suggested model size.}

\item{transform}{Should the linear predictor be transformed using the
inverse-link function? Default is \code{FALSE}. For \code{proj_linpred}
only.}

\item{integrated}{If \code{TRUE}, the output is averaged over the parameters.
Default is \code{FALSE}. For \code{proj_linpred} only.}

\item{seed}{An optional seed to use for drawing from the projection. For
\code{proj_predict} only.}

\item{...}{Additional arguments passed to \link{project} if \code{object} is
an object returned by \link{varsel} or \link{cv_varsel}.}

\item{ndraws}{Number of draws to return from the predictive distribution of
the projection. The default is 1000. For \code{proj_predict} only.}
}
\value{
If the prediction is done for one submodel only (\code{nterms} has
  length one or \code{solution_terms} is specified) and \code{newdata} is
  unspecified, a matrix or vector of predictions (depending on the value of
  \code{integrated}). If \code{newdata} is specified, returns a list with
  elements \code{pred} (predictions) and \code{lpd} (log predictive
  densities). If the predictions are done for several submodel sizes, returns
  a list with one element for each submodel.
}
\description{
\code{proj_linpred} extracts draws of the linear predictor and
\code{proj_predict} draws from the predictive distribution of the projected
submodel or submodels. If the projection has not been performed, the
functions also perform the projection.
}
\examples{
\donttest{
if (requireNamespace('rstanarm', quietly=TRUE)) {
  ### Usage with stanreg objects
  # simulate n observations of d Gaussian predictors; only the first
  # predictor enters the response
  n <- 30
  d <- 5
  x <- matrix(rnorm(n*d), nrow=n)
  y <- x[,1] + 0.5*rnorm(n)
  data <- data.frame(x,y)

  # fit the reference model and run the variable selection on it
  fit <- rstanarm::stan_glm(y ~ X1 + X2 + X3 + X4 + X5, gaussian(),
                            data=data, chains=2, iter=500)
  vs <- varsel(fit)

  # compute predictions with 4 variables at the training points
  # (the submodel size is selected with the nterms argument)
  pred <- proj_linpred(vs, newdata = data, nterms = 4)
  pred <- proj_predict(vs, newdata = data, nterms = 4)
}
}

}