1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240
|
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/standardize_parameters.R,
% R/standardize_posteriors.R
\name{standardize_parameters}
\alias{standardize_parameters}
\alias{standardise_parameters}
\alias{standardize_posteriors}
\alias{standardise_posteriors}
\title{Parameters standardization}
\usage{
standardize_parameters(
model,
method = "refit",
ci = 0.95,
robust = FALSE,
two_sd = FALSE,
include_response = TRUE,
verbose = TRUE,
...
)
standardize_posteriors(
model,
method = "refit",
robust = FALSE,
two_sd = FALSE,
include_response = TRUE,
verbose = TRUE,
...
)
}
\arguments{
\item{model}{A statistical model.}
\item{method}{The method used for standardizing the parameters. Can be
\code{"refit"} (default), \code{"posthoc"}, \code{"smart"}, \code{"basic"}, \code{"pseudo"} or
\code{"sdy"}. See 'Details'.}
\item{ci}{Confidence Interval (CI) level}
\item{robust}{Logical, if \code{TRUE}, centering is done by subtracting the
median from the variables and dividing it by the median absolute deviation
(MAD). If \code{FALSE}, variables are standardized by subtracting the
mean and dividing it by the standard deviation (SD).}
\item{two_sd}{If \code{TRUE}, the variables are scaled by two times the deviation
(SD or MAD depending on \code{robust}). This method can be useful to obtain
model coefficients of continuous parameters comparable to coefficients
related to binary predictors, when applied to \strong{the predictors} (not the
outcome) (Gelman, 2008).}
\item{include_response}{If \code{TRUE} (default), the response value will also be
standardized. If \code{FALSE}, only the predictors will be standardized. For
GLMs the response value will never be standardized (see \emph{Generalized Linear
Models} section).}
\item{verbose}{Toggle warnings and messages on or off.}
\item{...}{For \code{standardize_parameters()}, arguments passed to
\code{\link[=model_parameters]{model_parameters()}}, such as:
\itemize{
\item \code{ci_method}, \code{centrality} for Mixed models and Bayesian models...
\item \code{exponentiate}, ...
\item etc.
}}
}
\value{
A data frame with the standardized parameters (\verb{Std_*}, depending on
the model type) and their CIs (\code{CI_low} and \code{CI_high}). Where applicable,
standard errors (SEs) are returned as an attribute (\code{attr(x, "standard_error")}).
}
\description{
Compute standardized model parameters (coefficients).
}
\details{
\subsection{Standardization Methods}{
\itemize{
\item \strong{refit}: This method is based on a complete model re-fit with a
standardized version of the data. Hence, this method is equal to
standardizing the variables before fitting the model. It is the "purest" and
the most accurate (Neter et al., 1989), but it is also the most
computationally costly and long (especially for heavy models such as Bayesian
models). This method is particularly recommended for complex models that
include interactions or transformations (e.g., polynomial or spline terms).
The \code{robust} argument (defaults to \code{FALSE}) enables a robust standardization
of data, i.e., based on the \code{median} and \code{MAD} instead of the \code{mean} and
\code{SD}. \strong{See \code{\link[datawizard:standardize]{datawizard::standardize()}} for more details.}
\itemize{
\item \strong{Note} that \code{standardize_parameters(method = "refit")} may not return
the same results as fitting a model on data that has been standardized with
\code{standardize()}; \code{standardize_parameters()} uses the data used by the model
fitting function, which might not be the same data if there are missing values.
See the \code{remove_na} argument in \code{standardize()}.
}
\item \strong{posthoc}: Post-hoc standardization of the parameters, aiming at
emulating the results obtained by "refit" without refitting the model. The
coefficients are divided by the standard deviation (or MAD if \code{robust}) of
the outcome (which becomes their expression 'unit'). Then, the coefficients
related to numeric variables are additionally multiplied by the standard
deviation (or MAD if \code{robust}) of the related terms, so that they correspond
to changes of 1 SD of the predictor (e.g., "A change in 1 SD of \code{x} is
related to a change of 0.24 of the SD of \code{y}"). This does not apply to binary
variables or factors, so the coefficients are still related to changes in
levels. This method is not accurate and tends to give aberrant results when
interactions are specified.
\item \strong{basic}: This method is similar to \code{method = "posthoc"}, but treats all
variables as continuous: it also scales the coefficient by the standard
deviation of the model matrix's parameter of factor levels (transformed to
integers) or binary predictors. Although being inappropriate for these cases,
this method is the one implemented by default in other software packages,
such as \code{\link[lm.beta:lm.beta]{lm.beta::lm.beta()}}.
\item \strong{smart} (Standardization of Model's parameters with Adjustment,
Reconnaissance and Transformation - \emph{experimental}): Similar to \code{method = "posthoc"} in that it does not involve model refitting. The difference is
that the SD (or MAD if \code{robust}) of the response is computed on the relevant
section of the data. For instance, if a factor with 3 levels A (the
intercept), B and C is entered as a predictor, the effect corresponding to B
vs. A will be scaled by the variance of the response at the intercept only.
As a result, the coefficients for effects of factors are similar to a Glass'
delta.
\item \strong{pseudo} (\emph{for 2-level (G)LMMs only}): In this (post-hoc) method, the
response and the predictor are standardized based on the level of prediction
(levels are detected with \code{\link[performance:check_heterogeneity_bias]{performance::check_heterogeneity_bias()}}): Predictors
are standardized based on their SD at level of prediction (see also
\code{\link[datawizard:demean]{datawizard::demean()}}); The outcome (in linear LMMs) is standardized based
on a fitted random-intercept-model, where \code{sqrt(random-intercept-variance)}
is used for level 2 predictors, and \code{sqrt(residual-variance)} is used for
level 1 predictors (Hoffman 2015, page 342). A warning is given when a
within-group variable is found to have excess between-group variance.
\item \strong{sdy} (\emph{for logistic regression models only}): This y-standardization
is useful when comparing coefficients of logistic regression models across
models for the same sample. Unobserved heterogeneity varies across models
with different independent variables, and thus, odds ratios from the same
predictor of different models cannot be compared directly. The
y-standardization makes coefficients "comparable across models by dividing
them with the estimated standard deviation of the latent variable for each
model" (Mood 2010). Thus, whenever one has multiple logistic regression models
that are fit to the same data and share certain predictors (e.g. nested
models), it can be useful to use this standardization approach to make
log-odds or odds ratios comparable.
}
}
\subsection{Transformed Variables}{
When the model's formula contains transformations (e.g. \code{y ~ exp(X)}) \code{method = "refit"} will give different results compared to \code{method = "basic"}
(\code{"posthoc"} and \code{"smart"} do not support such transformations): While
\code{"refit"} standardizes the data \emph{prior} to the transformation (e.g.
equivalent to \code{exp(scale(X))}), the \code{"basic"} method standardizes the
transformed data (e.g. equivalent to \code{scale(exp(X))}).
\cr\cr
See the \emph{Transformed Variables} section in \code{\link[datawizard:standardize.default]{datawizard::standardize.default()}}
for more details on how different transformations are dealt with when
\code{method = "refit"}.
}
\subsection{Confidence Intervals}{
The returned confidence intervals are re-scaled versions of the
unstandardized confidence intervals, and not "true" confidence intervals of
the standardized coefficients (cf. Jones & Waller, 2015).
}
\subsection{Generalized Linear Models}{
Standardization for generalized linear models (GLM, GLMM, etc) is done only
with respect to the predictors (while the outcome remains as-is,
unstandardized) - maintaining the interpretability of the coefficients (e.g.,
in a binomial model: the exponent of the standardized parameter is the OR of
a change of 1 SD in the predictor, etc.).
}
\subsection{Dealing with Factors}{
\code{standardize(model)} or \code{standardize_parameters(model, method = "refit")} do
\emph{not} standardize categorical predictors (i.e. factors) / their
dummy-variables, which may be a different behaviour compared to other R
packages (such as \pkg{lm.beta}) or other software packages (like SPSS). To
mimic such behaviours, either use \code{standardize_parameters(model, method = "basic")} to obtain post-hoc standardized parameters, or standardize the data
with \code{datawizard::standardize(data, force = TRUE)} \emph{before} fitting the
model.
}
}
\examples{
model <- lm(len ~ supp * dose, data = ToothGrowth)
standardize_parameters(model, method = "refit")
\donttest{
standardize_parameters(model, method = "posthoc")
standardize_parameters(model, method = "smart")
standardize_parameters(model, method = "basic")
# Robust and 2 SD
standardize_parameters(model, robust = TRUE)
standardize_parameters(model, two_sd = TRUE)
model <- glm(am ~ cyl * mpg, data = mtcars, family = "binomial")
standardize_parameters(model, method = "refit")
standardize_parameters(model, method = "posthoc")
standardize_parameters(model, method = "basic", exponentiate = TRUE)
}
\dontshow{if (require("lme4", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
\donttest{
m <- lme4::lmer(mpg ~ cyl + am + vs + (1 | cyl), mtcars)
standardize_parameters(m, method = "pseudo", ci_method = "satterthwaite")
}
\dontshow{\}) # examplesIf}
\dontshow{if (require("rstanarm", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
\donttest{
model <- rstanarm::stan_glm(rating ~ critical + privileges, data = attitude, refresh = 0)
standardize_posteriors(model, method = "refit", verbose = FALSE)
standardize_posteriors(model, method = "posthoc", verbose = FALSE)
standardize_posteriors(model, method = "smart", verbose = FALSE)
head(standardize_posteriors(model, method = "basic", verbose = FALSE))
}
\dontshow{\}) # examplesIf}
}
\references{
\itemize{
\item Hoffman, L. (2015). Longitudinal analysis: Modeling within-person fluctuation
and change. Routledge.
\item Jones, J. A., & Waller, N. G. (2015). The normal-theory and asymptotic
distribution-free (ADF) covariance matrix of standardized regression
coefficients: theoretical extensions and finite sample behavior.
Psychometrika, 80(2), 365-378.
\item Neter, J., Wasserman, W., & Kutner, M. H. (1989). Applied linear
regression models.
\item Gelman, A. (2008). Scaling regression inputs by dividing by two standard
deviations. Statistics in medicine, 27(15), 2865-2873.
\item Mood C. Logistic Regression: Why We Cannot Do What We Think We Can Do, and
What We Can Do About It. European Sociological Review (2010) 26:67–82.
}
}
\seealso{
See also \href{https://easystats.github.io/parameters/articles/standardize_parameters_effsize.html}{package vignette}.
Other standardize:
\code{\link{standardize_info}()}
}
\concept{effect size indices}
\concept{standardize}
|