1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
|
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/detect_lin_dep_alias.R
\name{detect.lindep}
\alias{detect.lindep}
\alias{detect.lindep.matrix}
\alias{detect.lindep.data.frame}
\alias{detect.lindep.plm}
\alias{alias.plm}
\alias{alias.pdata.frame}
\title{Functions to detect linear dependence}
\usage{
detect.lindep(object, ...)
\method{detect.lindep}{matrix}(object, suppressPrint = FALSE, ...)
\method{detect.lindep}{data.frame}(object, suppressPrint = FALSE, ...)
\method{detect.lindep}{plm}(object, suppressPrint = FALSE, ...)
\method{alias}{plm}(object, ...)
\method{alias}{pdata.frame}(
object,
model = c("pooling", "within", "Between", "between", "mean", "random", "fd"),
effect = c("individual", "time", "twoways"),
...
)
}
\arguments{
\item{object}{for \code{detect.lindep}: an object which should be checked
for linear dependence (of class \code{"matrix"}, \code{"data.frame"}, or
\code{"plm"}); for \code{alias}: either an estimated model of class
\code{"plm"} or a \code{"pdata.frame"}. Usually, one wants to input a model
matrix here or check an already estimated plm model.}
\item{\dots}{further arguments.}
\item{suppressPrint}{for \code{detect.lindep} only: logical indicating
whether a message shall be printed; defaults to printing the message, i. e.,
to \code{suppressPrint = FALSE}.}
\item{model}{(see \code{plm}).}
\item{effect}{(see \code{plm}).}
}
\value{
For \code{detect.lindep}: A named numeric vector containing column
numbers of the linearly dependent columns in the object after data
transformation, if any are present. \code{NULL} if no linearly dependent
columns are detected.
For \code{alias}: return value of \code{\link[stats:alias]{stats::alias.lm()}} run on the
(quasi-)demeaned model, i. e., the information outputted applies to
the transformed model matrix, not the original data.
}
\description{
Little helper functions to aid users to detect linearly dependent columns in a
two-dimensional data structure, especially in a (transformed) model matrix -
typically useful in interactive mode during the model building phase.
}
\details{
Linear dependence of columns/variables is (usually) readily avoided when
building one's model. However, linear dependence is sometimes not obvious
and harder to detect for less experienced applied statisticians. The
so-called "dummy variable trap" is a common and probably the best-known
fallacy of this kind (see e. g. Wooldridge (2016), sec. 7-2). When building
linear models with \code{lm} or \code{plm}'s \code{pooling} model, linear
dependence in one's model is easily detected, at times post hoc.
However, linear dependence might also occur after some transformations of
the data, although it is not present in the untransformed data. The within
transformation (also called fixed effect transformation) used in the
\code{"within"} model can result in such linear dependence, and this is
less likely to come to mind when building a model. See \strong{Examples} for two
examples of linearly dependent columns after the within transformation: ex. 1)
the transformed variables have the opposite sign of one another; ex. 2) the
transformed variables are identical.
During \code{plm}'s model estimation, linearly dependent columns and their
corresponding coefficients in the resulting object are silently dropped,
while the corresponding model frame and model matrix still contain the
affected columns. The plm object contains an element \code{aliased} which
indicates any such aliased coefficients by a named logical vector.
Both functions, \code{detect.lindep} and \code{alias}, help to
detect linear dependence and accomplish almost the same:
\code{detect.lindep} is a stand-alone implementation while
\code{alias} is a wrapper around
\code{\link[stats:alias]{stats::alias.lm()}}, extending the \code{alias}
generic to classes \code{"plm"} and \code{"pdata.frame"}.
\code{alias} hinges on the availability of the package
\CRANpkg{MASS} on the system. Not all arguments of \code{alias.lm}
are supported. Output of \code{alias} is more informative as it
gives the linear combination of dependent columns (after data
transformations, i. e., after (quasi-)demeaning) while
\code{detect.lindep} only gives columns involved in the linear
dependence in a simple format (thus being more suited for automatic
post-processing of the information).
}
\note{
Function \code{detect.lindep} was called \code{detect_lin_dep}
initially but was renamed later for naming consistency.
}
\examples{
### Example 1 ###
# prepare the data
data("Cigar" , package = "plm")
Cigar[ , "fact1"] <- c(0,1)
Cigar[ , "fact2"] <- c(1,0)
Cigar.p <- pdata.frame(Cigar)
# setup a formula and a model frame
form <- price ~ 0 + cpi + fact1 + fact2
mf <- model.frame(Cigar.p, form)
# no linear dependence in the pooling model's model matrix
# (with intercept in the formula, there would be linear dependence)
detect.lindep(model.matrix(mf, model = "pooling"))
# linear dependence present in the FE transformed model matrix
modmat_FE <- model.matrix(mf, model = "within")
detect.lindep(modmat_FE)
mod_FE <- plm(form, data = Cigar.p, model = "within")
detect.lindep(mod_FE)
alias(mod_FE) # => fact1 == -1*fact2
plm(form, data = mf, model = "within")$aliased # "fact2" indicated as aliased
# look at the data: after FE transformation fact1 == -1*fact2
head(modmat_FE)
all.equal(modmat_FE[ , "fact1"], -1*modmat_FE[ , "fact2"])
### Example 2 ###
# Setup the data:
# Assume CEOs stay with the firms of the Grunfeld data
# for the firm's entire lifetime and assume some fictional
# data about CEO tenure and age in year 1935 (first observation
# in the data set) to be at 1 to 10 years and 38 to 65 years, respectively.
# => CEO tenure and CEO age increase by same value (+1 year per year).
data("Grunfeld", package = "plm")
set.seed(42)
# add fictional data
Grunfeld$CEOtenure <- c(replicate(10, seq(from=s<-sample(1:10, 1), to=s+19, by=1)))
Grunfeld$CEOage <- c(replicate(10, seq(from=s<-sample(38:65, 1), to=s+19, by=1)))
# look at the data
head(Grunfeld, 50)
form <- inv ~ value + capital + CEOtenure + CEOage
mf <- model.frame(pdata.frame(Grunfeld), form)
# no linear dependent columns in original data/pooling model
modmat_pool <- model.matrix(mf, model="pooling")
detect.lindep(modmat_pool)
mod_pool <- plm(form, data = Grunfeld, model = "pooling")
alias(mod_pool)
# CEOtenure and CEOage are linear dependent after FE transformation
# (demeaning per individual)
modmat_FE <- model.matrix(mf, model="within")
detect.lindep(modmat_FE)
mod_FE <- plm(form, data = Grunfeld, model = "within")
detect.lindep(mod_FE)
alias(mod_FE)
# look at the transformed data: after FE transformation CEOtenure == 1*CEOage
head(modmat_FE, 50)
all.equal(modmat_FE[ , "CEOtenure"], modmat_FE[ , "CEOage"])
}
\references{
\insertRef{WOOL:13}{plm}
}
\seealso{
\code{\link[stats:alias]{stats::alias()}}, \code{\link[stats:model.matrix]{stats::model.matrix()}} and especially
\code{plm}'s \code{\link[=model.matrix]{model.matrix()}} for (transformed) model matrices,
\code{plm}'s \code{\link[=model.frame]{model.frame()}}.
}
\author{
Kevin Tappe
}
\keyword{array}
\keyword{manip}
|