1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256
|
\name{stepmented}
\alias{stepmented}
\alias{stepmented.lm}
\alias{stepmented.glm}
\alias{stepmented.ts}
\alias{stepmented.numeric}
%\alias{stepmented.default}
%\alias{stepmented.Arima}
%\alias{print.stepmented}
%\alias{summary.stepmented}
%\alias{print.summary.stepmented}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{
stepmented relationships in regression models
}
\description{
Fits regression models with stepmented (i.e. piecewise-constant) relationships between the response and one or more explanatory variables. Break-point estimates are provided.
}
\usage{
stepmented(obj, seg.Z, psi, npsi, fixed.psi=NULL, control = seg.control(),
keep.class=FALSE, var.psi=FALSE, ...)
%\method{stepmented}{default}(obj, seg.Z, psi, npsi, fixed.psi=NULL, control = seg.control(), model = TRUE, keep.class=FALSE, ...)
\method{stepmented}{lm}(obj, seg.Z, psi, npsi, fixed.psi=NULL, control = seg.control(),
keep.class=FALSE, var.psi=FALSE, ...)
\method{stepmented}{glm}(obj, seg.Z, psi, npsi, fixed.psi=NULL, control = seg.control(),
keep.class=FALSE, var.psi=FALSE, ...)
\method{stepmented}{numeric}(obj, seg.Z, psi, npsi, fixed.psi=NULL, control = seg.control(),
keep.class=FALSE, var.psi=FALSE, ...,
pertV=0, centerX=FALSE, adjX=NULL, weights=NULL)
\method{stepmented}{ts}(obj, seg.Z, psi, npsi, fixed.psi=NULL, control = seg.control(),
keep.class=FALSE, var.psi=FALSE, ...,
pertV=0, centerX=FALSE, adjX=NULL)
%\method{stepmented}{Arima}(obj, seg.Z, psi, npsi, fixed.psi=NULL, control = seg.control(),
% model = TRUE, keep.class=FALSE, ...)
}
%- ------------------------------>>>>> "Arima"
\arguments{
\item{obj}{A standard `linear' regression model of class "lm" or "glm". Alternatively, a simple "ts" object or a simple data vector may be supplied.
}
\item{seg.Z}{ the stepmented variables(s), i.e. the numeric covariate(s) understood to have a piecewise-constant relationship with response. It is a formula with no response variable, such as \code{seg.Z=~x} or \code{seg.Z=~x1+x2}. Currently, formulas involving functions,
such as \code{seg.Z=~log(x1)}, or selection operators, such as \code{seg.Z=~d[,"x1"]} or \code{seg.Z=~d$x1}, are \emph{not} allowed. Also, variable names formed by \verb{U} or \verb{V} only (with or without numbers ) are not permitted. If missing, the index variable \code{id=1,2,..,n} is used. For \code{stepmented.ts}, \code{seg.Z} is usually unspecified as the (time) covariate is obtained by the \code{ts} object itself.
}
\item{psi}{ starting values for the breakpoints to be estimated. If there is a single stepmented variable specified in \code{seg.Z}, \code{psi} can be a numeric vector, and it can be missing when 1 breakpoint has to be estimated (and the median of the stepmented variable is used as a starting value). If \code{seg.Z} includes several covariates, \code{psi} has to be specified as a \emph{named} list of vectors whose names have to match the variables in the \code{seg.Z} argument. Each vector of such list includes starting values for the break-point(s) for the corresponding variable in \code{seg.Z}. A \code{NA} value means that `\code{K}' quantiles (or equally spaced values) are used as starting values; \code{K} is fixed via the \code{\link{seg.control}} auxiliary function.
}
\item{npsi}{A named vector or list meaning the \emph{number} (and not locations) of breakpoints to be estimated. The starting values will be internally computed via the quantiles or equally spaced values, as specified in argument \code{quant} in \code{\link{seg.control}}. \code{npsi} can be missing and \code{npsi=1} is assumed for all variables specified in \code{seg.Z}. If \code{psi} is provided, \code{npsi} is ignored.}
\item{fixed.psi}{An optional named list including the breakpoint values to be kept fixed during the estimation procedure. The names should be a subset of (or even the same) variables specified in \code{seg.Z}. If there is a single variable in \code{seg.Z}, a simple numeric vector can be specified. Note that, in addition to the values specified here, \code{stepmented} will estimate additional breakpoints. To keep fixed all breakpoints (to be specified in \code{psi}) use \code{it.max=0} in \code{\link{seg.control}}
}
\item{control}{ a list of parameters for controlling the fitting process.
See the documentation for \code{\link{seg.control}} for details.
}
%\item{model}{logical value indicating if the model.frame should be returned.}
\item{keep.class}{logical value indicating if the final fit returned by \code{stepmented.default} should keep the class '\code{stepmented}' (along with the class of the original fit \code{obj}). Ignored by the stepmented methods.
}
\item{\dots}{ optional arguments (to be ignored safely). Notice specific arguments relevant to the original call (via \code{lm} or \code{glm} for instance), such as \code{weights} or \code{offet}, have to be included in the starting model \code{obj}.
}
\item{pertV}{
Only for \code{stepmented.ts} and \code{stepmented.numeric}.
}
\item{centerX}{
Only for \code{stepmented.ts} and \code{stepmented.numeric}. If \code{TRUE}, the covariate is centered before fitting.
}
\item{adjX}{
Only for \code{stepmented.ts} and \code{stepmented.numeric}. If the response vector leads to covariate with large values (such as years for ts objects), \code{adjX=TRUE} will shift the covariate to have a zero origin. Default is \code{NULL} which means \code{TRUE} if the minimum of covariate is 1000 or larger.
}
\item{var.psi}{
logical. If \code{TRUE}, the estimate covariance matrix is also computed via \code{\link{vcov.stepmented}}, thus the breakpoint standard errors are also included in the \code{psi} component of the returned object. Default is \code{FALSE}, as computing the estimate covariance matrix is somewhat time-consuming when the sample size is large.
}
\item{weights}{
possible weights to include in the estimation process (only for \code{stepmented.numeric}).
}
%\item{only.mean}{
% logical (only for \code{stepmented.numeric}). If \code{FALSE}, changepoints will be estimated even for the dispersion (variance) model. The number of changepoints to estimate in the two sub-models can be specified via \code{npsi} which can be a vector wherein
% \code{npsi[1]} refers to the mean model and \code{npsi[2]} to the variance model. If \code{npsi} is scalar, the same number of changepoints is estimated in the two submodels.
% }
}
\details{
Given a linear regression model (usually of class "lm" or "glm"), stepmented tries to estimate
a new regression model having piecewise-constant (i.e. step-function like) relationships with the variables specified in \code{seg.Z}.
A \emph{stepmented} relationship is defined by the mean level
parameters and the break-points where the mean level changes. The number of breakpoints
of each stepmented relationship depends on the \code{psi} argument, where initial
values for the break-points must be specified. The model
is estimated simultaneously yielding point estimates and relevant approximate
standard errors of all the model parameters, including the break-points.
\code{stepmented} implements the algorithm described in Fasola et al. (2018) along with bootstrap restarting
(Wood, 2001) to escape local optima. The procedure turns out to be particularly appealing and efficient
when there are two or more covariates exhibiting different change points to be estimated.
See also section `Note' below.
}
\value{
The returned object is of class "stepmented" which inherits
from the class "lm" or "glm" depending on the class of \code{obj}. When \code{only.mean=FALSE}, it is a list having two 'stepmented' fits (for the mean and for the dispersion submodels). \cr
An object of class "stepmented" is a list containing the components of the
original object \code{obj} with additionally the followings:
\item{psi}{estimated break-points and relevant (approximate) standard errors (on the continuum)}
\item{psi.rounded}{the rounded estimated break-points (see Note, below)}
\item{it}{number of iterations employed}
\item{epsilon}{difference in the objective function when the algorithm stops}
\item{model}{the model frame}
\item{psi.history}{a list or a vector including the breakpoint estimates at each step}
\item{seed}{the integer vector containing the seed just before the bootstrap resampling.
Returned only if bootstrap restart is employed}
\item{..}{Other components are not of direct interest of the user}
}
%\section{ Warning }{
%It is well-known that the log-likelihood function for the
%break-point may be not concave, especially
%for poor clear-cut kink-relationships. In these circumstances, the initial guess
% for the break-point, i.e. the \code{psi} argument, must be provided with care.
% For instance visual
%inspection of a, possibly smoothed, scatter-plot is usually a good way to obtain some idea on breakpoint location.
%However bootstrap restarting, implemented since version 0.2-9.0, is relatively more robust to starting values specified
%in \code{psi}. Alternatively an automatic procedure may be implemented by specifying \code{psi=NA} and \code{fix.npsi=FALSE} in %\code{\link{seg.control}}: experience suggests to increase the number of iterations via \code{it.max} in \code{seg.control()}. This automatic procedure, however, is expected to overestimate the number of breakpoints.
%}
\note{
Assuming a single changepoint \eqn{\psi} for the covariate \eqn{x}, the underlying fitted stepmented relationship is
\eqn{\beta_0+\beta_1 \ I(x>\psi)}, namely the fitted value (on the linear predictor scale) is \eqn{\beta_0} if \eqn{x\le \psi}, and \eqn{\beta_0+\beta_1} when
\eqn{x > \psi}. While the point estimate \eqn{\hat\psi} returned (in the \code{psi} component of the fit object)
is a unique real number, actually there exist infinite solutions in the range \eqn{[a, b)} where the extremes are the two
closest \emph{observed} covariate values
such hat \eqn{a \le \hat\psi<b}. The component \code{psi.rounded} of the fit object includes the rounded changepoint values
which can be taken as the final estimates. More specifically, each column of \code{psi.rounded}
represents a changepoint and the corresponding rows are the extremes of the `optimal' interval \eqn{[a, b)}.
The first row, i.e. the lower bound of the interval (\eqn{a} in the above example),
is taken as point estimate. \code{print.stepmented}, \code{print.summary.stepmented},
and \code{confint.stepmented} return the rounded (lower) value of the interval.
Also:
\enumerate{
\item The algorithm will start if the \code{it.max} argument returned by \code{seg.control}
is greater than zero. If \code{it.max=0} \code{stepmented} will estimate a new linear model with
break-point(s) fixed at the starting values reported in \code{psi}. Alternatively, it is also possible to set \code{h=0} in \code{seg.control()}. In this case, bootstrap restarting is unncessary, then to have changepoints at \code{mypsi} type \cr
\code{stepmented(.., psi=mypsi, control=seg.control(h=0, n.boot=0, it.max=1))}
\item In the returned fit object, `U.' is put before the name of the stepmented
variable to indicate the difference in the mean levels. \code{\link{slope}} can be used to compute the actual mean levels corresponding to the different intervals.
\item Currently methods specific to the class \code{"stepmented"} are
\itemize{
\item \code{\link{print.stepmented}}
\item \code{\link{summary.stepmented}}
\item \code{\link{print.summary.stepmented}}
\item \code{\link{plot.stepmented}}
\item \code{\link{confint.stepmented}}
\item \code{\link{vcov.stepmented}}
\item \code{\link{lines.stepmented}}
%\item \code{\link{predict.stepmented}}
%\item \code{\link{points.stepmented}}
%\item \code{\link{coef.stepmented}}
}
Others are inherited from the class \code{"lm"} or \code{"glm"} depending on the
class of \code{obj}.
}
}
\references{
Fasola S, Muggeo VMR, Kuchenhoff H (2018) A heuristic, iterative algorithm for change-point detection in abrupt change models, \emph{Computational Statistics} \bold{33}, 997--1015
}
\author{ Vito M. R. Muggeo, \email{vito.muggeo@unipa.it} (based on original code by Salvatore Fasola)}
\seealso{ \code{\link{segmented}} for segmented regression, \code{\link{lm}}, \code{\link{glm}} }
\examples{
n=20
x<-1:n/n
mu<- 2+ 1*(x>.6)
y<- mu + rnorm(n)*.8
#fitting via regression model
os <-stepmented(lm(y~1),~x)
y<-ts(y)
os1<- stepmented(y) #the 'ts' method
os2<- stepmented(y, npsi=2)
#plot(y)
#plot(os1, add=TRUE)
#plot(os2, add=TRUE, col=3:5)
### Example with (poisson) GLM
y<- rpois(n,exp(mu))
o<-stepmented(glm(y~1,family=poisson))
plot(o, res=TRUE)
\dontrun{
## Example using the (well-known) Nile dataset
data(Nile)
plot(Nile)
os<- stepmented(Nile)
plot(os, add=TRUE)
### Example with (binary) GLM (example from the package stepR)
set.seed(1234)
y <- rbinom(200, 1, rep(c(0.1, 0.7, 0.3, 0.9), each=50))
o<-stepmented(glm(y~1,family=binomial), npsi=3)
plot(o, res=TRUE)
### Two stepmented covariates (with 1 and 2 breakpoints); z has also an additional linear effect
n=100
x<-1:n/n
z<-runif(n,2,5)
mu<- 2+ 1*(x>.6)-2*(z>3)+3*(z>4)+z
y<- mu + rnorm(n)*.8
os <-stepmented(lm(y~z),~x+z, npsi=c(x=1,z=2))
os
summary(os)
## see ?plot.stepmented
}
}
%# An example using the Arima method:
%\dontrun{
%n<-50
%idt <-1:n #the time index
%
%mu<-50-idt +1.5*pmax(idt-30,0)
%set.seed(6969)
%y<-mu+arima.sim(list(ar=.5),n)*3.5
%
%o<-arima(y, c(1,0,0), xreg=idt)
%os1<-stepmented(o, ~idt, control=seg.control(display=TRUE))
%
%#note using the .coef argument is mandatory!
%slope(os1, .coef=os1$coef)
%plot(y)
%plot(os1, add=TRUE, .coef=os1$coef, col=2)
%}
% Add one or more standard keywords, see file 'KEYWORDS' in the
% R documentation directory.
\keyword{regression}
\keyword{nonlinear }
|