File: stepmented.Rd

package info (click to toggle)
r-cran-segmented 2.2-1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,524 kB
  • sloc: makefile: 2
file content (256 lines) | stat: -rw-r--r-- 14,190 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
\name{stepmented}
\alias{stepmented}
\alias{stepmented.lm}
\alias{stepmented.glm}
\alias{stepmented.ts}
\alias{stepmented.numeric}
%\alias{stepmented.default}
%\alias{stepmented.Arima}
%\alias{print.stepmented}
%\alias{summary.stepmented}
%\alias{print.summary.stepmented}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{
stepmented relationships in regression models
}
\description{
  Fits regression models with stepmented (i.e. piecewise-constant) relationships between the response and one or more explanatory variables. Break-point estimates are provided.
}
\usage{
stepmented(obj, seg.Z, psi, npsi, fixed.psi=NULL, control = seg.control(), 
   keep.class=FALSE, var.psi=FALSE, ...)
%\method{stepmented}{default}(obj, seg.Z, psi, npsi, fixed.psi=NULL, control = seg.control(), model = TRUE, keep.class=FALSE, ...)
\method{stepmented}{lm}(obj, seg.Z, psi, npsi, fixed.psi=NULL, control = seg.control(),
   keep.class=FALSE, var.psi=FALSE, ...)

\method{stepmented}{glm}(obj, seg.Z, psi, npsi, fixed.psi=NULL, control = seg.control(),
    keep.class=FALSE, var.psi=FALSE, ...)

\method{stepmented}{numeric}(obj, seg.Z, psi, npsi, fixed.psi=NULL, control = seg.control(),
    keep.class=FALSE, var.psi=FALSE, ..., 
    pertV=0, centerX=FALSE, adjX=NULL, weights=NULL)

\method{stepmented}{ts}(obj, seg.Z, psi, npsi, fixed.psi=NULL, control = seg.control(),
    keep.class=FALSE, var.psi=FALSE, ..., 
    pertV=0, centerX=FALSE, adjX=NULL)


%\method{stepmented}{Arima}(obj, seg.Z, psi, npsi, fixed.psi=NULL, control = seg.control(),
%    model = TRUE, keep.class=FALSE, ...)
}
%- ------------------------------>>>>>  "Arima"
\arguments{
\item{obj}{A standard `linear' regression model of class "lm" or "glm". Alternatively, a simple "ts" object or a simple data vector may be supplied. 
}
\item{seg.Z}{ the stepmented variables(s), i.e. the numeric covariate(s) understood to have a piecewise-constant relationship with response. It is a formula with no response variable, such as \code{seg.Z=~x} or \code{seg.Z=~x1+x2}. Currently, formulas involving functions, 
such as \code{seg.Z=~log(x1)}, or selection operators, such as \code{seg.Z=~d[,"x1"]} or \code{seg.Z=~d$x1}, are \emph{not} allowed. Also, variable names formed by \verb{U} or \verb{V} only (with or without numbers ) are not permitted. If missing, the index variable \code{id=1,2,..,n} is used. For \code{stepmented.ts}, \code{seg.Z} is usually unspecified as the (time) covariate is obtained by the \code{ts} object itself.
}
\item{psi}{ starting values for the breakpoints to be estimated. If there is a single stepmented variable specified in \code{seg.Z}, \code{psi} can be a numeric vector, and it can be missing  when 1 breakpoint has to be estimated (and the median of the stepmented variable is used as a starting value). If \code{seg.Z} includes several covariates, \code{psi} has to be specified as a \emph{named} list of vectors whose names have to match the variables in the \code{seg.Z} argument. Each vector of such list includes starting values for the break-point(s) for the corresponding variable in \code{seg.Z}. A \code{NA} value means that `\code{K}' quantiles (or equally spaced values) are used as starting values; \code{K} is fixed via the \code{\link{seg.control}} auxiliary function. 
      }
\item{npsi}{A named vector or list meaning the \emph{number} (and not locations) of breakpoints to be estimated. The starting values will be internally computed via the quantiles or equally spaced values, as specified in argument \code{quant} in \code{\link{seg.control}}. \code{npsi} can be missing and \code{npsi=1} is assumed for all variables specified in \code{seg.Z}. If \code{psi} is provided, \code{npsi} is ignored.}
\item{fixed.psi}{An optional named list including the breakpoint values to be kept fixed during the estimation procedure. The names should be a subset of (or even the same) variables specified in \code{seg.Z}. If there is a single variable in \code{seg.Z}, a simple numeric vector can be specified. Note that, in addition to the values specified here, \code{stepmented} will estimate additional breakpoints. To keep fixed all breakpoints (to be specified in \code{psi}) use \code{it.max=0} in \code{\link{seg.control}}
}
\item{control}{ a list of parameters for controlling the fitting process.
      See the documentation for \code{\link{seg.control}} for details. 
      }
  %\item{model}{logical value indicating if the model.frame should be returned.}
  \item{keep.class}{logical value indicating if the final fit returned by \code{stepmented.default} should keep the class '\code{stepmented}' (along with the class of the original fit \code{obj}). Ignored by the stepmented methods. 
  }
  \item{\dots}{ optional arguments (to be ignored safely). Notice specific arguments relevant to the original call (via \code{lm} or \code{glm} for instance), such as \code{weights} or \code{offet}, have to be included in the starting model \code{obj}.
}
  \item{pertV}{
  Only for \code{stepmented.ts} and \code{stepmented.numeric}.
  }
  \item{centerX}{
  Only for \code{stepmented.ts} and \code{stepmented.numeric}. If \code{TRUE}, the covariate is centered before fitting.
  }
  \item{adjX}{
  Only for \code{stepmented.ts} and \code{stepmented.numeric}. If the response vector leads to covariate with large values (such as years for ts objects), \code{adjX=TRUE} will shift the covariate to have a zero origin. Default is \code{NULL} which means \code{TRUE} if the minimum of covariate is 1000 or larger.
}
\item{var.psi}{
logical. If \code{TRUE}, the estimate covariance matrix is also computed via \code{\link{vcov.stepmented}}, thus the breakpoint standard errors are also included in the \code{psi} component of the returned object. Default is \code{FALSE}, as computing the estimate covariance matrix is somewhat time-consuming when the sample size is large. 
}
\item{weights}{
  possible weights to include in the estimation process (only for \code{stepmented.numeric}).
  }
%\item{only.mean}{
%  logical (only for \code{stepmented.numeric}). If \code{FALSE}, changepoints will be estimated even for the dispersion (variance) model. The number of changepoints to estimate in the two sub-models can be specified via \code{npsi} which can be a vector wherein
%  \code{npsi[1]} refers to the mean model and \code{npsi[2]} to the variance model. If \code{npsi} is scalar, the same number of changepoints is estimated in the two submodels.
 % }

}
\details{
  Given a linear regression model (usually of class "lm" or "glm"), stepmented tries to estimate
  a new regression model having piecewise-constant (i.e. step-function like) relationships with the variables specified in \code{seg.Z}.
  A \emph{stepmented} relationship is defined by the mean level
  parameters and the break-points where the mean level changes. The number of breakpoints
  of each stepmented relationship depends on the \code{psi} argument, where initial
  values for the break-points must be specified. The model
  is estimated simultaneously yielding point estimates and relevant approximate
   standard errors of all the model parameters, including the break-points.

  \code{stepmented} implements the algorithm described in Fasola et al. (2018) along with  bootstrap restarting 
  (Wood, 2001) to escape local optima. The procedure turns out to be particularly appealing and efficient
  when there are two or more covariates exhibiting different change points to be estimated.
  
  See also section `Note' below.

}
\value{
  The returned object is of class "stepmented" which inherits
  from the class "lm" or "glm" depending on the class of \code{obj}. When \code{only.mean=FALSE}, it is a list having two 'stepmented' fits (for the mean and for the dispersion submodels). \cr

An object of class "stepmented" is a list containing the components of the
original object \code{obj} with additionally the followings:
  \item{psi}{estimated break-points and relevant (approximate) standard errors (on the continuum)}
  \item{psi.rounded}{the rounded estimated break-points (see Note, below)}
  \item{it}{number of iterations employed}
  \item{epsilon}{difference in the objective function when the algorithm stops}
  \item{model}{the model frame}
  \item{psi.history}{a list or a vector including the breakpoint estimates at each step}
  \item{seed}{the integer vector containing the seed just before the bootstrap resampling. 
     Returned only if bootstrap restart is employed}
  \item{..}{Other components are not of direct interest of the user}
  }

%\section{ Warning }{
%It is well-known that the log-likelihood function for the 
%break-point may be not concave, especially 
%for poor clear-cut kink-relationships. In these circumstances, the initial guess
% for the break-point, i.e. the \code{psi} argument, must be provided with care. 
% For instance visual 
%inspection of a, possibly smoothed, scatter-plot is usually a good way to obtain some idea on breakpoint location. 
%However bootstrap restarting, implemented since version 0.2-9.0, is relatively more robust to starting values specified 
%in  \code{psi}. Alternatively an automatic procedure may be implemented by specifying \code{psi=NA} and \code{fix.npsi=FALSE} in %\code{\link{seg.control}}: experience suggests to increase the number of iterations via \code{it.max} in \code{seg.control()}. This automatic procedure, however, is expected to overestimate the number of breakpoints. 
%}

\note{
Assuming a single changepoint \eqn{\psi} for the covariate \eqn{x}, the underlying fitted stepmented relationship is 
\eqn{\beta_0+\beta_1 \ I(x>\psi)}, namely the fitted value (on the linear predictor scale) is \eqn{\beta_0} if \eqn{x\le \psi}, and \eqn{\beta_0+\beta_1} when 
\eqn{x > \psi}. While the point estimate \eqn{\hat\psi} returned (in the \code{psi} component of the fit object) 
is a unique real number, actually there exist infinite solutions in the range \eqn{[a, b)} where the extremes are the two 
closest \emph{observed} covariate values 
such hat \eqn{a \le \hat\psi<b}. The component \code{psi.rounded} of the fit object includes the rounded changepoint values 
which can be taken as the final estimates. More specifically, each column of \code{psi.rounded} 
represents a changepoint and the corresponding rows are the extremes of the `optimal' interval \eqn{[a, b)}. 
The first row, i.e. the lower bound of the interval (\eqn{a} in the above example), 
is taken as point estimate. \code{print.stepmented}, \code{print.summary.stepmented}, 
and \code{confint.stepmented} return the rounded (lower) value of the interval.

Also:
\enumerate{

\item The algorithm will start if the \code{it.max} argument returned by \code{seg.control}
  is greater than zero. If \code{it.max=0} \code{stepmented} will estimate a new linear model with
 break-point(s) fixed at the starting values reported in \code{psi}. Alternatively, it is also possible to set \code{h=0} in \code{seg.control()}. In this case, bootstrap restarting is unncessary, then to have changepoints at \code{mypsi} type \cr
 
 \code{stepmented(.., psi=mypsi, control=seg.control(h=0, n.boot=0, it.max=1))}


\item In the returned fit object, `U.' is put before the name of the stepmented 
variable to indicate the difference in the mean levels. \code{\link{slope}} can be used to compute the actual mean levels corresponding to the different intervals.


\item Currently methods specific to the class \code{"stepmented"} are
    \itemize{
  \item \code{\link{print.stepmented}}
  \item \code{\link{summary.stepmented}}
  \item \code{\link{print.summary.stepmented}}
  \item \code{\link{plot.stepmented}}
  \item \code{\link{confint.stepmented}}
  \item \code{\link{vcov.stepmented}}
  \item \code{\link{lines.stepmented}}
  %\item \code{\link{predict.stepmented}}
  %\item \code{\link{points.stepmented}}
  %\item \code{\link{coef.stepmented}}
            }
Others are inherited from the class \code{"lm"} or \code{"glm"} depending on the
 class of \code{obj}.

     }
}


\references{
Fasola S, Muggeo VMR, Kuchenhoff H (2018) A heuristic, iterative algorithm for change-point detection in abrupt change models, \emph{Computational Statistics} \bold{33},  997--1015
  }
 
\author{ Vito M. R. Muggeo, \email{vito.muggeo@unipa.it} (based on original code by Salvatore Fasola)}

\seealso{ \code{\link{segmented}} for segmented regression, \code{\link{lm}}, \code{\link{glm}} }

\examples{

n=20
x<-1:n/n
mu<- 2+ 1*(x>.6)
y<- mu + rnorm(n)*.8

#fitting via regression model
os <-stepmented(lm(y~1),~x)

y<-ts(y)
os1<- stepmented(y)  #the 'ts' method
os2<- stepmented(y, npsi=2)
#plot(y)
#plot(os1, add=TRUE)
#plot(os2, add=TRUE, col=3:5)


### Example with (poisson) GLM
y<- rpois(n,exp(mu))
o<-stepmented(glm(y~1,family=poisson))
plot(o, res=TRUE)

\dontrun{

## Example using the (well-known) Nile dataset
data(Nile)
plot(Nile)
os<- stepmented(Nile)
plot(os, add=TRUE)


### Example with (binary) GLM (example from the package stepR)
set.seed(1234)
y <- rbinom(200, 1, rep(c(0.1, 0.7, 0.3, 0.9), each=50))
o<-stepmented(glm(y~1,family=binomial), npsi=3)
plot(o, res=TRUE)

### Two stepmented covariates (with 1 and 2 breakpoints); z has also an additional linear effect
n=100
x<-1:n/n
z<-runif(n,2,5)
mu<- 2+ 1*(x>.6)-2*(z>3)+3*(z>4)+z
y<- mu + rnorm(n)*.8

os <-stepmented(lm(y~z),~x+z, npsi=c(x=1,z=2))
os
summary(os)

## see ?plot.stepmented
}
}

%# An example using the Arima method:
%\dontrun{
%n<-50
%idt <-1:n #the time index
%
%mu<-50-idt +1.5*pmax(idt-30,0)
%set.seed(6969)
%y<-mu+arima.sim(list(ar=.5),n)*3.5
%
%o<-arima(y, c(1,0,0), xreg=idt)
%os1<-stepmented(o, ~idt, control=seg.control(display=TRUE))
%
%#note using the .coef argument is mandatory!
%slope(os1, .coef=os1$coef)
%plot(y)
%plot(os1, add=TRUE, .coef=os1$coef, col=2)
%}

% Add one or more standard keywords, see file 'KEYWORDS' in the
% R documentation directory.
\keyword{regression}
\keyword{nonlinear }