1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353
|
### Functions to handle seasonality
#' Number of days in each season
#'
#' Returns number of days in each month or quarter of the observed time period.
#'
#' Useful for month length adjustments
#'
#' @param x time series
#' @return Time series
#' @author Rob J Hyndman
#' @seealso \code{\link[forecast]{bizdays}}
#' @keywords ts
#' @examples
#'
#' par(mfrow=c(2,1))
#' plot(ldeaths,xlab="Year",ylab="pounds",
#' main="Monthly deaths from lung disease (UK)")
#' ldeaths.adj <- ldeaths/monthdays(ldeaths)*365.25/12
#' plot(ldeaths.adj,xlab="Year",ylab="pounds",
#' main="Adjusted monthly deaths from lung disease (UK)")
#'
#' @export
monthdays <- function(x) {
if (!is.ts(x)) {
stop("Not a time series")
}
f <- frequency(x)
if (f == 12) {
days <- c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
} else if (f == 4) {
days <- c(90, 91, 92, 92)
} else {
stop("Not monthly or quarterly data")
}
nyears <- round(length(x) / f + 1) + 1
years <- (1:nyears) + (start(x)[1] - 1)
leap.years <- ((years %% 4 == 0) & !(years %% 100 == 0 & years %% 400 != 0))[1:nyears]
dummy <- t(matrix(rep(days, nyears), nrow = f))
if (f == 12) {
dummy[leap.years, 2] <- 29
} else {
dummy[leap.years, 1] <- 91
}
xx <- c(t(dummy))[start(x)[2] - 1 + (1:length(x))]
return(ts(xx, start = start(x), frequency = f))
}
#' Forecast seasonal index
#'
#' Returns vector containing the seasonal index for \code{h} future periods. If
#' the seasonal index is non-periodic, it uses the last values of the index.
#'
#'
#' @param object Output from \code{\link[stats]{decompose}} or
#' \link[stats]{stl}.
#' @param h Number of periods ahead to forecast
#' @return Time series
#' @author Rob J Hyndman
#' @keywords ts
#' @examples
#' uk.stl <- stl(UKDriverDeaths,"periodic")
#' uk.sa <- seasadj(uk.stl)
#' uk.fcast <- holt(uk.sa,36)
#' seasf <- sindexf(uk.stl,36)
#' uk.fcast$mean <- uk.fcast$mean + seasf
#' uk.fcast$lower <- uk.fcast$lower + cbind(seasf,seasf)
#' uk.fcast$upper <- uk.fcast$upper + cbind(seasf,seasf)
#' uk.fcast$x <- UKDriverDeaths
#' plot(uk.fcast,main="Forecasts from Holt's method with seasonal adjustment")
#'
#' @export
sindexf <- function(object, h) {
if ("stl" %in% class(object)) {
ss <- object$time.series[, 1]
m <- frequency(ss)
ss <- ss[length(ss) - (m:1) + 1]
tsp.x <- tsp(object$time.series)
}
else if ("decomposed.ts" %in% class(object)) {
ss <- object$figure
m <- frequency(object$seasonal)
n <- length(object$trend)
ss <- rep(ss, n / m + 1)[1:n]
ss <- ss[n - (m:1) + 1]
tsp.x <- tsp(object$seasonal)
}
else {
stop("Object of unknown class")
}
out <- ts(rep(ss, h / m + 1)[1:h], frequency = m, start = tsp.x[2] + 1 / m)
return(out)
}
#' Seasonal dummy variables
#'
#' \code{seasonaldummy} returns a matrix of dummy variables suitable for use in
#' \code{\link{Arima}}, \code{\link{auto.arima}} or \code{\link{tslm}}. The
#' last season is omitted and used as the control.
#'
#' \code{seasonaldummyf} is deprecated, instead use the \code{h} argument in
#' \code{seasonaldummy}.
#'
#' The number of dummy variables is determined from the time series
#' characteristics of \code{x}. When \code{h} is missing, the length of
#' \code{x} also determines the number of rows for the matrix returned by
#' \code{seasonaldummy}. the value of \code{h} determines the number of rows
#' for the matrix returned by \code{seasonaldummy}, typically used for
#' forecasting. The values within \code{x} are not used.
#'
#' @param x Seasonal time series: a \code{ts} or a \code{msts} object
#' @param h Number of periods ahead to forecast (optional)
#' @return Numerical matrix.
#' @author Rob J Hyndman
#' @seealso \code{\link{fourier}}
#' @keywords ts
#' @examples
#'
#' plot(ldeaths)
#'
#' # Using seasonal dummy variables
#' month <- seasonaldummy(ldeaths)
#' deaths.lm <- tslm(ldeaths ~ month)
#' tsdisplay(residuals(deaths.lm))
#' ldeaths.fcast <- forecast(deaths.lm,
#' data.frame(month=I(seasonaldummy(ldeaths,36))))
#' plot(ldeaths.fcast)
#'
#' # A simpler approach to seasonal dummy variables
#' deaths.lm <- tslm(ldeaths ~ season)
#' ldeaths.fcast <- forecast(deaths.lm, h=36)
#' plot(ldeaths.fcast)
#'
#' @export
seasonaldummy <- function(x, h=NULL) {
if (!is.ts(x)) {
stop("Not a time series")
} else {
fr.x <- frequency(x)
}
if (is.null(h)) {
if (fr.x == 1) {
stop("Non-seasonal time series")
}
dummy <- as.factor(cycle(x))
dummy.mat <- matrix(0, ncol = frequency(x) - 1, nrow = length(x))
nrow <- 1:length(x)
for (i in 1:(frequency(x) - 1))
dummy.mat[dummy == paste(i), i] <- 1
colnames(dummy.mat) <- if (fr.x == 12) {
month.abb[1:11]
} else if (fr.x == 4) {
c("Q1", "Q2", "Q3")
} else {
paste("S", 1:(fr.x - 1), sep = "")
}
return(dummy.mat)
}
else {
return(seasonaldummy(ts(rep(0, h), start = tsp(x)[2] + 1 / fr.x, frequency = fr.x)))
}
}
#' @rdname seasonaldummy
#' @export
seasonaldummyf <- function(x, h) {
warning("seasonaldummyf() is deprecated, please use seasonaldummy()")
if (!is.ts(x)) {
stop("Not a time series")
}
f <- frequency(x)
return(seasonaldummy(ts(rep(0, h), start = tsp(x)[2] + 1 / f, frequency = f)))
}
#' Fourier terms for modelling seasonality
#'
#' \code{fourier} returns a matrix containing terms from a Fourier series, up
#' to order \code{K}, suitable for use in \code{\link{Arima}},
#' \code{\link{auto.arima}}, or \code{\link{tslm}}.
#'
#' \code{fourierf} is deprecated, instead use the \code{h} argument in
#' \code{fourier}.
#'
#' The period of the Fourier terms is determined from the time series
#' characteristics of \code{x}. When \code{h} is missing, the length of
#' \code{x} also determines the number of rows for the matrix returned by
#' \code{fourier}. Otherwise, the value of \code{h} determines the number of
#' rows for the matrix returned by \code{fourier}, typically used for
#' forecasting. The values within \code{x} are not used.
#'
#' Typical use would omit \code{h} when generating Fourier terms for training a model
#' and include \code{h} when generating Fourier terms for forecasting.
#'
#' When \code{x} is a \code{ts} object, the value of \code{K} should be an
#' integer and specifies the number of sine and cosine terms to return. Thus,
#' the matrix returned has \code{2*K} columns.
#'
#' When \code{x} is a \code{msts} object, then \code{K} should be a vector of
#' integers specifying the number of sine and cosine terms for each of the
#' seasonal periods. Then the matrix returned will have \code{2*sum(K)}
#' columns.
#'
#' @param x Seasonal time series: a \code{ts} or a \code{msts} object
#' @param K Maximum order(s) of Fourier terms
#' @param h Number of periods ahead to forecast (optional)
#' @return Numerical matrix.
#' @author Rob J Hyndman
#' @seealso \code{\link{seasonaldummy}}
#' @keywords ts
#' @examples
#'
#' library(ggplot2)
#'
#' # Using Fourier series for a "ts" object
#' # K is chosen to minimize the AICc
#' deaths.model <- auto.arima(USAccDeaths, xreg=fourier(USAccDeaths,K=5), seasonal=FALSE)
#' deaths.fcast <- forecast(deaths.model, xreg=fourier(USAccDeaths, K=5, h=36))
#' autoplot(deaths.fcast) + xlab("Year")
#'
#' # Using Fourier series for a "msts" object
#' taylor.lm <- tslm(taylor ~ fourier(taylor, K = c(3, 3)))
#' taylor.fcast <- forecast(taylor.lm,
#' data.frame(fourier(taylor, K = c(3, 3), h = 270)))
#' autoplot(taylor.fcast)
#'
#' @export
fourier <- function(x, K, h=NULL) {
if (is.null(h)) {
return(...fourier(x, K, 1:NROW(x)))
}
else {
return(...fourier(x, K, NROW(x) + (1:h)))
}
}
#' @rdname fourier
#' @export
fourierf <- function(x, K, h) {
warning("fourierf() is deprecated, please use fourier()")
return(...fourier(x, K, length(x) + (1:h)))
}
# Function to do the work.
...fourier <- function(x, K, times) {
if (any(class(x) == "msts")) {
period <- attr(x, "msts")
} else {
period <- frequency(x)
}
# Patch for older versions of R that do not have sinpi and cospi functions.
if (!exists("sinpi")) {
sinpi <- function(x) {
sin(pi * x)
}
cospi <- function(x) {
cos(pi * x)
}
}
if (length(period) != length(K)) {
stop("Number of periods does not match number of orders")
}
if (any(2 * K > period)) {
stop("K must be not be greater than period/2")
}
# Compute periods of all Fourier terms
p <- numeric(0)
labels <- character(0)
for (j in seq_along(period))
{
if (K[j] > 0) {
p <- c(p, (1:K[j]) / period[j])
labels <- c(labels, paste(
paste0(c("S", "C"), rep(1:K[j], rep(2, K[j]))),
round(period[j]), sep = "-"
))
}
}
# Remove equivalent seasonal periods due to multiple seasonality
k <- duplicated(p)
p <- p[!k]
labels <- labels[!rep(k, rep(2, length(k)))]
# Remove columns where sinpi=0
k <- abs(2 * p - round(2 * p)) > .Machine$double.eps
# Compute matrix of Fourier terms
X <- matrix(NA_real_, nrow = length(times), ncol = 2L * length(p))
for (j in seq_along(p))
{
if (k[j]) {
X[, 2L * j - 1L] <- sinpi(2 * p[j] * times)
}
X[, 2L * j] <- cospi(2 * p[j] * times)
}
colnames(X) <- labels
# Remove missing columns
X <- X[, !is.na(colSums(X)), drop = FALSE]
return(X)
}
#' Moving-average smoothing
#'
#' \code{ma} computes a simple moving average smoother of a given time series.
#'
#' The moving average smoother averages the nearest \code{order} periods of
#' each observation. As neighbouring observations of a time series are likely
#' to be similar in value, averaging eliminates some of the randomness in the
#' data, leaving a smooth trend-cycle component. \deqn{\hat{T}_{t} =
#' \frac{1}{m} \sum_{j=-k}^k
#' y_{t+j}}{T[t]=1/m(y[t-k]+y[t-k+1]+\ldots+y[t]+\ldots+y[t+k-1]+y[t+k])} where
#' \eqn{k=\frac{m-1}{2}}{k=(m-1)/2}
#'
#' When an even \code{order} is specified, the observations averaged will
#' include one more observation from the future than the past (k is rounded
#' up). If centre is TRUE, the value from two moving averages (where k is
#' rounded up and down respectively) are averaged, centering the moving
#' average.
#'
#' @param x Univariate time series
#' @param order Order of moving average smoother
#' @param centre If TRUE, then the moving average is centred for even orders.
#' @return Numerical time series object containing the simple moving average
#' smoothed values.
#' @author Rob J Hyndman
#' @seealso \code{\link[stats]{decompose}}
#' @keywords ts
#' @examples
#'
#' plot(wineind)
#' sm <- ma(wineind,order=12)
#' lines(sm,col="red")
#'
#' @export
ma <- function(x, order, centre=TRUE) {
if (abs(order - round(order)) > 1e-8) {
stop("order must be an integer")
}
if (order %% 2 == 0 && centre) { # centred and even
w <- c(0.5, rep(1, order - 1), 0.5) / order
} else { # odd or not centred
w <- rep(1, order) / order
}
return(filter(x, w))
}
|