1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
|
% $Id: running.Rd 1433 2010-05-01 22:03:03Z warnes $
%
\name{running}
\alias{running}
\title{Apply a Function Over Adjacent Subsets of a Vector}
\description{Applies a function over subsets of the vector(s) formed by
taking a fixed number of previous points.}
\usage{
running(X, Y=NULL, fun=mean, width=min(length(X), 20),
allow.fewer=FALSE, pad=FALSE, align=c("right", "center","left"),
simplify=TRUE, by, ...)
}
\arguments{
\item{X}{ data vector }
\item{Y}{ data vector (optional) }
\item{fun}{ Function to apply. Default is \code{mean}.}
\item{width}{Integer giving the number of vector elements to include
in the subsets. Defaults to the lesser of the length of the data and
20 elements.}
\item{allow.fewer}{Boolean indicating whether the function should be
computed for subsets with fewer than \code{width} points}
\item{pad}{Boolean indicating whether the returned results should
be 'padded' with NAs corresponding to sets with fewer than
\code{width} elements. This only applies when
\code{allow.fewer} is FALSE.}
\item{align}{One of "right", "center", or "left".
This controls the relative location of `short' subsets with fewer
than \code{width} elements: "right" allows short subsets only at the
beginning of the sequence so that all of the complete subsets are at
the end of the sequence (i.e. `right aligned'), "left" allows short
subsets only at the end of the data so that the complete subsets
are `left aligned', and "center" allows short subsets at both ends
of the data so that complete subsets are `centered'.
}
\item{simplify}{Boolean. If FALSE the returned object will be a list
containing one element per evaluation. If TRUE, the returned
object will be coerced into a vector (if the computation returns a
scalar) or a matrix (if the computation returns multiple values).
Defaults to TRUE.}
\item{by}{Integer separation between groups. Setting \code{by=width} will
give non-overlapping windows. Default is missing, in which case
groups will start at each value in the X/Y range.}
\item{\dots}{ parameters to be passed to \code{fun} }
}
\details{
\code{running} applies the specified function to
sequential windows on \code{X} and (optionally) \code{Y}. If
\code{Y} is specified the function must be bivariate.
}
\value{
List (if \code{simplify==FALSE}), vector, or matrix containing the
results of applying the function \code{fun} to the
subsets of \code{X} (\code{running}) or \code{X} and \code{Y}.
Note that this function will create a vector or matrix even for
objects which are not simplified by \code{sapply}.
}
\author{ Gregory R. Warnes \email{greg@warnes.net},
with contributions by Nitin Jain \email{nitin.jain@pfizer.com}.}
\seealso{ \code{\link[gplots]{wapply}} to apply a function over an x-y window
centered at each x point, \code{\link[base]{sapply}},
\code{\link[base]{lapply}} }
\examples{
# show effect of pad: without padding only complete windows are returned,
# with padding the result has one entry per input element
running(1:20, width=5)
running(1:20, width=5, pad=TRUE)
# show effect of align: controls where the incomplete (NA-padded)
# windows are placed
running(1:20, width=5, align="left", pad=TRUE)
running(1:20, width=5, align="center", pad=TRUE)
running(1:20, width=5, align="right", pad=TRUE)
# show effect of simplify
running(1:20, width=5, fun=function(x) x ) # matrix
running(1:20, width=5, fun=function(x) x, simplify=FALSE) # list
# show effect of by
running(1:20, width=5) # normal
running(1:20, width=5, by=5) # non-overlapping
running(1:20, width=5, by=2) # starting every 2nd
# Use 'pad' to ensure correct length of vector, also show the effect
# of allow.fewer.
# Save the previous graphics settings so they can be restored afterwards
# instead of assuming the device started with a 1x1 layout.
op <- par(mfrow=c(2,1))
plot(1:20, running(1:20, width=5, allow.fewer=FALSE, pad=TRUE), type="b")
plot(1:20, running(1:20, width=5, allow.fewer=TRUE, pad=TRUE), type="b")
par(op)
# plot running mean and central 2 standard deviation range
# estimated by *last* 51 observations (width=51, default align="right")
dat <- rnorm(500, sd=1 + (1:500)/500 )
plot(dat)
# mean plus (sign=1) or minus (sign=-1) one standard deviation
sdfun <- function(x,sign=1) mean(x) + sign * sqrt(var(x))
lines(running(dat, width=51, pad=TRUE, fun=mean), col="blue")
lines(running(dat, width=51, pad=TRUE, fun=sdfun, sign=-1), col="red")
lines(running(dat, width=51, pad=TRUE, fun=sdfun, sign= 1), col="red")
# plot running correlation estimated by last 20 observations (red)
# against the true local correlation (blue)
sd.Y <- seq(0,1,length.out=500)
X <- rnorm(500, sd=1)
Y <- rnorm(500, sd=sd.Y)
plot(running(X,X+Y,width=20,fun=cor,pad=TRUE),col="red",type="s")
r <- 1 / sqrt(1 + sd.Y^2) # true cor of (X,X+Y)
lines(r,type="l",col="blue")
}
\keyword{misc}
|