File: runminmax.Rd

package info (click to toggle)
catools 1.10-1
  • links: PTS, VCS
  • area: main
  • in suites: squeeze
  • size: 356 kB
  • ctags: 74
  • sloc: ansic: 650; cpp: 640; makefile: 5
file content (232 lines) | stat: -rwxr-xr-x 10,213 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
\name{runmin & runmax}
\alias{runmin}
\alias{runmax}
\title{Minimum and Maximum of Moving Windows}
\description{Moving (aka running, rolling) Window Minimum and Maximum 
  calculated over a vector  }
\usage{
  runmin(x, k, alg=c("C", "R"),
         endrule=c("min", "NA", "trim", "keep", "constant", "func"),
         align = c("center", "left", "right"))
  runmax(x, k, alg=c("C", "R"),
         endrule=c("max", "NA", "trim", "keep", "constant", "func"),
         align = c("center", "left", "right"))
}

\arguments{
  \item{x}{numeric vector of length n or matrix with n rows. If \code{x} is a 
    matrix than each column will be processed separately.}
  \item{k}{width of moving window; must be an integer between one and n }
  \item{endrule}{character string indicating how the values at the beginning 
    and the end, of the array, should be treated. Only first and last \code{k2} 
    values at both ends are affected, where \code{k2} is the half-bandwidth 
    \code{k2 = k \%/\% 2}.
     \itemize{
       \item \code{"min"} & \code{"max"} - applies the underlying function to 
       smaller and smaller sections of the array. In case of min equivalent to: 
       \code{for(i in 1:k2) out[i]=min(x[1:(i+k2)])}. Default.
       \item \code{"trim"} - trim the ends; output array length is equal to 
         \code{length(x)-2*k2 (out = out[(k2+1):(n-k2)])}. This option mimics 
         output of \code{\link{apply}} \code{(\link{embed}(x,k),1,FUN)} and other 
         related functions.
       \item \code{"keep"} - fill the ends with numbers from \code{x} vector 
         \code{(out[1:k2] = x[1:k2])}
       \item \code{"constant"} - fill the ends with first and last calculated 
         value in output array \code{(out[1:k2] = out[k2+1])}
       \item \code{"NA"} - fill the ends with NA's \code{(out[1:k2] = NA)}
       \item \code{"func"} - same as \code{"min"} & \code{"max"} but implimented
       in R. This option could be very slow, and is included mostly for testing
     }
     Similar to \code{endrule} in \code{\link{runmed}} function which has the 
     following options: \dQuote{\code{c("median", "keep", "constant")}} .
  }
  \item{alg}{an option allowing to choose different algorithms or 
    implementations. Default is to use of code written in C (option \code{alg="C"}).
    Option \code{alg="R"} will use slower code written in R. Useful for 
    debugging and studying the algorithm.}
  \item{align}{specifies whether result should be centered (default), 
  left-aligned or right-aligned.  If \code{endrule}="min" or "max" then setting
  \code{align} to "left" or "right" will fall back on slower implementation 
  equivalent to \code{endrule}="func". }
}

\details{
  Apart from the end values, the result of y = runFUN(x, k) is the same as 
  \dQuote{\code{for(j=(1+k2):(n-k2)) y[j]=FUN(x[(j-k2):(j+k2)], na.rm = TRUE)}}, where FUN 
  stands for min or max functions.  Both functions can handle non-finite 
  numbers like NaN's and Inf's the same way as \code{\link{min}(x, na.rm = TRUE)}).
   

  The main incentive to write this set of functions was relative slowness of 
  majority of moving window functions available in R and its packages.  With the 
  exception of \code{\link{runmed}}, a running window median function, all 
  functions listed in "see also" section are slower than very inefficient 
  \dQuote{\code{\link{apply}(\link{embed}(x,k),1,FUN)}} approach. Relative 
  speeds \code{runmin} and \code{runmax} functions is O(n) in best and average 
  case and O(n*k) in worst case.
    
  Both functions work with infinite numbers (\code{NA},\code{NaN},\code{Inf},
  \code{-Inf}). Also default \code{endrule} is hardwired in C for speed.
}

\value{
  Returns a numeric vector or matrix of the same size as \code{x}. Only in case of 
  \code{endrule="trim"} the output vectors will be shorter and output matrices 
  will have fewer rows. 
}

\author{Jarek Tuszynski (SAIC) \email{jaroslaw.w.tuszynski@saic.com}}

\seealso{
  Links related to:
  \itemize{       
   \item Other moving window functions  from this package: \code{\link{runmean}}, 
    \code{\link{runquantile}}, \code{\link{runmad}} and \code{\link{runsd}}  
   \item R functions: \code{\link{runmed}}, \code{\link{min}}, \code{\link{max}}
   \item Similar functions in other packages: \code{\link[fSeries]{rollMin}} 
     and \code{\link[fSeries]{rollMax}} from \pkg{fSeries} library
     \code{\link[zoo]{rollmax}} from \pkg{zoo} library
   \item Generic running window functions: \code{\link{apply}}\code{
     (\link{embed}(x,k), 1, FUN)} (fastest), \code{\link[fSeries]{rollFun}} 
     from \pkg{fSeries} (slow), \code{\link[gtools]{running}} from \pkg{gtools} 
     package (extremely slow for this purpose), \code{\link[zoo]{rapply}} from 
     \pkg{zoo} library, \code{\link[magic]{subsums}} from 
     \pkg{magic} library can perform running window operations on data with any 
     dimensions. 
  }
}

\examples{
  # show plot using runmin, runmax and runmed
  k=25; n=200;
  x = rnorm(n,sd=30) + abs(seq(n)-n/4)
  col = c("black", "red", "green", "blue", "magenta", "cyan")
  plot(x, col=col[1], main = "Moving Window Analysis Functions")
  lines(runmin(x,k), col=col[2])
  lines(runmean(x,k), col=col[3])
  lines(runmax(x,k), col=col[4])
  legend(0,.9*n, c("data", "runmin", "runmean", "runmax"), col=col, lty=1 )

  # basic tests against standard R approach
  a = runmin(x,k, endrule="trim") # test only the inner part 
  b = apply(embed(x,k), 1, min)   # Standard R running min
  stopifnot(all(a==b));
  a = runmax(x,k, endrule="trim") # test only the inner part
  b = apply(embed(x,k), 1, max)   # Standard R running min
  stopifnot(all(a==b));
  
  # test against loop approach
  k=25; 
  data(iris)
  x = iris[,1]
  n = length(x)
  x[seq(1,n,11)] = NaN;                # add NANs
  k2 = k%/%2
  k1 = k-k2-1
  a1 = runmin(x, k)
  a2 = runmax(x, k)
  b1 = array(0,n)
  b2 = array(0,n)
  for(j in 1:n) {
    lo = max(1, j-k1)
    hi = min(n, j+k2)
    b1[j] = min(x[lo:hi], na.rm = TRUE)
    b2[j] = max(x[lo:hi], na.rm = TRUE)
  }
  # this test works fine at the R prompt but fails during package check - need to investigate
  \dontrun{ 
  stopifnot(all(a1==b1, na.rm=TRUE));
  stopifnot(all(a2==b2, na.rm=TRUE));
  }
  
  # Test if moving windows forward and backward gives the same results
  # Two data sets also corespond to best and worst-case scenatio data-sets
  k=51; n=200;
  a = runmin(n:1, k) 
  b = runmin(1:n, k)
  stopifnot(all(a[n:1]==b, na.rm=TRUE));
  a = runmax(n:1, k)
  b = runmax(1:n, k)
  stopifnot(all(a[n:1]==b, na.rm=TRUE));

  # test vector vs. matrix inputs, especially for the edge handling
  nRow=200; k=25; nCol=10
  x = rnorm(nRow,sd=30) + abs(seq(nRow)-n/4)
  x[seq(1,nRow,10)] = NaN;              # add NANs
  X = matrix(rep(x, nCol ), nRow, nCol) # replicate x in columns of X
  a = runmax(x, k)
  b = runmax(X, k)
  stopifnot(all(a==b[,1], na.rm=TRUE));        # vector vs. 2D array
  stopifnot(all(b[,1]==b[,nCol], na.rm=TRUE)); # compare rows within 2D array
  a = runmin(x, k)
  b = runmin(X, k)
  stopifnot(all(a==b[,1], na.rm=TRUE));        # vector vs. 2D array
  stopifnot(all(b[,1]==b[,nCol], na.rm=TRUE)); # compare rows within 2D array

  # Compare C and R algorithms to each other for extreme window sizes
  numeric.test = function (x, k) {
    a = runmin( x, k, alg="C")
    b = runmin( x, k, alg="R")
    c =-runmax(-x, k, alg="C")
    d =-runmax(-x, k, alg="R")
    stopifnot(all(a==b, na.rm=TRUE));
    #stopifnot(all(c==d, na.rm=TRUE)); 
    #stopifnot(all(a==c, na.rm=TRUE));
    stopifnot(all(b==d, na.rm=TRUE));
  }
  n=200;                               # n is an even number
  x = rnorm(n,sd=30) + abs(seq(n)-n/4) # random data
  for(i in 1:5) numeric.test(x, i)     # test for small window size
  for(i in 1:5) numeric.test(x, n-i+1) # test for large window size
  n=201;                               # n is an odd number
  x = rnorm(n,sd=30) + abs(seq(n)-n/4) # random data
  for(i in 1:5) numeric.test(x, i)     # test for small window size
  for(i in 1:5) numeric.test(x, n-i+1) # test for large window size
  n=200;                               # n is an even number
  x = rnorm(n,sd=30) + abs(seq(n)-n/4) # random data
  x[seq(1,200,10)] = NaN;              # with some NaNs
  for(i in 1:5) numeric.test(x, i)     # test for small window size
  for(i in 1:5) numeric.test(x, n-i+1) # test for large window size
  n=201;                               # n is an odd number
  x = rnorm(n,sd=30) + abs(seq(n)-n/4) # random data
  x[seq(1,200,2)] = NaN;               # with some NaNs
  for(i in 1:5) numeric.test(x, i)     # test for small window size
  for(i in 1:5) numeric.test(x, n-i+1) # test for large window size

  # speed comparison
  \dontrun{
  n = 1e7;  k=991; 
  x1 = runif(n);                       # random data - average case scenario
  x2 = 1:n;                            #  best-case scenario data for runmax
  x3 = n:1;                            # worst-case scenario data for runmax
  system.time( runmax( x1,k,alg="C"))  # C alg on average data O(n)
  system.time( runmax( x2,k,alg="C"))  # C alg on  best-case data O(n)
  system.time( runmax( x3,k,alg="C"))  # C alg on worst-case data O(n*k)
  system.time(-runmin(-x1,k,alg="C"))  # use runmin to do runmax work
  system.time( runmax( x1,k,alg="R"))  # R version of the function
  x=runif(1e5); k=1e2;                 # reduce vector and window sizes
  system.time(runmax(x,k,alg="R"))     # R version of the function
  system.time(apply(embed(x,k), 1, max)) # standard R approach 
  }
}

\keyword{ts}
\keyword{smooth}
\keyword{array}
\keyword{utilities}
\concept{moving min}
\concept{rolling min}
\concept{running min}
\concept{moving max}
\concept{rolling max}
\concept{running max}
\concept{moving minimum}
\concept{rolling minimum}
\concept{running minimum}
\concept{moving maximum}
\concept{rolling maximum}
\concept{running maximum}
\concept{running window}
\concept{moving window}
\concept{rolling window}