File: runquantile.Rd

package info (click to toggle)
catools 1.10-1
  • links: PTS, VCS
  • area: main
  • in suites: squeeze
  • size: 356 kB
  • ctags: 74
  • sloc: ansic: 650; cpp: 640; makefile: 5
file content (257 lines) | stat: -rwxr-xr-x 11,152 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
\name{runquantile}
\alias{runquantile}
\title{Quantile of Moving Window}
\description{Moving (aka running, rolling) Window Quantile calculated over a vector}
\usage{
  runquantile(x, k, probs, type=7, 
         endrule=c("quantile", "NA", "trim", "keep", "constant", "func"),
         align = c("center", "left", "right"))
}

\arguments{
  \item{x}{numeric vector of length n or matrix with n rows. If \code{x} is a 
    matrix than each column will be processed separately.}
  \item{k}{width of moving window; must be an integer between one and n }
  \item{endrule}{character string indicating how the values at the beginning 
    and the end, of the array, should be treated. Only first and last \code{k2} 
    values at both ends are affected, where \code{k2} is the half-bandwidth 
    \code{k2 = k \%/\% 2}.
     \itemize{
       \item \code{"quantile"} - applies the \code{\link{quantile}} 
         function to smaller and smaller sections of the array. Equivalent to: 
         \code{for(i in 1:k2) out[i]=quantile(x[1:(i+k2)])}. 
       \item \code{"trim"} - trim the ends; output array length is equal to 
         \code{length(x)-2*k2 (out = out[(k2+1):(n-k2)])}. This option mimics 
         output of \code{\link{apply}} \code{(\link{embed}(x,k),1,FUN)} and other 
         related functions.
       \item \code{"keep"} - fill the ends with numbers from \code{x} vector 
         \code{(out[1:k2] = x[1:k2])}
       \item \code{"constant"} - fill the ends with first and last calculated 
         value in output array \code{(out[1:k2] = out[k2+1])}
       \item \code{"NA"} - fill the ends with NA's \code{(out[1:k2] = NA)}
       \item \code{"func"} - same as \code{"quantile"} but implimented
       in R. This option could be very slow, and is included mostly for testing
     }
     Similar to \code{endrule} in \code{\link{runmed}} function which has the 
     following options: \dQuote{\code{c("median", "keep", "constant")}} .
  }
  \item{probs}{numeric vector of probabilities with values in [0,1] range 
    used by \code{runquantile}. For example \code{Probs=c(0,0.5,1)} would be 
    equivalent to running \code{runmin}, \code{\link{runmed}} and \code{runmax}.
    Same as \code{probs} in \code{\link{quantile}} function. }
  \item{type}{an integer between 1 and 9 selecting one of the nine quantile 
    algorithms, same as \code{type} in \code{\link{quantile}} function. 
    Another even more readable description of nine ways to calculate quantiles 
    can be found at \url{http://mathworld.wolfram.com/Quantile.html}. }
  \item{align}{specifies whether result should be centered (default), 
  left-aligned or right-aligned.  If \code{endrule}="quantile" then setting
  \code{align} to "left" or "right" will fall back on slower implementation 
  equivalent to \code{endrule}="func". }
}

\details{
  Apart from the end values, the result of y = runquantile(x, k) is the same as 
  \dQuote{\code{for(j=(1+k2):(n-k2)) y[j]=quintile(x[(j-k2):(j+k2)],na.rm = TRUE)}}. It can handle 
non-finite numbers like NaN's and Inf's (like \code{\link{quantile}(x, na.rm = TRUE)}).

  The main incentive to write this set of functions was relative slowness of 
  majority of moving window functions available in R and its packages.  With the 
  exception of \code{\link{runmed}}, a running window median function, all 
  functions listed in "see also" section are slower than very inefficient 
  \dQuote{\code{\link{apply}(\link{embed}(x,k),1,FUN)}} approach. Relative 
  speeds of \code{runquantile} is O(n*k)

  Functions \code{runquantile} and \code{runmad} are using insertion sort to 
  sort the moving window, but gain speed by remembering results of the previous 
  sort. Since each time the window is moved, only one point changes, all but one 
  points in the window are already sorted. Insertion sort can fix that in O(k) 
  time.
}

\value{
  If \code{x} is a matrix than function \code{runquantile} returns a matrix of 
  size [n \eqn{\times}{x} \code{\link{length}}(probs)]. If \code{x} is vactor 
  a than function \code{runquantile} returns a matrix of size 
  [\code{\link{dim}}(x) \eqn{\times}{x} \code{\link{length}}(probs)]. 
  If \code{endrule="trim"} the output will have fewer rows. 
}

\references{
  \itemize{       
    \item About quantiles: Hyndman, R. J. and Fan, Y. (1996) \emph{Sample 
       quantiles in statistical packages, American Statistician}, 50, 361. 
    \item About quantiles: Eric W. Weisstein. \emph{Quantile}. From MathWorld-- 
     A Wolfram Web Resource. \url{http://mathworld.wolfram.com/Quantile.html} 
    
  \item About insertion sort used in \code{runmad} and \code{runquantile}: 
  R. Sedgewick (1988): \emph{Algorithms}. Addison-Wesley (page 99)
  }
} 

\author{Jarek Tuszynski (SAIC) \email{jaroslaw.w.tuszynski@saic.com}}

\seealso{
  Links related to:
  \itemize{       
   \item Running Quantile - \code{\link{quantile}}, \code{\link{runmed}}, 
     \code{\link{smooth}}, \code{\link[zoo]{rollmedian}} from 
     \pkg{zoo} library
   \item Other moving window functions  from this package: \code{\link{runmin}}, 
     \code{\link{runmax}}, \code{\link{runmean}}, \code{\link{runmad}} and
     \code{\link{runsd}}
   \item Running Minimum - \code{\link{min}}, \code{\link[fSeries]{rollMin}} from 
     \pkg{fSeries} library
   \item Running Maximum - \code{\link{max}}, \code{\link[fSeries]{rollMax}} from 
     \pkg{fSeries} library, \code{\link[zoo]{rollmax}} from \pkg{zoo} library
   \item generic running window functions: \code{\link{apply}}\code{
     (\link{embed}(x,k), 1, FUN)} (fastest), \code{\link[fSeries]{rollFun}} 
     from \pkg{fSeries} (slow), \code{\link[gtools]{running}} from \pkg{gtools} 
     package (extremely slow for this purpose), \code{\link[zoo]{rapply}} from 
     \pkg{zoo} library, \code{\link[magic]{subsums}} from 
     \pkg{magic} library can perform running window operations on data with any 
     dimensions. 
  }
}

\examples{
  # show plot using runquantile
  k=31; n=200;
  x = rnorm(n,sd=30) + abs(seq(n)-n/4)
  y=runquantile(x, k, probs=c(0.05, 0.25, 0.5, 0.75, 0.95))
  col = c("black", "red", "green", "blue", "magenta", "cyan")
  plot(x, col=col[1], main = "Moving Window Quantiles")
  lines(y[,1], col=col[2])
  lines(y[,2], col=col[3])
  lines(y[,3], col=col[4])
  lines(y[,4], col=col[5])
  lines(y[,5], col=col[6])
  lab = c("data", "runquantile(.05)", "runquantile(.25)", "runquantile(0.5)", 
          "runquantile(.75)", "runquantile(.95)")
  legend(0,230, lab, col=col, lty=1 )

  # show plot using runquantile
  k=15; n=200;
  x = rnorm(n,sd=30) + abs(seq(n)-n/4)
  y=runquantile(x, k, probs=c(0.05, 0.25, 0.5, 0.75, 0.95))
  col = c("black", "red", "green", "blue", "magenta", "cyan")
  plot(x, col=col[1], main = "Moving Window Quantiles (smoothed)")
  lines(runmean(y[,1],k), col=col[2])
  lines(runmean(y[,2],k), col=col[3])
  lines(runmean(y[,3],k), col=col[4])
  lines(runmean(y[,4],k), col=col[5])
  lines(runmean(y[,5],k), col=col[6])
  lab = c("data", "runquantile(.05)", "runquantile(.25)", "runquantile(0.5)", 
          "runquantile(.75)", "runquantile(.95)")
  legend(0,230, lab, col=col, lty=1 )
  
  # basic tests against runmin & runmax
  y = runquantile(x, k, probs=c(0, 1))
  a = runmin(x,k) # test only the inner part 
  stopifnot(all(a==y[,1], na.rm=TRUE));
  a = runmax(x,k) # test only the inner part
  stopifnot(all(a==y[,2], na.rm=TRUE));
  
  # basic tests against runmed, including testing endrules
  a = runquantile(x, k, probs=0.5, endrule="keep")
  b = runmed(x, k, endrule="keep")
  stopifnot(all(a==b, na.rm=TRUE));
  a = runquantile(x, k, probs=0.5, endrule="constant")
  b = runmed(x, k, endrule="constant")
  stopifnot(all(a==b, na.rm=TRUE));

  # basic tests against apply/embed
  a = runquantile(x,k, c(0.3, 0.7), endrule="trim")
  b = t(apply(embed(x,k), 1, quantile, probs = c(0.3, 0.7)))
  eps = .Machine$double.eps ^ 0.5
  stopifnot(all(abs(a-b)<eps));
  
  # test against loop approach
  # this test works fine at the R prompt but fails during package check - need to investigate
  k=25; n=200;
  x = rnorm(n,sd=30) + abs(seq(n)-n/4) # create random data
  x[seq(1,n,11)] = NaN;                # add NANs
  k2 = k%/%2
  k1 = k-k2-1
  a = runquantile(x, k, probs=c(0.3, 0.8) )
  b = matrix(0,n,2);
  for(j in 1:n) {
    lo = max(1, j-k1)
    hi = min(n, j+k2)
    b[j,] = quantile(x[lo:hi], probs=c(0.3, 0.8), na.rm = TRUE)
  }
  #stopifnot(all(abs(a-b)<eps));
  
  # compare calculation of array ends
  a = runquantile(x, k, probs=0.4, endrule="quantile") # fast C code
  b = runquantile(x, k, probs=0.4, endrule="func")     # slow R code
  stopifnot(all(abs(a-b)<eps));
  
  # test if moving windows forward and backward gives the same results
  k=51;
  a = runquantile(x     , k, probs=0.4)
  b = runquantile(x[n:1], k, probs=0.4)
  stopifnot(all(a[n:1]==b, na.rm=TRUE));

  # test vector vs. matrix inputs, especially for the edge handling
  nRow=200; k=25; nCol=10
  x = rnorm(nRow,sd=30) + abs(seq(nRow)-n/4)
  x[seq(1,nRow,10)] = NaN;              # add NANs
  X = matrix(rep(x, nCol ), nRow, nCol) # replicate x in columns of X
  a = runquantile(x, k, probs=0.6)
  b = runquantile(X, k, probs=0.6)
  stopifnot(all(abs(a-b[,1])<eps));        # vector vs. 2D array
  stopifnot(all(abs(b[,1]-b[,nCol])<eps)); # compare rows within 2D array

  # Exhaustive testing of runquantile to standard R approach
  numeric.test = function (x, k) {
    probs=c(1, 25, 50, 75, 99)/100
    a = runquantile(x,k, c(0.3, 0.7), endrule="trim")
    b = t(apply(embed(x,k), 1, quantile, probs = c(0.3, 0.7), na.rm=TRUE))
    eps = .Machine$double.eps ^ 0.5
    stopifnot(all(abs(a-b)<eps));
  }
  n=50;
  x = rnorm(n,sd=30) + abs(seq(n)-n/4) # nice behaving data
  for(i in 2:5) numeric.test(x, i)     # test small window sizes
  for(i in 1:5) numeric.test(x, n-i+1) # test large window size
  x[seq(1,50,10)] = NaN;               # add NANs and repet the test
  for(i in 2:5) numeric.test(x, i)     # test small window sizes
  for(i in 1:5) numeric.test(x, n-i+1) # test large window size
  
  # Speed comparison
  \dontrun{
  x=runif(1e6); k=1e3+1;
  system.time(runquantile(x,k,0.5))    # Speed O(n*k)
  system.time(runmed(x,k))             # Speed O(n * log(k)) 
  }
}

\keyword{ts}
\keyword{smooth}
\keyword{array}
\keyword{utilities}
\concept{moving min}
\concept{rolling min}
\concept{running min}
\concept{moving max}
\concept{rolling max}
\concept{running max}
\concept{moving minimum}
\concept{rolling minimum}
\concept{running minimum}
\concept{moving maximum}
\concept{rolling maximum}
\concept{running maximum}
\concept{moving quantile}
\concept{rolling quantile}
\concept{running quantile}
\concept{moving percentile}
\concept{rolling percentile}
\concept{running percentile}
\concept{moving median}
\concept{rolling median}
\concept{running median}
\concept{moving window}
\concept{rolling window}
\concept{running window}