File: ExtremesData.Rd

package info (click to toggle)
fextremes 4032.84-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 744 kB
  • sloc: makefile: 14
file content (328 lines) | stat: -rw-r--r-- 11,271 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
\name{ExtremesData}

\alias{ExtremesData}

\alias{emdPlot}
\alias{qqparetoPlot}

\alias{mePlot}
\alias{mrlPlot}
\alias{mxfPlot}

\alias{msratioPlot}

\alias{recordsPlot}
\alias{ssrecordsPlot}

\alias{sllnPlot}
\alias{lilPlot}

\alias{xacfPlot}

\alias{normMeanExcessFit} 
\alias{ghMeanExcessFit}   
\alias{hypMeanExcessFit}    
\alias{nigMeanExcessFit}    
\alias{ghtMeanExcessFit}    


\title{Explorative Data Analysis}


\description{

    A collection and description of functions for 
    explorative data analysis. The tools include 
    plot functions for empirical distributions, quantile 
    plots, graphs exploring the properties of exceedances 
    over a threshold, plots for the maximum/sum ratio and for 
    the development of records.
    \cr
    
    The functions are:
    
    \tabular{ll}{
    \code{emdPlot} \tab Plot of empirical distribution function, \cr
    \code{qqparetoPlot} \tab Exponential/Pareto quantile plot, \cr
    \code{mePlot} \tab Plot of mean excesses over a threshold, \cr
    \code{mrlPlot} \tab another variant, mean residual life plot, \cr
    \code{mxfPlot} \tab another variant, with confidence intervals, \cr
    \code{msratioPlot} \tab Plot of the ratio of maximum and sum, \cr   
    \code{recordsPlot} \tab Record development compared with iid data, \cr
    \code{ssrecordsPlot} \tab another variant, investigates subsamples, \cr
    \code{sllnPlot} \tab verifies Kolmogorov's strong law of large numbers, \cr
    \code{lilPlot} \tab verifies Hartman-Wintner's law of the iterated logarithm, \cr
    \code{xacfPlot} \tab ACF of exceedances over a threshold, \cr
    \code{normMeanExcessFit} \tab fits mean excesses with a normal density, \cr
    \code{ghMeanExcessFit} \tab fits mean excesses with a GH density, \cr   
    \code{hypMeanExcessFit} \tab fits mean excesses with a HYP density, \cr   
    \code{nigMeanExcessFit} \tab fits mean excesses with a NIG density, \cr  
    \code{ghtMeanExcessFit} \tab fits mean excesses with a GHT density. }   
    
}


\usage{
emdPlot(x, doplot = TRUE, plottype = c("xy", "x", "y", " "), 
    labels = TRUE, \dots)

qqparetoPlot(x, xi = 0, trim = NULL, threshold = NULL, doplot = TRUE, 
    labels = TRUE, \dots)

mePlot(x, doplot = TRUE, labels = TRUE, \dots)
mrlPlot(x, ci = 0.95, umin = mean(x), umax = max(x), nint = 100, doplot = TRUE, 
     plottype = c("autoscale", ""), labels = TRUE, \dots)  
mxfPlot(x, u = quantile(x, 0.05), doplot = TRUE, labels = TRUE, \dots)  
   
msratioPlot(x, p = 1:4, doplot = TRUE, labels = TRUE, \dots) 
   
recordsPlot(x, ci = 0.95, doplot = TRUE, labels = TRUE, \dots)
ssrecordsPlot(x, subsamples = 10, doplot = TRUE, plottype = c("lin", "log"),
    labels = TRUE, \dots)
    
sllnPlot(x, doplot = TRUE, labels = TRUE, \dots)
lilPlot(x, doplot = TRUE, labels = TRUE, \dots)

xacfPlot(x, u = quantile(x, 0.95), lag.max = 15, doplot = TRUE, 
    which = c("all", 1, 2, 3, 4), labels = TRUE, \dots)
    
normMeanExcessFit(x, doplot = TRUE, trace = TRUE, \dots)
ghMeanExcessFit(x, doplot = TRUE, trace = TRUE, \dots)
hypMeanExcessFit(x, doplot = TRUE, trace = TRUE, \dots)
nigMeanExcessFit(x, doplot = TRUE, trace = TRUE, \dots)
ghtMeanExcessFit(x, doplot = TRUE, trace = TRUE, \dots)
}


\arguments{

    \item{ci}{
        [mrlPlot][recordsPlot] - \cr
        a confidence level. By default 0.95, i.e. 95\%.
        }
    \item{doplot}{
        a logical value. Should the results be plotted? By 
        default \code{TRUE}.
        }
    \item{labels}{
        a logical value. Whether or not x- and y-axes should be automatically 
        labelled and a default main title should be added to the plot.
        By default \code{TRUE}.
        }
    \item{lag.max}{
        [xacfPlot] - \cr
        maximum number of lags at which to calculate the autocorrelation 
        functions. The default value is 15.
        }
    \item{nint}{
        [mrlPlot] - \cr
        the number of intervals, see \code{umin} and \code{umax}. The 
        default value is 100.
        }
    \item{p}{
        [msratioPlot] - \cr
        the power exponents, a numeric vector. By default a sequence from  
        1 to 4 in unit integer steps.
        }
    \item{plottype}{
        [emdPlot] - \cr
        which axes should be on a log scale: \code{"x"} x-axis only; 
        \code{"y"} y-axis only; \code{"xy"} both axes; \code{""} 
        neither axis.
        \cr
        [mrlPlot] - \cr
        a character string, if set to \code{"autoscale"}, then the scale of 
        the plot is automatically determined, any other string allows user
        specified scale information through the \code{\dots} argument.
        \cr
        [ssrecordsPlot] - \cr
        one of two options can be selected, either \code{"lin"}
        or \code{"log"}. The default creates a linear plot.
        } 
    \item{subsamples}{
        [ssrecordsPlot] - \cr
        the number of subsamples, by default 10, an integer value.
        }
    \item{threshold, trim}{
        [qqparetoPlot] - \cr
        numeric values: \code{threshold} is the value at which data are 
        to be left-truncated, \code{trim} is the value at which data are 
        to be right-truncated. By default both are \code{NULL}.
        }
    \item{trace}{
        a logical flag, by default \code{TRUE}. Should the calculations     
        be traced?
        }
    \item{u}{
        [mxfPlot][xacfPlot] - \cr
        a numeric value at which level the data are to be truncated. By 
        default the 5\% quantile, \code{u=quantile(x,0.05)}, for 
        \code{mxfPlot} and the 95\% quantile, \code{u=quantile(x,0.95)}, 
        for \code{xacfPlot}.
        }
    \item{umin, umax}{
        [mrlPlot] - \cr
        range of threshold values. If \code{umin} and/or \code{umax} are 
        not available, then by default they are set to the following 
        values: \code{umin=mean(x)} and \code{umax=max(x)}.
        }
    \item{which}{
        [xacfPlot] - \cr
        a numeric or character value, if \code{which="all"} then all
        four plots are displayed, if \code{which} is an integer between
        one and four, then the first, second, third or fourth plot will
        be displayed.
        }
    \item{x}{
        a numeric data vector or an object to be plotted.  
        }
    \item{xi}{
        the shape parameter of the generalized Pareto distribution.
        }
    \item{\dots}{
        additional arguments passed to the FUN or plot function.
        }
        
}


\details{
  
    \bold{Empirical Distribution Function:}
    \cr\cr
    The function \code{emdPlot} is a simple exploratory function. A 
    straight line on the double log scale indicates Pareto tail behaviour.
    \cr
    
    
    \bold{Quantile--Quantile Pareto Plot:}
    \cr\cr      
    \code{qqparetoPlot} creates a quantile-quantile plot for threshold 
    data. If \code{xi} is zero the reference distribution is the 
    exponential; if \code{xi} is non-zero the reference distribution 
    is the generalized Pareto with that parameter value expressed 
    by \code{xi}. In the case of the exponential, the plot is 
    interpreted as follows: Concave departures from a straight line are a 
    sign of heavy-tailed behaviour, convex departures show thin-tailed 
    behaviour. 
    \cr
    
        
    \bold{Mean Excess Function Plot:}
    \cr\cr
    Three variants to plot the mean excess function are available: 
    A sample mean excess plot over increasing thresholds, and two mean 
    excess function plots with confidence intervals for discrimination 
    in the tails of a distribution.
    In general, an upward trend in a mean excess function plot shows 
    heavy-tailed behaviour. In particular, a straight line with positive 
    gradient above some threshold is a sign of Pareto behaviour in the tail. 
    A downward trend shows thin-tailed behaviour whereas a line with 
    zero gradient shows an exponential tail. Here are some hints:
    Because upper plotting points are the average of a handful of extreme 
    excesses, these may be omitted for a prettier plot. 
    For \code{mrlPlot} and \code{mxfPlot} the upper tail is investigated; 
    for the lower tail reverse the sign of the \code{x} vector.
    \cr
    
    
    \bold{Plot of the Maximum/Sum Ratio:}
    \cr\cr
    The ratio of maximum and sum is a simple tool for detecting heavy 
    tails of a distribution and for giving a rough estimate of
    the order of its finite moments. Sharp increases in the curves
    of a \code{msratioPlot} are a sign of heavy-tailed behaviour.
    \cr
    
    
    \bold{Plot of the Development of Records:}
    \cr\cr
    These are functions that investigate the development of records in 
    a dataset and calculate the expected behaviour for iid data.
    \code{recordsPlot} counts records and reports the observations 
    at which they occur. In addition subsamples can be investigated
    with the help of the function \code{ssrecordsPlot}.
    \cr
    
    \bold{Plot of Kolmogorov's and Hartman-Wintner's Laws:}
    \cr\cr
    The function \code{sllnPlot} verifies Kolmogorov's strong law of 
    large numbers, and the function \code{lilPlot} verifies 
    Hartman-Wintner's law of the iterated logarithm.
    \cr
    
    \bold{ACF Plot of Exceedances over a Threshold:}
    \cr\cr
    This function plots the autocorrelation functions of heights and 
    distances of exceedances over a threshold.
    \cr
}


\value{
  
    The functions return a plot.

}


\note{

    The plots are labeled by default with an x-label, a y-label and
    a main title. If the argument \code{labels} is set to \code{FALSE}
    neither an x-label, a y-label nor a main title will be added to the
    graph. To add user defined label strings just use the 
    function \code{title(xlab="\dots", ylab="\dots", main="\dots")}.
    
}


\references{

Coles S. (2001);
    \emph{An Introduction to Statistical Modeling of Extreme Values},
    Springer.
    
Embrechts, P., Klueppelberg, C., Mikosch, T. (1997);
    \emph{Modelling Extremal Events}, Springer.  
    
}


\author{

    Some of the functions were implemented from Alec Stephenson's 
    R-package \code{evir} ported from Alexander McNeil's S library 
    \code{EVIS}, \emph{Extreme Values in S}, some from Alec Stephenson's 
    R-package \code{ismev} based on Stuart Coles' code from his book, 
    \emph{An Introduction to Statistical Modeling of Extreme Values} and 
    some were written by Diethelm Wuertz.
    
}


\examples{ 
## Danish fire insurance data:
   data(danishClaims)
   library(timeSeries)
   danishClaims = as.timeSeries(danishClaims)
   
## emdPlot -
   # Show Pareto tail behaviour:
   par(mfrow = c(2, 2), cex = 0.7)
   emdPlot(danishClaims) 
   
## qqparetoPlot -
   # QQ-Plot of heavy-tailed Danish fire insurance data:
   qqparetoPlot(danishClaims, xi = 0.7) 
 
## mePlot -
   # Sample mean excess plot of heavy-tailed Danish fire insurance data:
   mePlot(danishClaims)
      
## ssrecordsPlot -
   # Record fire insurance losses in Denmark:
   ssrecordsPlot(danishClaims, subsamples = 10) 
}


\keyword{hplot}