File: mlv.R

package info (click to toggle)
r-cran-modeest 2.4.0-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 280 kB
  • sloc: makefile: 2
file content (304 lines) | stat: -rw-r--r-- 8,794 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304

# TODO: packages 'ks' et 'kedd', notamment pour les kernel density derivative estimates

#' @title 
#' Estimation of the Mode(s) or Most Likely Value(s)
#' 
#' @description 
#' \code{mlv} is a generic function for estimating the mode of a univariate distribution. 
#' Different estimates (or methods) are provided: 
#' \itemize{
#'   \item \code{\link{mfv}}, which returns the most frequent value(s) in a given numerical vector, 
#'   \item the \code{\link{Lientz}} mode estimator, which is the value minimizing the Lientz function estimate, 
#'   \item the Chernoff mode estimator, also called \code{\link{naive}} mode estimator, 
#'   which is defined as the center of the interval of given length containing the most observations, 
#'   \item the \code{\link{Venter}} mode estimator, including the \code{\link{shorth}}, i.e. the midpoint of the modal interval, 
#'   \item the \code{\link{Grenander}} mode estimator, 
#'   \item the half sample mode (\code{\link{HSM}}) and the half range mode (\code{\link{HRM}}), which are iterative versions of the Venter mode estimator, 
#'   \item \code{\link{Parzen}}'s kernel mode estimator, which is the value maximizing the kernel density estimate, 
#'   \item the \code{\link{Tsybakov}} mode estimator, based on a gradient-like recursive algorithm, 
#'   \item the \code{\link{Asselin}} de Beauville mode estimator, based on a algorithm detecting chains and holes in the sample, 
#'   \item the \code{\link{Vieu}} mode estimator, 
#'   \item the \code{\link{meanshift}} mode estimator. 
#' }
#' 
#' \code{mlv} can also be used to compute the mode of a given distribution, with \code{mlv.character}.
#' 
#' @details 
#' For the default method of \code{mlv}, available methods are \code{"lientz"}, 
#' \code{"naive"}, \code{"venter"}, 
#' \code{"grenander"}, \code{"hsm"}, \code{"parzen"}, 
#' \code{"tsybakov"}, \code{"asselin"}, and \code{"meanshift"}. 
#' See the description above and the associated links. 
#' 
#' If \code{x} is of class \code{"character"} (with length > 1), 
#' \code{"factor"}, or \code{"integer"}, then the most frequent value found in 
#' \code{x} is returned using \code{\link[statip]{mfv}} from package 
#' \pkg{statip}. 
#' 
#' If \code{x} is of class \code{"character"} (with length 1), 
#' \code{x} should be one of \code{"beta"}, \code{"cauchy"}, \code{"gev"}, etc. 
#' i.e. a character for which a function \code{*Mode} exists 
#' (for instance \code{betaMode}, \code{cauchyMode}, etc.). 
#' See \code{\link[modeest]{distrMode}} for the available functions. 
#' The mode of the corresponding distribution is returned. 
#' 
# #' If \code{x} is of class \code{"density"}, the value where the density is 
# #' maximised is returned. 
# #' 
#' If \code{x} is of class \code{mlv.lientz}, see \code{\link[modeest]{Lientz}} 
#' for more details. 
#' 
#' @references 
#' See the references on mode estimation on the \code{\link[modeest]{modeest-package}}'s page. 
#' 
#' @param x
#' numeric (vector of observations), or an object of class \code{"factor"}, \code{"integer"}, etc. 
#' 
#' @param bw
#' numeric. The bandwidth to be used. 
#' This may have different meanings regarding the \code{method} used. 
#' 
#' @param method
#' character. One of the methods available for computing the mode estimate. See 'Details'. 
#' 
#' @param na.rm
#' logical. Should missing values be removed? 
#' 
# #' @param all
# #' logical. 
# #' 
# #' @param abc
# #' logical. If \code{FALSE} (the default), the estimate of the density function 
# #' is maximised using \code{\link{optim}}. 
# #' 
#' @param ...
#' Further arguments to be passed to the function called for computation. 
#' 
#' @return 
#' A vector of the same type as \code{x}. 
#' Be aware that the length of this vector can be \code{> 1}. 
#' 
#' @seealso 
#' \code{\link[statip]{mfv}}, 
#' \code{\link[modeest]{parzen}}, 
#' \code{\link[modeest]{venter}}, 
#' \code{\link[modeest]{meanshift}},
#' \code{\link[modeest]{grenander}}, 
# #' \code{\link[modeest]{hrm}}, 
#' \code{\link[modeest]{hsm}}, 
#' \code{\link[modeest]{lientz}}, 
#' \code{\link[modeest]{naive}}, 
#' \code{\link[modeest]{tsybakov}}, 
#' \code{\link[modeest]{skewness}}
#' 
#' @export
#' 
#' @examples 
#' # Unimodal distribution
#' x <- rbeta(1000,23,4)
#' 
#' ## True mode
#' betaMode(23, 4)
#' # or
#' mlv("beta", shape1 = 23, shape2 = 4)
#' 
#' ## Be aware of this behaviour: 
#' mlv("norm") # returns 0, the mode of the standard normal distribution
#' mlv("normal") # returns 0 again, since "normal" is matched with "norm"
#' mlv("abnormal") # returns "abnormal", since the input vector "abrnormal" 
#' # is not recognized as a distribution name, hence is taken as a character 
#' # vector from which the most frequent value is requested. 
#' 
#' ## Estimate of the mode
#' mlv(x, method = "lientz", bw = 0.2)
#' mlv(x, method = "naive", bw = 1/3)
#' mlv(x, method = "venter", type = "shorth")
#' mlv(x, method = "grenander", p = 4)
# #' mlv(x, method = "hrm", bw = 0.3)
#' mlv(x, method = "hsm")
#' mlv(x, method = "parzen", kernel = "gaussian")
#' mlv(x, method = "tsybakov", kernel = "gaussian")
#' mlv(x, method = "asselin", bw = 2/3)
#' mlv(x, method = "vieu")
#' mlv(x, method = "meanshift")
#' 
mlv <-
function(x,
         ...)
{
  UseMethod("mlv")
}


#' @importFrom statip name2distr
#' @export
#' @rdname mlv
#' 
mlv.character <- 
function(x, 
         na.rm = FALSE,
         ...)
{
  stopifnot(is.character(x))
  if (length(x)==1L && statip::name2distr(x) %in% .distributionsList()) {
    distrMode(x, ...)
  } else {
    mfv(x, na_rm = na.rm)
  }
}


#' @export
#' @rdname mlv
#' 
mlv.factor <-
function(x,
         na.rm = FALSE,
         ...)
{
  stopifnot(is.factor(x))
  mfv(x, na_rm = na.rm)
}


#' @export
#' @rdname mlv
#' 
mlv.logical <-
function(x,
         na.rm = FALSE,
         ...)
{
  stopifnot(is.logical(x))
  mfv(x, na_rm = na.rm)
}


#' @export
#' @rdname mlv
#' 
mlv.integer <-
function(x,
         na.rm = FALSE,
         ...)
{
  stopifnot(is.integer(x))
  mfv(x, na_rm = na.rm)
}


# #' @importFrom bazar is_wholenumber
#' @export
#' @rdname mlv
#' 
mlv.default <-
function(x,
         bw = NULL,
         method,
         na.rm = FALSE,
         ...)
{

  stopifnot(is.numeric(x))
  x <- as.vector(x)

  #test <- bazar::is_wholenumber(x)
  #if (is.na(test) || test) {
  #  return(mfv(as.integer(round(x)), na_rm = na.rm))
  #}
  
  x.na <- is.na(x)
  if (any(x.na)) {
    if (na.rm) {
      x <- x[!x.na]
    } else {
      stop("argument 'x' contains missing values", 
           call. = FALSE)
    }
  }

  x.finite <- is.finite(x)
  if (any(!x.finite)) {
    x <- x[x.finite]
  }

  if (missing(method)) {
    warning("argument 'method' is missing. Data are supposed to be continuous. 
            Default method 'shorth' is used", 
            call. = FALSE)
    method <- "shorth"
  #} else if (tolower(method) == "mfv") {
  #  stop("incorrect 'method' argument")
  } else if (pmatch(tolower(method), c("density", "kernel"), nomatch = 0)) {
    method <- "parzen"
  } else method <- match.arg(tolower(method), .methodsList())
  
  if (method == "lientz") method <- "mlv.lientz"
  
  do.call(method, list(x = x, bw = bw, ...)) # possibly length > 1
  #mean(theta)
}


# #' @export
# #' @rdname mlv
# #' 
# mlv.density <-
# function(x,
#          all = TRUE, 
#          abc = FALSE,
#          ...)
# {
#   # TODO: A MODIFIER EN MEME TEMPS QUE 'parzen'
#   
#   if (!inherits(x, "density")) stop("argument 'x' must inherit from class 'density'")
#   
#   y <- x$y
#   x <- x$x
# 
#   den.s <- stats::smooth.spline(x, y, all.knots=TRUE, spar=spar)
#   s.1 <- stats::predict(den.s, den.s$x, deriv = 1)
#   s.0 <- stats::predict(den.s, den.s$x, deriv = 0)
#   
#   den.sign <- sign(s.1$y)
#   b <- rle(den.sign)$values
#   nmodes <- length(b)/2
#   #if (nmodes > 10) { nmodes <- 10 }
#   if (is.na(nmodes)) { nmodes <- 0 }
#   
#   a <- c(1,1+which(diff(den.sign)!=0))
#   df <- data.frame(a,b)
#   df <- df[which(df$b %in% -1),]
#   modes <- s.1$x[df$a]
#   density <- s.0$y[df$a]
#   df2 <- data.frame(modes,density)
#   df2 <- df2[with(df2, order(-density)), ] # ordered by density
#   df2
#   
#   
# #-------------------  
#   
#   idx <- y == max(y)
#   M <- x[idx]
# 
#   if (all) {
#     yy <- c(0, y, 0)
#     ny <- length(yy)
#     idx <- (yy[2:(ny - 1)] > yy[1:(ny - 2)]) & (yy[2:(ny - 1)] > yy[3:ny])
#     M <- unique(c(x[idx], M))
#   }
#    
#   M
# }


#' @export
#' @rdname mlv
#'
mlv1 <-
function(x, 
         ...)
{
  mlv(x, ...)[[1L]]
}