File: stat-boxplot.r

package info (click to toggle)
r-cran-ggplot2 1.0.0-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 4,412 kB
  • sloc: sh: 9; makefile: 1
file content (84 lines) | stat: -rw-r--r-- 3,065 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#' Calculate components of box and whisker plot.
#'
#' @section Aesthetics:
#' \Sexpr[results=rd,stage=build]{ggplot2:::rd_aesthetics("stat", "boxplot")}
#'
#' @param coef length of the whiskers as multiple of IQR.  Defaults to 1.5
#' @param na.rm If \code{FALSE} (the default), removes missing values with
#'    a warning.  If \code{TRUE} silently removes missing values.
#' @inheritParams stat_identity
#' @return A data frame with additional columns:
#'   \item{width}{width of boxplot}
#'   \item{ymin}{lower whisker = smallest observation greater than or equal to lower hinge - 1.5 * IQR}
#'   \item{lower}{lower hinge, 25\% quantile}
#'   \item{notchlower}{lower edge of notch = median - 1.58 * IQR / sqrt(n)}
#'   \item{middle}{median, 50\% quantile}
#'   \item{notchupper}{upper edge of notch = median + 1.58 * IQR / sqrt(n)}
#'   \item{upper}{upper hinge, 75\% quantile}
#'   \item{ymax}{upper whisker = largest observation less than or equal to upper hinge + 1.5 * IQR}
#' @seealso See \code{\link{geom_boxplot}} for examples.
#' @export
#' @examples
#' # See geom_boxplot for examples
stat_boxplot <- function (mapping = NULL, data = NULL, geom = "boxplot", position = "dodge",
na.rm = FALSE, coef = 1.5, ...) {
  StatBoxplot$new(mapping = mapping, data = data, geom = geom,
  position = position, na.rm = na.rm, coef = coef, ...)
}

StatBoxplot <- proto(Stat, {
  objname <- "boxplot"

  required_aes <- c("x", "y")
  default_geom <- function(.) GeomBoxplot

  calculate_groups <- function(., data, na.rm = FALSE, width = NULL, ...) {
    data <- remove_missing(data, na.rm, c("y", "weight"), name="stat_boxplot",
      finite = TRUE)
    data$weight <- data$weight %||% 1
    width <- width %||%  resolution(data$x) * 0.75

    .super$calculate_groups(., data, na.rm = na.rm, width = width, ...)
  }

  calculate <- function(., data, scales, width=NULL, na.rm = FALSE, coef = 1.5, ...) {
    with(data, {
      qs <- c(0, 0.25, 0.5, 0.75, 1)
      if (length(unique(weight)) != 1) {
        try_require("quantreg")
        stats <- as.numeric(coef(rq(y ~ 1, weights = weight, tau=qs)))
      } else {
        stats <- as.numeric(quantile(y, qs))
      }
      names(stats) <- c("ymin", "lower", "middle", "upper", "ymax")

      iqr <- diff(stats[c(2, 4)])

      outliers <- y < (stats[2] - coef * iqr) | y > (stats[4] + coef * iqr)
      if (any(outliers)) {
        stats[c(1, 5)] <- range(c(stats[2:4], y[!outliers]), na.rm=TRUE)
      }

      if (length(unique(x)) > 1) width <- diff(range(x)) * 0.9

      df <- as.data.frame(as.list(stats))
      df$outliers <- I(list(y[outliers]))

      if (is.null(weight)) {
        n <- sum(!is.na(y))
      } else {
        # Sum up weights for non-NA positions of y and weight
        n <- sum(weight[!is.na(y) & !is.na(weight)])
      }

      df$notchupper <- df$middle + 1.58 * iqr / sqrt(n)
      df$notchlower <- df$middle - 1.58 * iqr / sqrt(n)

      transform(df,
        x = if (is.factor(x)) x[1] else mean(range(x)),
        width = width,
        relvarwidth = sqrt(n)
      )
    })
  }
})