File: stat-boxplot.R

package info (click to toggle)
r-cran-ggplot2 4.0.2%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 11,084 kB
  • sloc: sh: 15; makefile: 5
file content (121 lines) | stat: -rw-r--r-- 4,361 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#' @rdname Stat
#' @format NULL
#' @usage NULL
#' @export
StatBoxplot <- ggproto("StatBoxplot", Stat,
  required_aes = c("y|x"),
  non_missing_aes = "weight",
  optional_aes = "width",
  # either the x or y aesthetic will get dropped during
  # statistical transformation, depending on the orientation
  dropped_aes = c("x", "y", "weight"),
  setup_data = function(self, data, params) {
    data <- flip_data(data, params$flipped_aes)
    data$x <- data$x %||% 0
    data <- remove_missing(
      data,
      na.rm = params$na.rm,
      vars = "x",
      name = "stat_boxplot"
    )
    flip_data(data, params$flipped_aes)
  },

  setup_params = function(self, data, params) {
    params$flipped_aes <- has_flipped_aes(data, params, main_is_orthogonal = TRUE,
                                          group_has_equal = TRUE,
                                          main_is_optional = TRUE,
                                        default = NA)

    if (is.na(params$flipped_aes) && any(c("x", "y") %in% names(data))) {
      cli::cli_warn("Orientation is not uniquely specified when both the x and y aesthetics are continuous. Picking default orientation 'x'.")
      params$flipped_aes <- FALSE
    }
    data <- flip_data(data, params$flipped_aes)

    has_x <- !(is.null(data$x) && is.null(params$x))
    has_y <- !(is.null(data$y) && is.null(params$y))
    if (!has_x && !has_y) {
      cli::cli_abort("{.fn {snake_class(self)}} requires an {.field x} or {.field y} aesthetic.")
    }

    params$width <- params$width %||% (resolution(data$x %||% 0, discrete = TRUE) * 0.75)

    if (!is_mapped_discrete(data$x) && is.double(data$x) && !has_groups(data) && any(data$x != data$x[1L])) {
      cli::cli_warn(c(
        "Continuous {.field {flipped_names(params$flipped_aes)$x}} aesthetic",
        "i" = "did you forget {.code aes(group = ...)}?"
      ))
    }

    params
  },

  extra_params = c("na.rm", "orientation"),

  compute_group = function(data, scales, width = NULL, na.rm = FALSE, coef = 1.5, flipped_aes = FALSE) {
    data <- flip_data(data, flipped_aes)
    qs <- c(0, 0.25, 0.5, 0.75, 1)

    if (!is.null(data$weight)) {
      mod <- quantreg::rq(y ~ 1, weights = weight, data = data, tau = qs)
      stats <- as.numeric(stats::coef(mod))
    } else {
      stats <- as.numeric(stats::quantile(data$y, qs))
    }
    names(stats) <- c("ymin", "lower", "middle", "upper", "ymax")
    iqr <- diff(stats[c(2, 4)])

    outliers <- data$y < (stats[2] - coef * iqr) | data$y > (stats[4] + coef * iqr)
    if (any(outliers)) {
      stats[c(1, 5)] <- range(c(stats[2:4], data$y[!outliers]), na.rm = TRUE)
    }
    if (length(data$width) > 0L) {
      width <- data$width[1L]
    } else if (vec_unique_count(data$x) > 1) {
      width <- diff(range(data$x)) * 0.9
    }

    df <- data_frame0(!!!as.list(stats))
    df$outliers <- list(data$y[outliers])

    if (is.null(data$weight)) {
      n <- sum(!is.na(data$y))
    } else {
      # Sum up weights for non-NA positions of y and weight
      n <- sum(data$weight[!is.na(data$y) & !is.na(data$weight)])
    }

    df$notchupper <- df$middle + 1.58 * iqr / sqrt(n)
    df$notchlower <- df$middle - 1.58 * iqr / sqrt(n)

    df$x <- if (is.factor(data$x)) data$x[1] else mean(range(data$x))
    df$width <- width
    df$relvarwidth <- sqrt(n)
    df$flipped_aes <- flipped_aes
    flip_data(df, flipped_aes)
  }
)

#' @rdname geom_boxplot
#' @param coef Length of the whiskers as multiple of IQR. Defaults to 1.5.
#' @inheritParams stat_identity
#' @export
#' @eval rd_computed_vars(
#'   .details = "`stat_boxplot()` provides the following variables, some of
#'   which depend on the orientation:",
#'   width = "width of boxplot.",
#'   "ymin|xmin" = "lower whisker = smallest observation greater than or equal
#'   to lower hinger - 1.5 * IQR.",
#'   "lower|xlower" = "lower hinge, 25% quantile.",
#'   notchlower = "lower edge of notch = median - 1.58 * IQR / sqrt(n).",
#'   "middle|xmiddle" = "median, 50% quantile.",
#'   notchupper = "upper edge of notch = median + 1.58 * IQR / sqrt(n).",
#'   "upper|xupper" = "upper hinge, 75% quantile.",
#'   "ymax|xmax" = "upper whisker = largest observation less than or equal to
#'   upper hinger + 1.5 * IQR."
#' )
stat_boxplot <- make_constructor(
  StatBoxplot, geom = "boxplot", position = "dodge2",
  orientation = NA, omit = "width"
)