File: compute_count.R

package info (click to toggle)
r-cran-ggvis 0.4.4%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 1,716 kB
  • sloc: sh: 25; makefile: 2
file content (111 lines) | stat: -rw-r--r-- 3,458 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#' Count data at each location
#'
#' @param x Dataset-like object to count. Built-in methods for data frames,
#'   grouped data frames and ggvis visualisations.
#' @param x_var,w_var Names of x and weight variables.
#' @seealso \code{\link{compute_bin}} For counting cases within ranges of
#'   a continuous variable.
#' @seealso \code{\link{compute_align}} For calculating the "width" of data.
#' @export
#' @return A data frame with columns:
#'  \item{count_}{the number of points}
#'  \item{x_}{the x value where the count was made}
#'
#' The width of each "bin" is set to the resolution of the data -- that is, the
#' smallest difference between two x values.
#'
#' @examples
#' mtcars %>% compute_count(~cyl)
#'
#' # Weight the counts by car weight value
#' mtcars %>% compute_count(~cyl, ~wt)
#'
#' # If there's one weight value at each x, it effectively just renames columns.
#' pressure %>% compute_count(~temperature, ~pressure)
#' # Also get the width of each bin
#' pressure %>% compute_count(~temperature, ~pressure) %>% compute_align(~x_)
#'
#' # It doesn't matter whether you transform inside or outside of a vis
#' mtcars %>% compute_count(~cyl, ~wt) %>%
#'   compute_align(~x_) %>%
#'   ggvis(x = ~xmin_, x2 = ~xmax_, y = ~count_, y2 = 0) %>%
#'   layer_rects()
#'
#' mtcars %>%
#'   ggvis(x = ~xmin_, x2 = ~xmax_, y = ~count_, y2 = 0) %>%
#'   compute_count(~cyl, ~wt) %>%
#'   compute_align(~x_) %>%
#'   layer_rects()
compute_count <- function(x, x_var, w_var = NULL) {
  UseMethod("compute_count")
}

#' @export
compute_count.data.frame <- function(x, x_var, w_var = NULL) {
  assert_that(is.formula(x_var))

  x_val <- eval_vector(x, x_var)

  if (is.null(w_var)) {
    w_val <- NULL
  } else {
    w_val <- eval_vector(x, w_var)
  }

  count_vector(x_val, weight = w_val)
}

#' @export
compute_count.grouped_df <- function(x, x_var, w_var = NULL) {
  dplyr::do(x, compute_count(., x_var, w_var = w_var))
}

#' @export
compute_count.ggvis <- function(x, x_var, w_var = NULL) {
  args <- list(x_var = x_var, w_var = w_var)

  register_computation(x, args, "count", function(data, args) {
    output <- do_call(compute_count, quote(data), .args = args)
    preserve_constants(data, output)
  })
}

# Count individual vector ------------------------------------------------------

count_vector <- function(x, weight = NULL, ...) {
  if (is.null(weight)) {
    weight <- rep.int(1, length(x))
  }

  # Preserve date and time types
  if (inherits(x, "POSIXct")) {
    tz <- attr(x, "tzone", TRUE)
    restore <- function(x) as.POSIXct(x, origin = "1970-01-01", tz = tz)
  } else if (inherits(x, "Date")) {
    restore <- function(x) structure(x, class = "Date")
  } else {
    restore <- identity
  }

  counts <- unname(as.vector(tapply(weight, x, sum, na.rm = TRUE)))

  if (is.factor(x)) {
    na_idx <- is.na(counts)
    # Factor levels that aren't represented in x result in NA, which we'll
    # drop.
    counts <- counts[!na_idx]
    # Get the factor levels, preserving factor-ness and order of factor levels.
    # Order should align with counts.
    values <- factor(levels(x), levels = levels(x))[!na_idx]
  } else {
    # Need to get unique values this way instead of using names(counts),
    # because names are strings but the x values aren't always strings.
    values <- unname(as.vector(tapply(x, x, unique, na.rm = TRUE)))
  }

  data.frame(
    count_ = counts,
    x_ = restore(values),
    stringsAsFactors = FALSE
  )
}