File: fill.R

package info (click to toggle)
r-cran-tidyr 1.3.1-1
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 2,720 kB
  • sloc: cpp: 268; sh: 9; makefile: 2
file content (112 lines) | stat: -rw-r--r-- 4,051 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#' Fill in missing values with previous or next value
#'
#' Fills missing values in selected columns using the next or previous entry.
#' This is useful in the common output format where values are not repeated,
#' and are only recorded when they change.
#'
#' Missing values are replaced in atomic vectors; `NULL`s are replaced in lists.
#'
#' @section Grouped data frames:
#' With grouped data frames created by [dplyr::group_by()], `fill()` will be
#' applied _within_ each group, meaning that it won't fill across group
#' boundaries.
#'
#' @param data A data frame.
#' @param ... <[`tidy-select`][tidyr_tidy_select]> Columns to fill.
#' @param .direction Direction in which to fill missing values. One of
#'   `"down"` (the default), `"up"`, `"downup"` (first down and then up),
#'   or `"updown"` (first up and then down).
#' @export
#' @examples
#' # direction = "down" --------------------------------------------------------
#' # Value (year) is recorded only when it changes
#' sales <- tibble::tribble(
#'   ~quarter, ~year, ~sales,
#'   "Q1",    2000,    66013,
#'   "Q2",      NA,    69182,
#'   "Q3",      NA,    53175,
#'   "Q4",      NA,    21001,
#'   "Q1",    2001,    46036,
#'   "Q2",      NA,    58842,
#'   "Q3",      NA,    44568,
#'   "Q4",      NA,    50197,
#'   "Q1",    2002,    39113,
#'   "Q2",      NA,    41668,
#'   "Q3",      NA,    30144,
#'   "Q4",      NA,    52897,
#'   "Q1",    2004,    32129,
#'   "Q2",      NA,    67686,
#'   "Q3",      NA,    31768,
#'   "Q4",      NA,    49094
#' )
#' # `fill()` defaults to replacing missing data from top to bottom
#' sales %>% fill(year)
#'
#' # direction = "up" ----------------------------------------------------------
#' # Value (pet_type) is missing above
#' tidy_pets <- tibble::tribble(
#'   ~rank, ~pet_type, ~breed,
#'   1L,        NA,    "Boston Terrier",
#'   2L,        NA,    "Retrievers (Labrador)",
#'   3L,        NA,    "Retrievers (Golden)",
#'   4L,        NA,    "French Bulldogs",
#'   5L,        NA,    "Bulldogs",
#'   6L,     "Dog",    "Beagles",
#'   1L,        NA,    "Persian",
#'   2L,        NA,    "Maine Coon",
#'   3L,        NA,    "Ragdoll",
#'   4L,        NA,    "Exotic",
#'   5L,        NA,    "Siamese",
#'   6L,     "Cat",    "American Short"
#' )
#'
#' # For values that are missing above you can use `.direction = "up"`
#' tidy_pets %>%
#'   fill(pet_type, .direction = "up")
#'
#' # direction = "downup" ------------------------------------------------------
#' # Value (n_squirrels) is missing above and below within a group
#' squirrels <- tibble::tribble(
#'   ~group,    ~name,     ~role,     ~n_squirrels,
#'   1,      "Sam",    "Observer",   NA,
#'   1,     "Mara", "Scorekeeper",    8,
#'   1,    "Jesse",    "Observer",   NA,
#'   1,      "Tom",    "Observer",   NA,
#'   2,     "Mike",    "Observer",   NA,
#'   2,  "Rachael",    "Observer",   NA,
#'   2,  "Sydekea", "Scorekeeper",   14,
#'   2, "Gabriela",    "Observer",   NA,
#'   3,  "Derrick",    "Observer",   NA,
#'   3,     "Kara", "Scorekeeper",    9,
#'   3,    "Emily",    "Observer",   NA,
#'   3, "Danielle",    "Observer",   NA
#' )
#'
#' # The values are inconsistently missing by position within the group
#' # Use .direction = "downup" to fill missing values in both directions
#' squirrels %>%
#'   dplyr::group_by(group) %>%
#'   fill(n_squirrels, .direction = "downup") %>%
#'   dplyr::ungroup()
#'
#' # Using `.direction = "updown"` accomplishes the same goal in this example
fill <- function(data, ..., .direction = c("down", "up", "downup", "updown")) {
  # S3 generic: the actual filling is done by the method selected for the
  # class of `data` (e.g. `fill.data.frame()` for data frames).
  #
  # Validate `...` before dispatching. Judging by its name this rejects named
  # arguments passed through `...`; the helper is defined outside this file,
  # so confirm its exact behaviour there.
  check_dots_unnamed()
  # Dispatch on the class of the first argument (`data`). Control does not
  # come back to this function afterwards, so code placed below this call
  # would never run.
  UseMethod("fill")
}

#' @export
fill.data.frame <- function(data, ..., .direction = c("down", "up", "downup", "updown")) {
  # Data-frame method for `fill()`: fills missing values in the columns
  # selected by `...`, moving in the direction given by `.direction`, and
  # returns the modified data frame.

  # `eval_select()` yields column *positions* relative to every column of
  # `data`. Only the column *names* are kept, because the selection is
  # resolved a second time below against a different column set: on a grouped
  # data frame the grouping columns are not available for mutation, so raw
  # positions would point at the wrong column or fall out of range (and be
  # silently dropped by `any_of()`).
  selected <- tidyselect::eval_select(expr(c(...)), data, allow_rename = FALSE)
  vars <- names(selected)

  # Validate `.direction` against the supported choices ("down" is the
  # documented default).
  .direction <- arg_match0(
    arg = .direction,
    values = c("down", "up", "downup", "updown")
  )

  # Per-column worker; `.direction` is captured from the enclosing call.
  fn <- function(col) {
    vec_fill_missing(col, direction = .direction)
  }

  # `any_of()` quietly skips selected names that cannot be mutated here
  # (e.g. grouping columns) instead of raising an error. On grouped data the
  # fill is applied within each group.
  dplyr::mutate(data, dplyr::across(any_of(vars), .fns = fn))
}