File: unnest-wider.R

package info (click to toggle)
r-cran-tidyr 1.3.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,720 kB
  • sloc: cpp: 268; sh: 9; makefile: 2
file content (260 lines) | stat: -rw-r--r-- 7,828 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
#' Unnest a list-column into columns
#'
#' @description
#' `unnest_wider()` turns each element of a list-column into a column. It
#' is most naturally suited to list-columns where every element is named,
#' and the names are consistent from row-to-row.
#' `unnest_wider()` preserves the rows of `x` while modifying the columns.
#'
#' Learn more in `vignette("rectangle")`.
#'
#' @inheritParams unnest_longer
#' @param names_sep If `NULL`, the default, the names will be left
#'   as is. If a string, the outer and inner names will be pasted together using
#'   `names_sep` as a separator.
#'
#'   If any values being unnested are unnamed, then `names_sep` must be
#'   supplied, otherwise an error is thrown. When `names_sep` is supplied,
#'   names are automatically generated for unnamed values as an increasing
#'   sequence of integers.
#' @param strict A single logical specifying whether or not to apply strict
#'   vctrs typing rules. If `FALSE`, typed empty values (like `list()` or
#'   `integer()`) nested within list-columns will be treated like `NULL` and
#'   will not contribute to the type of the unnested column. This is useful
#'   when working with JSON, where empty values tend to lose their type
#'   information and show up as `list()`.
#' @family rectangling
#' @export
#' @examples
#' df <- tibble(
#'   character = c("Toothless", "Dory"),
#'   metadata = list(
#'     list(
#'       species = "dragon",
#'       color = "black",
#'       films = c(
#'         "How to Train Your Dragon",
#'         "How to Train Your Dragon 2",
#'         "How to Train Your Dragon: The Hidden World"
#'       )
#'     ),
#'     list(
#'       species = "blue tang",
#'       color = "blue",
#'       films = c("Finding Nemo", "Finding Dory")
#'     )
#'   )
#' )
#' df
#'
#' # Turn all components of metadata into columns
#' df %>% unnest_wider(metadata)
#'
#' # Choose not to simplify list-cols of length-1 elements
#' df %>% unnest_wider(metadata, simplify = FALSE)
#' df %>% unnest_wider(metadata, simplify = list(color = FALSE))
#'
#' # You can also widen unnamed list-cols:
#' df <- tibble(
#'   x = 1:3,
#'   y = list(NULL, 1:3, 4:5)
#' )
#' # but you must supply `names_sep` to do so, which generates automatic names:
#' df %>% unnest_wider(y, names_sep = "_")
#'
#' # 0-length elements ---------------------------------------------------------
#' # The defaults of `unnest_wider()` treat empty types (like `list()`) as `NULL`.
#' json <- list(
#'   list(x = 1:2, y = 1:2),
#'   list(x = list(), y = 3:4),
#'   list(x = 3L, y = list())
#' )
#'
#' df <- tibble(json = json)
#' df %>%
#'   unnest_wider(json)
#'
#' # To instead enforce strict vctrs typing rules, use `strict`
#' df %>%
#'   unnest_wider(json, strict = TRUE)
unnest_wider <- function(data,
                         col,
                         names_sep = NULL,
                         simplify = TRUE,
                         strict = FALSE,
                         names_repair = "check_unique",
                         ptype = NULL,
                         transform = NULL) {

  check_data_frame(data)
  check_required(col)
  check_string(names_sep, allow_null = TRUE)
  check_bool(strict)

  error_call <- current_env()

  cols <- tidyselect::eval_select(enquo(col), data, allow_rename = FALSE)
  col_names <- names(cols)

  for (i in seq_along(cols)) {
    col <- cols[[i]]
    col_name <- col_names[[i]]

    data[[col]] <- col_to_wide(
      col = data[[col]],
      name = col_name,
      strict = strict,
      names_sep = names_sep
    )
  }

  data <- unchop(data, all_of(col_names), error_call = error_call)

  for (i in seq_along(cols)) {
    col <- cols[[i]]

    data[[col]] <- df_simplify(
      x = data[[col]],
      ptype = ptype,
      transform = transform,
      simplify = simplify
    )
  }

  unpack(
    data = data,
    cols = all_of(col_names),
    names_repair = names_repair,
    error_call = error_call
  )
}

# Converts a column of any type to a `list_of<tbl>`
col_to_wide <- function(col, name, strict, names_sep, error_call = caller_env()) {
  if (!vec_is_list(col)) {
    ptype <- vec_ptype(col)
    col <- vec_chop(col)
    col <- new_list_of(col, ptype = ptype)
  }

  # If we don't have a list_of, then a `NULL` `col_ptype` will get converted to
  # a 1 row, 0 col tibble for `elt_ptype`
  col_ptype <- list_of_ptype(col)
  elt_ptype <- elt_to_wide(col_ptype, name = name, strict = strict, names_sep = names_sep, error_call = error_call)
  elt_ptype <- vec_ptype(elt_ptype)

  # Avoid expensive dispatch from `[[.list_of`
  out <- tidyr_new_list(col)

  out <- with_indexed_errors(
    map(
      out,
      function(x) elt_to_wide(
        x = x,
        name = name,
        strict = strict,
        names_sep = names_sep,
        error_call = NULL
      )
    ),
    message = function(cnd) {
      c(
        i = cli::format_inline("In column: {.code {name}}."),
        i = cli::format_inline("In row: {cnd$location}.")
      )
    },
    .error_call = error_call
  )

  # In the sole case of a list_of<data_frame>, we can be sure that the
  # elements of `out` will all be of the same type. Otherwise,
  # - If `col` isn't a list-of, we don't know the element type.
  # - If `col` is a list-of but not one with a data frame ptype, then we
  #   don't actually know if all elements have the same ptype, because the
  #   number of resulting columns per element depends on that element's size.
  has_identical_elements <- is_list_of(col) && is.data.frame(col_ptype)

  if (has_identical_elements) {
    ptype <- elt_ptype
  } else {
    ptype <- vec_ptype_common(elt_ptype, !!!out)
    out <- vec_cast_common(!!!out, .to = ptype)
  }

  out <- new_list_of(out, ptype = ptype)

  out
}

# Convert a list element to a wide tibble with:
# - 1 row
# - N cols, where `N = vec_size(x)`
# - Column names come from `vec_names(x)`
# - Data frames are treated specially. They are treated like heterogeneous lists
#   where we know the type of each list element.
# - When !strict, lists are treated specially as well. Any typed empty elements
#   are replaced with `NULL` so their type doesn't interfere with the final
#   common type. This is extremely useful for JSON data,
#   where round tripping a typed empty cell results in an empty `list()` that
#   won't be able to combine with other typed non-empty cells. However, it
#   can create inconsistencies with vctrs typing rules.
elt_to_wide <- function(x, name, strict, names_sep, error_call = caller_env()) {
  if (is.null(x)) {
    x <- list()
  }

  if (!vec_is(x)) {
    cli::cli_abort(
      "List-column must only contain vectors.",
      call = error_call
    )
  }

  if (is.data.frame(x)) {
    # Extremely special case for data.frames,
    # which we want to treat like lists where we know the type of each element
    x <- tidyr_new_list(x)
    x <- map(x, list_of)
    names <- names2(x)
    x <- set_names(x, NULL)
  } else {
    if (!strict && vec_is_list(x)) {
      empty <- list_sizes(x) == 0L
      if (any(empty)) {
        # Can't use vec_assign(), see https://github.com/r-lib/vctrs/issues/1424
        x[empty] <- list(NULL)
      }
    }

    names <- vec_names2(x)
    x <- vec_set_names(x, NULL)
    x <- vec_chop(x)
  }

  empty <- names == ""
  any_empty <- any(empty)

  if (is.null(names_sep)) {
    if (any_empty) {
      stop_use_names_sep(error_call = error_call)
    }
  } else {
    if (any_empty) {
      names[empty] <- as.character(which(empty))
    }
    names <- apply_names_sep(name, names, names_sep)
  }

  x <- set_names(x, names)
  x <- new_data_frame(x, n = 1L)

  x
}

stop_use_names_sep <- function(error_call = caller_env()) {
  message <- c(
    "Can't unnest elements with missing names.",
    i = "Supply {.arg names_sep} to generate automatic names."
  )
  cli::cli_abort(message, call = error_call)
}