1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260
|
#' Unnest a list-column into columns
#'
#' @description
#' `unnest_wider()` turns each element of a list-column into a column. It
#' is most naturally suited to list-columns where every element is named,
#' and the names are consistent from row-to-row.
#' `unnest_wider()` preserves the rows of `x` while modifying the columns.
#'
#' Learn more in `vignette("rectangle")`.
#'
#' @inheritParams unnest_longer
#' @param names_sep If `NULL`, the default, the names will be left
#' as is. If a string, the outer and inner names will be pasted together using
#' `names_sep` as a separator.
#'
#' If any values being unnested are unnamed, then `names_sep` must be
#' supplied, otherwise an error is thrown. When `names_sep` is supplied,
#' names are automatically generated for unnamed values as an increasing
#' sequence of integers.
#' @param strict A single logical specifying whether or not to apply strict
#' vctrs typing rules. If `FALSE`, typed empty values (like `list()` or
#' `integer()`) nested within list-columns will be treated like `NULL` and
#' will not contribute to the type of the unnested column. This is useful
#' when working with JSON, where empty values tend to lose their type
#' information and show up as `list()`.
#' @family rectangling
#' @export
#' @examples
#' df <- tibble(
#' character = c("Toothless", "Dory"),
#' metadata = list(
#' list(
#' species = "dragon",
#' color = "black",
#' films = c(
#' "How to Train Your Dragon",
#' "How to Train Your Dragon 2",
#' "How to Train Your Dragon: The Hidden World"
#' )
#' ),
#' list(
#' species = "blue tang",
#' color = "blue",
#' films = c("Finding Nemo", "Finding Dory")
#' )
#' )
#' )
#' df
#'
#' # Turn all components of metadata into columns
#' df %>% unnest_wider(metadata)
#'
#' # Choose not to simplify list-cols of length-1 elements
#' df %>% unnest_wider(metadata, simplify = FALSE)
#' df %>% unnest_wider(metadata, simplify = list(color = FALSE))
#'
#' # You can also widen unnamed list-cols:
#' df <- tibble(
#' x = 1:3,
#' y = list(NULL, 1:3, 4:5)
#' )
#' # but you must supply `names_sep` to do so, which generates automatic names:
#' df %>% unnest_wider(y, names_sep = "_")
#'
#' # 0-length elements ---------------------------------------------------------
#' # The defaults of `unnest_wider()` treat empty types (like `list()`) as `NULL`.
#' json <- list(
#' list(x = 1:2, y = 1:2),
#' list(x = list(), y = 3:4),
#' list(x = 3L, y = list())
#' )
#'
#' df <- tibble(json = json)
#' df %>%
#' unnest_wider(json)
#'
#' # To instead enforce strict vctrs typing rules, use `strict`
#' df %>%
#' unnest_wider(json, strict = TRUE)
unnest_wider <- function(data,
col,
names_sep = NULL,
simplify = TRUE,
strict = FALSE,
names_repair = "check_unique",
ptype = NULL,
transform = NULL) {
check_data_frame(data)
check_required(col)
check_string(names_sep, allow_null = TRUE)
check_bool(strict)
error_call <- current_env()
cols <- tidyselect::eval_select(enquo(col), data, allow_rename = FALSE)
col_names <- names(cols)
for (i in seq_along(cols)) {
col <- cols[[i]]
col_name <- col_names[[i]]
data[[col]] <- col_to_wide(
col = data[[col]],
name = col_name,
strict = strict,
names_sep = names_sep
)
}
data <- unchop(data, all_of(col_names), error_call = error_call)
for (i in seq_along(cols)) {
col <- cols[[i]]
data[[col]] <- df_simplify(
x = data[[col]],
ptype = ptype,
transform = transform,
simplify = simplify
)
}
unpack(
data = data,
cols = all_of(col_names),
names_repair = names_repair,
error_call = error_call
)
}
# Converts a column of any type to a `list_of<tbl>`
col_to_wide <- function(col, name, strict, names_sep, error_call = caller_env()) {
if (!vec_is_list(col)) {
ptype <- vec_ptype(col)
col <- vec_chop(col)
col <- new_list_of(col, ptype = ptype)
}
# If we don't have a list_of, then a `NULL` `col_ptype` will get converted to
# a 1 row, 0 col tibble for `elt_ptype`
col_ptype <- list_of_ptype(col)
elt_ptype <- elt_to_wide(col_ptype, name = name, strict = strict, names_sep = names_sep, error_call = error_call)
elt_ptype <- vec_ptype(elt_ptype)
# Avoid expensive dispatch from `[[.list_of`
out <- tidyr_new_list(col)
out <- with_indexed_errors(
map(
out,
function(x) elt_to_wide(
x = x,
name = name,
strict = strict,
names_sep = names_sep,
error_call = NULL
)
),
message = function(cnd) {
c(
i = cli::format_inline("In column: {.code {name}}."),
i = cli::format_inline("In row: {cnd$location}.")
)
},
.error_call = error_call
)
# In the sole case of a list_of<data_frame>, we can be sure that the
# elements of `out` will all be of the same type. Otherwise,
# - If `col` isn't a list-of, we don't know the element type.
# - If `col` is a list-of but not one with a data frame ptype, then we
# don't actually know if all elements have the same ptype, because the
# number of resulting columns per element depends on that element's size.
has_identical_elements <- is_list_of(col) && is.data.frame(col_ptype)
if (has_identical_elements) {
ptype <- elt_ptype
} else {
ptype <- vec_ptype_common(elt_ptype, !!!out)
out <- vec_cast_common(!!!out, .to = ptype)
}
out <- new_list_of(out, ptype = ptype)
out
}
# Convert a list element to a wide tibble with:
# - 1 row
# - N cols, where `N = vec_size(x)`
# - Column names come from `vec_names(x)`
# - Data frames are treated specially. They are treated like heterogeneous lists
# where we know the type of each list element.
# - When !strict, lists are treated specially as well. Any typed empty elements
# are replaced with `NULL` so their type doesn't interfere with the final
# common type. This is extremely useful for JSON data,
# where round tripping a typed empty cell results in an empty `list()` that
# won't be able to combine with other typed non-empty cells. However, it
# can create inconsistencies with vctrs typing rules.
elt_to_wide <- function(x, name, strict, names_sep, error_call = caller_env()) {
if (is.null(x)) {
x <- list()
}
if (!vec_is(x)) {
cli::cli_abort(
"List-column must only contain vectors.",
call = error_call
)
}
if (is.data.frame(x)) {
# Extremely special case for data.frames,
# which we want to treat like lists where we know the type of each element
x <- tidyr_new_list(x)
x <- map(x, list_of)
names <- names2(x)
x <- set_names(x, NULL)
} else {
if (!strict && vec_is_list(x)) {
empty <- list_sizes(x) == 0L
if (any(empty)) {
# Can't use vec_assign(), see https://github.com/r-lib/vctrs/issues/1424
x[empty] <- list(NULL)
}
}
names <- vec_names2(x)
x <- vec_set_names(x, NULL)
x <- vec_chop(x)
}
empty <- names == ""
any_empty <- any(empty)
if (is.null(names_sep)) {
if (any_empty) {
stop_use_names_sep(error_call = error_call)
}
} else {
if (any_empty) {
names[empty] <- as.character(which(empty))
}
names <- apply_names_sep(name, names, names_sep)
}
x <- set_names(x, names)
x <- new_data_frame(x, n = 1L)
x
}
stop_use_names_sep <- function(error_call = caller_env()) {
message <- c(
"Can't unnest elements with missing names.",
i = "Supply {.arg names_sep} to generate automatic names."
)
cli::cli_abort(message, call = error_call)
}
|