1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
|
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(collapse = T, comment = "#>")
options(tibble.print_min = 4L, tibble.print_max = 4L)
## ----setup--------------------------------------------------------------------
library(dplyr, warn.conflicts = FALSE)
## ----include = FALSE----------------------------------------------------------
nest_by <- function(df, ...) {
df %>%
group_by(...) %>%
summarise(data = list(pick(everything()))) %>%
rowwise(...)
}
# mtcars %>% nest_by(cyl)
## -----------------------------------------------------------------------------
df <- tibble(x = 1:2, y = 3:4, z = 5:6)
df %>% rowwise()
## -----------------------------------------------------------------------------
df %>% mutate(m = mean(c(x, y, z)))
df %>% rowwise() %>% mutate(m = mean(c(x, y, z)))
## -----------------------------------------------------------------------------
df <- tibble(name = c("Mara", "Hadley"), x = 1:2, y = 3:4, z = 5:6)
df %>%
rowwise() %>%
summarise(m = mean(c(x, y, z)))
df %>%
rowwise(name) %>%
summarise(m = mean(c(x, y, z)))
## -----------------------------------------------------------------------------
df <- tibble(id = 1:6, w = 10:15, x = 20:25, y = 30:35, z = 40:45)
df
## -----------------------------------------------------------------------------
rf <- df %>% rowwise(id)
## -----------------------------------------------------------------------------
rf %>% mutate(total = sum(c(w, x, y, z)))
rf %>% summarise(total = sum(c(w, x, y, z)))
## -----------------------------------------------------------------------------
rf %>% mutate(total = sum(c_across(w:z)))
rf %>% mutate(total = sum(c_across(where(is.numeric))))
## -----------------------------------------------------------------------------
rf %>%
mutate(total = sum(c_across(w:z))) %>%
ungroup() %>%
mutate(across(w:z, ~ . / total))
## -----------------------------------------------------------------------------
df %>% mutate(total = rowSums(pick(where(is.numeric), -id)))
df %>% mutate(mean = rowMeans(pick(where(is.numeric), -id)))
## ----eval = FALSE, include = FALSE--------------------------------------------
# bench::mark(
# df %>% mutate(m = rowSums(pick(x:z))),
# df %>% mutate(m = apply(pick(x:z), 1, sum)),
# df %>% rowwise() %>% mutate(m = sum(pick(x:z))),
# check = FALSE
# )
## -----------------------------------------------------------------------------
df <- tibble(
x = list(1, 2:3, 4:6)
)
## -----------------------------------------------------------------------------
df %>% mutate(l = length(x))
## -----------------------------------------------------------------------------
df %>% mutate(l = lengths(x))
## -----------------------------------------------------------------------------
df %>% mutate(l = sapply(x, length))
df %>% mutate(l = purrr::map_int(x, length))
## -----------------------------------------------------------------------------
df %>%
rowwise() %>%
mutate(l = length(x))
## -----------------------------------------------------------------------------
df <- tibble(g = 1:2, y = list(1:3, "a"))
gf <- df %>% group_by(g)
rf <- df %>% rowwise(g)
## -----------------------------------------------------------------------------
gf %>% mutate(type = typeof(y), length = length(y))
rf %>% mutate(type = typeof(y), length = length(y))
## -----------------------------------------------------------------------------
# grouped
out1 <- integer(2)
for (i in 1:2) {
out1[[i]] <- length(df$y[i])
}
out1
# rowwise
out2 <- integer(2)
for (i in 1:2) {
out2[[i]] <- length(df$y[[i]])
}
out2
## ----error = TRUE-------------------------------------------------------------
gf %>% mutate(y2 = y)
rf %>% mutate(y2 = y)
rf %>% mutate(y2 = list(y))
## -----------------------------------------------------------------------------
by_cyl <- mtcars %>% nest_by(cyl)
by_cyl
## -----------------------------------------------------------------------------
mods <- by_cyl %>% mutate(mod = list(lm(mpg ~ wt, data = data)))
mods
## -----------------------------------------------------------------------------
mods <- mods %>% mutate(pred = list(predict(mod, data)))
mods
## -----------------------------------------------------------------------------
mods %>% summarise(rmse = sqrt(mean((pred - data$mpg) ^ 2)))
mods %>% summarise(rsq = summary(mod)$r.squared)
mods %>% summarise(broom::glance(mod))
## -----------------------------------------------------------------------------
mods %>% reframe(broom::tidy(mod))
## -----------------------------------------------------------------------------
df <- tribble(
~ n, ~ min, ~ max,
1, 0, 1,
2, 10, 100,
3, 100, 1000,
)
## -----------------------------------------------------------------------------
df %>%
rowwise() %>%
mutate(data = list(runif(n, min, max)))
## ----error = TRUE-------------------------------------------------------------
df %>%
rowwise() %>%
mutate(data = runif(n, min, max))
## -----------------------------------------------------------------------------
df <- expand.grid(mean = c(-1, 0, 1), sd = c(1, 10, 100))
df %>%
rowwise() %>%
mutate(data = list(rnorm(10, mean, sd)))
## -----------------------------------------------------------------------------
df <- tribble(
~rng, ~params,
"runif", list(n = 10),
"rnorm", list(n = 20),
"rpois", list(n = 10, lambda = 5),
) %>%
rowwise()
df %>%
mutate(data = list(do.call(rng, params)))
## ----include = FALSE, eval = FALSE--------------------------------------------
# df <- rowwise(tribble(
# ~rng, ~params,
# "runif", list(min = -1, max = 1),
# "rnorm", list(),
# "rpois", list(lambda = 5),
# ))
#
# # Has to happen in separate function to avoid eager unquoting
# f <- function(rng, params) purrr::exec(rng, n = 10, !!!params)
# df %>%
# mutate(data = list(f(rng, params)))
## -----------------------------------------------------------------------------
mtcars %>%
group_by(cyl) %>%
do(head(., 1))
## -----------------------------------------------------------------------------
mtcars %>%
group_by(cyl) %>%
reframe(head(pick(everything()), 1))
## -----------------------------------------------------------------------------
mtcars %>%
group_by(cyl) %>%
do(nrows = nrow(.))
## -----------------------------------------------------------------------------
mtcars %>%
group_by(cyl) %>%
summarise(nrows = nrow(pick(everything())))
|