File: rowwise.R

package info (click to toggle)
r-cran-dplyr 1.1.4-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 4,292 kB
  • sloc: cpp: 1,403; sh: 17; makefile: 7
file content (207 lines) | stat: -rw-r--r-- 6,547 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(collapse = T, comment = "#>")
options(tibble.print_min = 4L, tibble.print_max = 4L)

## ----setup--------------------------------------------------------------------
library(dplyr, warn.conflicts = FALSE)

## ----include = FALSE----------------------------------------------------------
nest_by <- function(df, ...) {
  df %>%
    group_by(...) %>% 
    summarise(data = list(pick(everything()))) %>% 
    rowwise(...)
}
# mtcars %>% nest_by(cyl)

## -----------------------------------------------------------------------------
df <- tibble(x = 1:2, y = 3:4, z = 5:6)
df %>% rowwise()

## -----------------------------------------------------------------------------
df %>% mutate(m = mean(c(x, y, z)))
df %>% rowwise() %>% mutate(m = mean(c(x, y, z)))

## -----------------------------------------------------------------------------
df <- tibble(name = c("Mara", "Hadley"), x = 1:2, y = 3:4, z = 5:6)

df %>% 
  rowwise() %>% 
  summarise(m = mean(c(x, y, z)))

df %>% 
  rowwise(name) %>% 
  summarise(m = mean(c(x, y, z)))

## -----------------------------------------------------------------------------
df <- tibble(id = 1:6, w = 10:15, x = 20:25, y = 30:35, z = 40:45)
df

## -----------------------------------------------------------------------------
rf <- df %>% rowwise(id)

## -----------------------------------------------------------------------------
rf %>% mutate(total = sum(c(w, x, y, z)))
rf %>% summarise(total = sum(c(w, x, y, z)))

## -----------------------------------------------------------------------------
rf %>% mutate(total = sum(c_across(w:z)))
rf %>% mutate(total = sum(c_across(where(is.numeric))))

## -----------------------------------------------------------------------------
rf %>% 
  mutate(total = sum(c_across(w:z))) %>% 
  ungroup() %>% 
  mutate(across(w:z, ~ . / total))

## -----------------------------------------------------------------------------
df %>% mutate(total = rowSums(pick(where(is.numeric), -id)))
df %>% mutate(mean = rowMeans(pick(where(is.numeric), -id)))

## ----eval = FALSE, include = FALSE--------------------------------------------
#  bench::mark(
#    df %>% mutate(m = rowSums(pick(x:z))),
#    df %>% mutate(m = apply(pick(x:z), 1, sum)),
#    df %>% rowwise() %>% mutate(m = sum(pick(x:z))),
#    check = FALSE
#  )

## -----------------------------------------------------------------------------
df <- tibble(
  x = list(1, 2:3, 4:6)
)

## -----------------------------------------------------------------------------
df %>% mutate(l = length(x))

## -----------------------------------------------------------------------------
df %>% mutate(l = lengths(x))

## -----------------------------------------------------------------------------
df %>% mutate(l = sapply(x, length))
df %>% mutate(l = purrr::map_int(x, length))

## -----------------------------------------------------------------------------
df %>% 
  rowwise() %>% 
  mutate(l = length(x))

## -----------------------------------------------------------------------------
df <- tibble(g = 1:2, y = list(1:3, "a"))
gf <- df %>% group_by(g)
rf <- df %>% rowwise(g)

## -----------------------------------------------------------------------------
gf %>% mutate(type = typeof(y), length = length(y))
rf %>% mutate(type = typeof(y), length = length(y))

## -----------------------------------------------------------------------------
# grouped
out1 <- integer(2)
for (i in 1:2) {
  out1[[i]] <- length(df$y[i])
}
out1

# rowwise
out2 <- integer(2)
for (i in 1:2) {
  out2[[i]] <- length(df$y[[i]])
}
out2

## ----error = TRUE-------------------------------------------------------------
gf %>% mutate(y2 = y)
rf %>% mutate(y2 = y)
rf %>% mutate(y2 = list(y))

## -----------------------------------------------------------------------------
by_cyl <- mtcars %>% nest_by(cyl)
by_cyl

## -----------------------------------------------------------------------------
mods <- by_cyl %>% mutate(mod = list(lm(mpg ~ wt, data = data)))
mods

## -----------------------------------------------------------------------------
mods <- mods %>% mutate(pred = list(predict(mod, data)))
mods

## -----------------------------------------------------------------------------
mods %>% summarise(rmse = sqrt(mean((pred - data$mpg) ^ 2)))
mods %>% summarise(rsq = summary(mod)$r.squared)
mods %>% summarise(broom::glance(mod))

## -----------------------------------------------------------------------------
mods %>% reframe(broom::tidy(mod))

## -----------------------------------------------------------------------------
df <- tribble(
  ~ n, ~ min, ~ max,
    1,     0,     1,
    2,    10,   100,
    3,   100,  1000,
)

## -----------------------------------------------------------------------------
df %>% 
  rowwise() %>% 
  mutate(data = list(runif(n, min, max)))

## ----error = TRUE-------------------------------------------------------------
df %>% 
  rowwise() %>% 
  mutate(data = runif(n, min, max))

## -----------------------------------------------------------------------------
df <- expand.grid(mean = c(-1, 0, 1), sd = c(1, 10, 100))

df %>% 
  rowwise() %>% 
  mutate(data = list(rnorm(10, mean, sd)))

## -----------------------------------------------------------------------------
df <- tribble(
   ~rng,     ~params,
   "runif",  list(n = 10), 
   "rnorm",  list(n = 20),
   "rpois",  list(n = 10, lambda = 5),
) %>%
  rowwise()

df %>% 
  mutate(data = list(do.call(rng, params)))

## ----include = FALSE, eval = FALSE--------------------------------------------
#  df <- rowwise(tribble(
#     ~rng,     ~params,
#     "runif",  list(min = -1, max = 1),
#     "rnorm",  list(),
#     "rpois",  list(lambda = 5),
#  ))
#  
#  # Has to happen in separate function to avoid eager unquoting
#  f <- function(rng, params) purrr::exec(rng, n = 10, !!!params)
#  df %>%
#    mutate(data = list(f(rng, params)))

## -----------------------------------------------------------------------------
mtcars %>% 
  group_by(cyl) %>% 
  do(head(., 1))

## -----------------------------------------------------------------------------
mtcars %>% 
  group_by(cyl) %>% 
  reframe(head(pick(everything()), 1))

## -----------------------------------------------------------------------------
mtcars %>% 
  group_by(cyl) %>% 
  do(nrows = nrow(.))

## -----------------------------------------------------------------------------
mtcars %>% 
  group_by(cyl) %>% 
  summarise(nrows = nrow(pick(everything())))