1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
|
## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
options(tibble.print_min = 6L, tibble.print_max = 6L, digits = 3)
## ---- eval = FALSE------------------------------------------------------------
# install.packages("dbplyr")
## ----setup, message = FALSE---------------------------------------------------
library(dplyr)
con <- DBI::dbConnect(RSQLite::SQLite(), dbname = ":memory:")
## ---- eval = FALSE------------------------------------------------------------
# con <- DBI::dbConnect(RMariaDB::MariaDB(),
# host = "database.rstudio.com",
# user = "hadley",
# password = rstudioapi::askForPassword("Database password")
# )
## -----------------------------------------------------------------------------
copy_to(con, nycflights13::flights, "flights",
temporary = FALSE,
indexes = list(
c("year", "month", "day"),
"carrier",
"tailnum",
"dest"
)
)
## -----------------------------------------------------------------------------
flights_db <- tbl(con, "flights")
## -----------------------------------------------------------------------------
flights_db
## -----------------------------------------------------------------------------
flights_db %>% select(year:day, dep_delay, arr_delay)
flights_db %>% filter(dep_delay > 240)
flights_db %>%
group_by(dest) %>%
summarise(delay = mean(dep_delay))
## -----------------------------------------------------------------------------
tailnum_delay_db <- flights_db %>%
group_by(tailnum) %>%
summarise(
delay = mean(arr_delay),
n = n()
) %>%
arrange(desc(delay)) %>%
filter(n > 100)
## -----------------------------------------------------------------------------
tailnum_delay_db
## -----------------------------------------------------------------------------
tailnum_delay_db %>% show_query()
## -----------------------------------------------------------------------------
tailnum_delay <- tailnum_delay_db %>% collect()
tailnum_delay
## ---- error = TRUE------------------------------------------------------------
nrow(tailnum_delay_db)
tail(tailnum_delay_db)
|