File: dbplyr.R

package info (click to toggle)
r-cran-dbplyr 2.3.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 2,376 kB
  • sloc: sh: 13; makefile: 2
file content (69 lines) | stat: -rw-r--r-- 2,177 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
options(tibble.print_min = 6L, tibble.print_max = 6L, digits = 3)

## ---- eval = FALSE------------------------------------------------------------
#  install.packages("dbplyr")

## ----setup, message = FALSE---------------------------------------------------
library(dplyr)
con <- DBI::dbConnect(RSQLite::SQLite(), dbname = ":memory:")

## ---- eval = FALSE------------------------------------------------------------
#  con <- DBI::dbConnect(RMariaDB::MariaDB(),
#    host = "database.rstudio.com",
#    user = "hadley",
#    password = rstudioapi::askForPassword("Database password")
#  )

## -----------------------------------------------------------------------------
copy_to(con, nycflights13::flights, "flights",
  temporary = FALSE, 
  indexes = list(
    c("year", "month", "day"), 
    "carrier", 
    "tailnum",
    "dest"
  )
)

## -----------------------------------------------------------------------------
flights_db <- tbl(con, "flights")

## -----------------------------------------------------------------------------
flights_db 

## -----------------------------------------------------------------------------
flights_db %>% select(year:day, dep_delay, arr_delay)

flights_db %>% filter(dep_delay > 240)

flights_db %>% 
  group_by(dest) %>%
  summarise(delay = mean(dep_delay))

## -----------------------------------------------------------------------------
tailnum_delay_db <- flights_db %>% 
  group_by(tailnum) %>%
  summarise(
    delay = mean(arr_delay),
    n = n()
  ) %>% 
  arrange(desc(delay)) %>%
  filter(n > 100)

## -----------------------------------------------------------------------------
tailnum_delay_db

## -----------------------------------------------------------------------------
tailnum_delay_db %>% show_query()

## -----------------------------------------------------------------------------
tailnum_delay <- tailnum_delay_db %>% collect()
tailnum_delay

## ---- error = TRUE------------------------------------------------------------
nrow(tailnum_delay_db)

tail(tailnum_delay_db)