File: joins.R

package info (click to toggle)
r-cran-tidygraph 1.3.1-2
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 880 kB
  • sloc: cpp: 41; sh: 13; makefile: 2
file content (158 lines) | stat: -rw-r--r-- 5,445 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
#' @importFrom dplyr left_join
#' @export
left_join.tbl_graph <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...) {
  d_tmp <- as_tibble(x)
  d_tmp <- left_join(d_tmp, y, by = by, copy = copy, suffix = suffix, ...)
  set_graph_data(x, d_tmp)
}
#' @export
dplyr::left_join

#' @importFrom dplyr right_join
#' @importFrom stats na.omit
#' @export
right_join.tbl_graph <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...) {
  x <- unfocus(x)
  d_tmp <- as_tibble(x)
  check_reserved(d_tmp)
  if (active(x) == 'edges' && (!all(c('from', 'to') %in% names(y)) ||
                               !(is.numeric(y$from) && is.numeric(y$to)))) {
    cli::cli_abort('{.arg y} must contain the {.cls numeric} columns {.col from} and {.col to}')
  }
  orig_ind <- seq_len(nrow(d_tmp))
  d_tmp$.tbl_graph_index <- orig_ind
  d_tmp <- right_join(d_tmp, y, by = by, copy = copy, suffix = suffix, ...)
  new_order <- order(d_tmp$.tbl_graph_index) # Will never eclipse Joy Division
  d_tmp <- d_tmp[new_order, ]
  x <- slice(x, na.omit(d_tmp$.tbl_graph_index))
  new_rows <- which(is.na(d_tmp$.tbl_graph_index))
  d_tmp$.tbl_graph_index <- NULL
  x <- switch(
    active(x),
    nodes = add_vertices(x, length(new_rows)),
    edges = add_edges(x, as.vector(rbind(d_tmp$from[new_rows], d_tmp$to[new_rows])))
  ) %gr_attr% x
  x <- set_graph_data(x, d_tmp)
  arrange(x, seq_len(nrow(d_tmp))[new_order])
}
#' @export
dplyr::right_join

#' @importFrom dplyr inner_join
#' @export
inner_join.tbl_graph <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...) {
  x <- unfocus(x)
  d_tmp <- as_tibble(x)
  check_reserved(d_tmp)
  orig_ind <- seq_len(nrow(d_tmp))
  d_tmp$.tbl_graph_index <- orig_ind
  d_tmp <- inner_join(d_tmp, y, by = by, copy = copy, suffix = suffix, ...)
  x <- slice(x, d_tmp$.tbl_graph_index)
  d_tmp$.tbl_graph_index <- NULL
  set_graph_data(x, d_tmp)
}
#' @export
dplyr::inner_join

#' @importFrom dplyr full_join
#' @importFrom igraph add_vertices add_edges
#' @export
full_join.tbl_graph <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...) {
  x <- unfocus(x)
  d_tmp <- as_tibble(x)
  check_reserved(d_tmp)
  if (active(x) == 'edges' && (!all(c('from', 'to') %in% names(y)) ||
                               !(is.numeric(y$from) && is.numeric(y$to)))) {
    cli::cli_abort('{.arg y} must contain the {.cls numeric} columns {.col from} and {.col to}')
  }
  orig_ind <- seq_len(nrow(d_tmp))
  d_tmp$.tbl_graph_index <- orig_ind
  d_tmp <- full_join(d_tmp, y, by = by, copy = copy, suffix = suffix, ...)
  new_rows <- which(is.na(d_tmp$.tbl_graph_index))
  d_tmp$.tbl_graph_index <- NULL
  x <- switch(
    active(x),
    nodes = add_vertices(x, length(new_rows)),
    edges = add_edges(x, as.vector(rbind(d_tmp$from[new_rows], d_tmp$to[new_rows])))
  ) %gr_attr% x
  set_graph_data(x, d_tmp)
}
#' @export
dplyr::full_join

#' @importFrom dplyr semi_join
#' @export
semi_join.tbl_graph <- function(x, y, by = NULL, copy = FALSE, ...) {
  x <- unfocus(x)
  d_tmp <- as_tibble(x)
  check_reserved(d_tmp)
  orig_ind <- seq_len(nrow(d_tmp))
  d_tmp$.tbl_graph_index <- orig_ind
  d_tmp <- semi_join(d_tmp, y, by = by, copy = copy, ...)
  slice(x, d_tmp$.tbl_graph_index)
}
#' @export
dplyr::semi_join

#' @importFrom dplyr anti_join
#' @export
anti_join.tbl_graph <- function(x, y, by = NULL, copy = FALSE, ...) {
  x <- unfocus(x)
  d_tmp <- as_tibble(x)
  check_reserved(d_tmp)
  orig_ind <- seq_len(nrow(d_tmp))
  d_tmp$.tbl_graph_index <- orig_ind
  d_tmp <- anti_join(d_tmp, y, by = by, copy = copy, ...)
  slice(x, d_tmp$.tbl_graph_index)
}
#' @export
dplyr::anti_join

#' Join graphs on common nodes
#'
#' This graph-specific join method makes a full join on the nodes data and
#' updates the edges in the joining graph so they matches the new indexes of the
#' nodes in the resulting graph. Node and edge data is combined using
#' [dplyr::bind_rows()] semantic, meaning that data is matched by column name
#' and filled with `NA` if it is missing in either of the graphs.
#'
#' @param x A `tbl_graph`
#' @param y An object convertible to a `tbl_graph` using [as_tbl_graph()]
#' @inheritParams dplyr::full_join
#'
#' @return A `tbl_graph` containing the merged graph
#'
#' @importFrom tibble as_tibble
#' @importFrom dplyr full_join bind_rows
#' @export
#'
#' @examples
#' gr1 <- create_notable('bull') %>%
#'   activate(nodes) %>%
#'   mutate(name = letters[1:5])
#' gr2 <- create_ring(10) %>%
#'   activate(nodes) %>%
#'   mutate(name = letters[4:13])
#'
#' gr1 %>% graph_join(gr2)
graph_join <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...) {
  x <- unfocus(x)
  check_tbl_graph(x)
  y <- as_tbl_graph(y)

  d_tmp <- as_tibble(x, active = 'nodes')
  d_tmp2 <- as_tibble(y, active = 'nodes')
  check_reserved(d_tmp)
  check_reserved(d_tmp2)
  orig_ind <- seq_len(nrow(d_tmp2))
  d_tmp2$.tbl_graph_index <- orig_ind
  nodes <- full_join(d_tmp, d_tmp2, by = by, copy = copy, suffix = suffix, ...)
  ind_lookup <- data.frame(new = seq_len(nrow(nodes)), old = nodes$.tbl_graph_index)
  nodes$.tbl_graph_index <- NULL
  edges <- as_tibble(x, active = 'edges')
  edges2 <- as_tibble(y, active = 'edges')
  edges2$from <- ind_lookup$new[match(edges2$from, ind_lookup$old)]
  edges2$to <- ind_lookup$new[match(edges2$to, ind_lookup$old)]
  edges <- bind_rows(edges, edges2)
  as_tbl_graph(list(nodes = nodes, edges = edges)) %gr_attr% y %gr_attr% x
}