1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203
|
#' Network data
#'
#' Retrieve data to know which users are connected to which users.
#'
#' @description
#' * `network_data()` returns a data frame that can easily be converted to
#' various network classes.
#' * `network_graph()` returns a igraph object
#'
#' @param x Data frame returned by rtweet function
#' @param e Type of edge/link–i.e., "mention", "retweet", "quote", "reply".
#' This must be a character vector of length one or more. This value will be
#' split on punctuation and space (so you can include multiple types in the
#' same string separated by a comma or space). The values "all" and
#' "semantic" are assumed to mean all edge types, which is equivalent to the
#' default value of `c("mention", "retweet", "reply", "quote")`
#' @return A from/to data edge data frame
#' @seealso network_graph
#' @examples
#' if (auth_has_default()) {
#' ## search for #rstats tweets
#' rstats <- search_tweets("#rstats", n = 200)
#'
#' ## create from-to data frame representing retweet/mention/reply connections
#' rstats_net <- network_data(rstats, c("retweet","mention","reply"))
#'
#' ## view edge data frame
#' rstats_net
#'
#' ## view user_id->screen_name index
#' attr(rstats_net, "idsn")
#'
#' ## if igraph is installed...
#' if (requireNamespace("igraph", quietly = TRUE)) {
#'
#' ## (1) convert directly to graph object representing semantic network
#' rstats_net <- network_graph(rstats)
#'
#' ## (2) plot graph via igraph.plotting
#' plot(rstats_net)
#' }
#' }
#' @export
network_data <- function(x, e = c("mention", "retweet", "reply", "quote")) {
if (isTRUE(e) || (length(e) == 1 && e %in% c("semantics", "all"))) {
e <- c("mention", "retweet", "reply", "quote")
}
stopifnot(is.character(e))
y <- users_data(x)
ids <- character()
screen_names <- character()
if ("mention" %in% e) {
user_mentions <- lapply(x$entities, function(x){
if (has_name_(x, "user_mentions")) {
y <- x$user_mentions
if (length(y$id_str) > 1 || !is.na(y$id_str)) {
return(y[, c("screen_name", "id_str")])
}
}
NULL
})
k <- vapply(user_mentions, is.null, logical(1L))
# If no mention skip
if (!all(k)) {
r <- do.call(rbind, user_mentions[!k])
ids <- c(ids, r$id_str, y[!k, "id_str", drop = TRUE])
screen_names <- c(screen_names, r$screen_name, y[!k, "screen_name", drop = TRUE])
mention <- data.frame(from = rep(y[!k, "id_str", drop = TRUE], times = vapply(user_mentions[!k], nrow, numeric(1L))),
to = r$id_str,
type = "mention")
} else {
mention <- data.frame(from = NA, to = NA, type = NA)[0, , drop = FALSE]
}
} else {
mention <- data.frame(from = NA, to = NA, type = NA)[0, , drop = FALSE]
}
if ("retweet" %in% e) {
retweet0 <- data.frame(from = NA, to = NA, type = NA)[0, , drop = FALSE]
# Retweets are those that the text start with RT and a mention but are not quoted
retweets <- startsWith(x$text, "RT @")
if (any(retweets)) {
r <- x[retweets, ]
yr <- y[retweets, ]
user_mentions <- lapply(r$entities, function(x){
y <- x$user_mentions
# Pick the first mention that is the one the tweet is quoting
# Example: 1390785143615467524
return(y[y$indices$start == 3, c("screen_name", "id_str")])
})
um <- do.call(rbind, user_mentions)
ur <- yr[, c("screen_name", "id_str")]
# remove content from deleted users
um_r <- vapply(user_mentions, nrow, numeric(1L))
removed_users <- which(um_r == 0)
for (i in seq_along(ur$id_str)) {
if (i %in% removed_users) {
to_id <- NA_character_
to_screen_name <- NA_character_
} else {
to_id <- um$id_str[i]
to_screen_name <- um$screen_name[i]
}
ids <- c(ids, ur$id_str[i], to_id)
screen_names <- c(screen_names, ur$screen_name[i], to_screen_name)
retweet <- data.frame(from = ur$id_str[i],
to = to_id,
type = "retweet")
retweet0 <- rbind(retweet0, retweet)
}
}
retweet <- retweet0
} else {
retweet <- data.frame(from = NA, to = NA, type = NA)[0, , drop = FALSE]
}
if ("reply" %in% e && !all(is.na(x$in_reply_to_user_id_str))) {
reply_keep <- !is.na(x$in_reply_to_user_id_str)
ids <- c(ids, y[["id_str"]][reply_keep], x[["in_reply_to_user_id_str"]][reply_keep])
screen_names <- c(screen_names, y[["screen_name"]][reply_keep], x[["in_reply_to_screen_name"]][reply_keep])
reply <- data.frame(from = y[["id_str"]][reply_keep],
to = x[["in_reply_to_user_id_str"]][reply_keep],
type = "reply")
} else {
reply <- data.frame(from = NA, to = NA, type = NA)[0, , drop = FALSE]
}
if ("quote" %in% e && !all(is.na(x$is_quote_status))) {
r <- x[x$is_quote_status, ]
yr <- y[x$is_quote_status, c("screen_name", "id_str")]
# Quotes are from users on entities$user_mentions whose indices start at 3
if (is.data.frame(r$quoted_status$user)) {
um <- r$quoted_status$user[, c("screen_name", "id_str")]
} else {
user_mentions <- lapply(r$quoted_status$user, function(x){
# Pick the first mention that is the one the tweet is quoting
# Example: 1390785143615467524
return(x[, c("screen_name", "id_str")])
})
um <- do.call(rbind, user_mentions)
}
ums <- is.na(um[, 1])
if (!is.null(nrow(ums))) {
um <- um[!ums, ]
yr <- yr[!ums, ]
ids <- c(ids, um$id_str, yr$id_str)
screen_names <- c(screen_names, um$screen_name, yr$screen_name)
quote <- data.frame(from = um$id_str,
to = yr$id_str,
type = "quote")
} else {
quote <- data.frame(from = NA, to = NA, type = NA)[0, , drop = FALSE]
}
} else {
quote <- data.frame(from = NA, to = NA, type = NA)[0, , drop = FALSE]
}
out <- rbind(mention, retweet, reply, quote)
out <- out[!is.na(out$type), ]
idsn <- data.frame(id = ids, sn = screen_names)
idsn <- unique(idsn)
stopifnot(all(out$from %in% idsn$id))
stopifnot(all(out$to %in% idsn$id))
attr(out, "idsn") <- as.list(idsn)
out
}
#' @return An igraph object
#' @rdname network_data
#' @export
network_graph <- function(x, e = c("mention", "retweet", "reply", "quote")) {
if (!requireNamespace("igraph", quietly = TRUE)) {
stop(
"Please install the {igraph} package to use this function",
call. = FALSE
)
}
nd <- network_data(x = x, e = e)
idsn <- attr(nd, "idsn")
g <- igraph::make_empty_graph(n = 0, directed = TRUE)
g <- igraph::add_vertices(g, length(idsn$id),
attr = list(id = idsn$id, name = idsn$sn))
edges <- rbind(match(nd[[1]], idsn$id), match(nd[[2]], idsn$id))
igraph::add_edges(g, edges, attr = list(type = nd[[3]]))
}
|