1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184
|
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/search_tweets.R
\name{search_tweets}
\alias{search_tweets}
\alias{search_tweets2}
\title{Get tweets data on statuses identified via search query.}
\usage{
search_tweets(
q,
n = 100,
type = c("mixed", "recent", "popular"),
include_rts = TRUE,
geocode = NULL,
since_id = NULL,
max_id = NULL,
parse = TRUE,
token = NULL,
retryonratelimit = NULL,
verbose = TRUE,
...
)
search_tweets2(...)
}
\arguments{
\item{q}{Query to be searched, used to filter and select tweets to
return from Twitter's REST API. Must be a character string not to
exceed maximum of 500 characters. Spaces behave like boolean
"AND" operator. To search for tweets containing at least one of
multiple possible terms, separate each search term with spaces
and "OR" (in caps). For example, the search \code{q = "data science"} looks for tweets containing both "data" and
"science" located anywhere in the tweets and in any order.
When "OR" is entered between search terms, \code{q = "data OR science"}, Twitter's REST API should return any tweet
that contains either "data" or "science." It is also possible to
search for exact phrases using double quotes. To do this, either
wrap the double-quoted search query in single quotes,
e.g., \code{q = '"data science"'} or escape each internal double
quote with a single backslash, e.g., \verb{q = "\\"data science\\""}.
Some other useful query tips:
\itemize{
\item Exclude retweets via \code{"-filter:retweets"}
\item Exclude quotes via \code{"-filter:quote"}
\item Exclude replies via \code{"-filter:replies"}
\item Filter (return only) verified via \code{"filter:verified"}
\item Exclude verified via \code{"-filter:verified"}
\item Get everything (firehose for free) via \code{"-filter:verified OR filter:verified"}
\item Filter (return only) tweets with links to news articles via \code{"filter:news"}
\item Filter (return only) tweets with media via \code{"filter:media"}
}}
\item{n}{Desired number of results to return. Results are downloaded
in pages when \code{n} is large; the default value will download a single
page. Set \code{n = Inf} to download as many results as possible.
The Twitter API rate limits the number of requests you can perform
in each 15 minute period. The easiest way to download more than that is
to use \code{retryonratelimit = TRUE}.
You are not guaranteed to get exactly \code{n} results back. You will get
fewer results when tweets have been deleted or if you hit a rate limit.
You will get more results if you ask for a number of tweets that's not
a multiple of page size, e.g. if you request \code{n = 150} and the page
size is 200, you'll get 200 results back.}
\item{type}{Character string specifying which type of search
results to return from Twitter's REST API. The current default is
\code{type = "mixed"}; other valid types include \code{type = "recent"} and \code{type = "popular"}.}
\item{include_rts}{Logical, indicating whether to include retweets
in search results. Retweets are classified as any tweet generated
by Twitter's built-in "retweet" (recycle arrows) function. These
are distinct from quotes (retweets with additional text provided
from sender) or manual retweets (old school method of manually
entering "RT" into the text of one's tweets).}
\item{geocode}{Geographical limiter of the template
"latitude,longitude,radius" e.g., \code{geocode = "37.78,-122.40,1mi"}.}
\item{since_id}{Supply a vector of ids or a data frame of previous results to
find tweets \strong{newer} than \code{since_id}.}
\item{max_id}{Supply a vector of ids or a data frame of previous results to
find tweets \strong{older} than \code{max_id}.}
\item{parse}{If \code{TRUE}, the default, returns a tidy data frame. Use \code{FALSE}
to return the "raw" list corresponding to the JSON returned from the
Twitter API.}
\item{token}{Expert use only. Use this to override authentication for
a single API call. In most cases you are better off changing the
default for all calls. See \code{\link[=auth_as]{auth_as()}} for details.}
\item{retryonratelimit}{If \code{TRUE}, and a rate limit is exhausted, will wait
until it refreshes. Most Twitter rate limits refresh every 15 minutes.
If \code{FALSE}, and the rate limit is exceeded, the function will terminate
early with a warning; you'll still get back all results received up to
that point. The default value, \code{NULL}, consults the option
\code{rtweet.retryonratelimit} so that you can globally set it to \code{TRUE},
if desired.
If you expect a query to take hours or days to perform, you should not
rely solely on \code{retryonratelimit} because it does not handle other common
failure modes like temporarily losing your internet connection.}
\item{verbose}{Show progress bars and other messages indicating current
progress?}
\item{...}{Further arguments passed as query parameters in request
sent to Twitter's REST API. To return only English language
tweets, for example, use \code{lang = "en"}. For more options see
Twitter's API documentation.}
}
\value{
List object with tweets and users each returned as a
data frame.
For \code{search_tweets2()}: a tbl data frame with additional "query" column.
}
\description{
Returns Twitter statuses matching a user provided search
query. ONLY RETURNS DATA FROM THE PAST 6-9 DAYS.
\code{search_tweets2()} passes all arguments to \code{search_tweets()}. Returns data from
one OR MORE search queries.
}
\details{
Twitter API documentation recommends limiting searches to
10 keywords and operators. Complex queries may also produce API
errors preventing recovery of information related to the query.
It should also be noted that Twitter's search API does not consist of
an index of all Tweets. At the time of searching, the search API
index includes only the most recent 6-9 days of Tweets.
}
\examples{
if (auth_has_default()) {
tweets <- search_tweets("weather")
tweets
# data about the users who made those tweets
users_data(tweets)
# Retrieve all the tweets made since the previous request
# (there might not be any if people aren't tweeting about the weather)
newer <- search_tweets("weather", since_id = tweets)
# Retrieve tweets made before the previous request
older <- search_tweets("weather", max_id = tweets)
# Restrict to English only, and ignore retweets
tweets2 <- search_tweets("weather", lang = "en", include_rts = FALSE)
}
if (auth_has_default()) {
## search using multiple queries
st2 <- search_tweets2(
c("\"data science\"", "rstats OR python"),
n = 500
)
## preview tweets data
st2
## preview users data
users_data(st2)
## check breakdown of results by search query
table(st2$query)
}
}
\references{
\url{https://developer.twitter.com/en/docs/twitter-api/v1/tweets/search/api-reference/get-search-tweets}
}
\seealso{
Other tweets:
\code{\link{get_favorites}()},
\code{\link{get_mentions}()},
\code{\link{get_timeline}()},
\code{\link{lists_statuses}()},
\code{\link{lookup_tweets}()}
}
\concept{tweets}
|