1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206
|
# Environment in which cookies will be stored. Cookies are expected to survive
# the duration of the R session, but are not persisted outside of the R
# session.
.cookieStore <- new.env(parent = emptyenv())
# Returns the cookies associated with a particular host/port
# If no hostname is specified, returns all cookies
getCookies <- function(hostname, port = NULL) {
if (missing(hostname)) {
hosts <- ls(envir = .cookieStore)
cookies <- lapply(hosts, function(h) {
getCookiesHostname(h)
})
do.call("rbind", cookies)
} else {
host <- getCookieHost(list(host = hostname, port = port))
getCookiesHostname(host)
}
}
# Get cookies for a particular hostname(:port)
getCookiesHostname <- function(host) {
if (!exists(host, .cookieStore)) {
NULL
} else {
cookies <- get(host, envir = .cookieStore)
cookies$host <- host
cookies
}
}
# Clears the cookies associated with a particular hostname/port combination.
# If hostname and port are omitted, clears all the cookies
clearCookies <- function(hostname, port = NULL) {
if (missing(hostname)) {
rm(list = ls(envir = .cookieStore), envir = .cookieStore)
} else {
host <- getCookieHost(list(host = hostname, port = port))
rm(list = host, envir = .cookieStore)
}
}
# Parse out the raw headers provided and insert them into the cookieStore
# NOTE: Domain attribute is currently ignored
# @param requestURL the parsed URL as returned from `parseHttpUrl`
# @param cookieHeaders a list of characters strings representing the raw
# Set-Cookie header value with the "Set-Cookie: " prefix omitted
storeCookies <- function(requestURL, cookieHeaders) {
cookies <- lapply(cookieHeaders, parseCookie, requestPath = requestURL$path)
# Filter out invalid cookies (which would return as NULL)
cookies <- Filter(Negate(is.null), cookies)
host <- getCookieHost(requestURL)
hostCookies <- NULL
if (!exists(host, .cookieStore)) {
# Create a new data frame for this host
hostCookies <- data.frame(
path = character(0),
name = character(0),
value = character(0),
secure = logical(0),
expires = character(0),
stringsAsFactors = FALSE
)
} else {
hostCookies <- get(host, envir = .cookieStore)
}
lapply(cookies, function(co) {
# Remove any duplicates
# RFC says duplicate cookies are ones that have the same domain, name, and path
hostCookies <<- hostCookies[!(co$name == hostCookies$name & co$path == hostCookies$path), ]
# append this new cookie on
hostCookies <<- rbind(as.data.frame(co, stringsAsFactors = FALSE), hostCookies)
})
# Save this host's cookies into the cookies store.
assign(host, hostCookies, envir = .cookieStore)
}
# Parse out an individual cookie
# @param cookieHeader the raw text contents of the Set-Cookie header with the
# header name omitted.
# @param requestPath the parsed URL as returned from `parseHttpUrl`
parseCookie <- function(cookieHeader, requestPath = NULL) {
keyval <- regmatches(cookieHeader, regexec(
# https://curl.haxx.se/rfc/cookie_spec.html
# "characters excluding semi-colon, comma and white space"
# white space is not excluded from values so we can capture `expires`
"^([^;=, ]+)\\s*=\\s*([^;,]*)(;|$)", cookieHeader, ignore.case = TRUE))[[1]]
if (length(keyval) == 0) {
# Invalid cookie format.
warning("Unable to parse set-cookie header: ", cookieHeader)
return(NULL)
}
key <- keyval[2]
val <- keyval[3]
# Path
path <- regmatches(cookieHeader, regexec(
"^.*\\sPath\\s*=\\s*([^;]+)(;|$).*$", cookieHeader, ignore.case = TRUE))[[1]]
if (length(path) == 0) {
path <- "/"
} else {
path <- path[2]
}
# Per the RFC, the cookie's path must be a prefix of the request URL
if (!is.null(requestPath) && !hasPrefix(requestPath, path)) {
warning("Invalid path set for cookie on request for '", requestPath, "': ", cookieHeader)
return(NULL)
}
# MaxAge
maxage <- regmatches(cookieHeader, regexec(
"^.*\\sMax-Age\\s*=\\s*(-?\\d+)(;|$).*$", cookieHeader, ignore.case = TRUE))[[1]]
# If no maxage specified, then this is a session cookie, which means that
# (since our cookies only survive for a single session anyways...) we should
# keep this cookie around as long as we're alive.
expires <- Sys.time() + 10^10
if (length(maxage) > 0) {
# Compute time maxage seconds from now
expires <- Sys.time() + as.numeric(maxage[2])
}
# Secure
secure <- grepl(";\\s+Secure(;|$)", cookieHeader, ignore.case = TRUE)
list(name = key,
value = val,
expires = expires,
path = path,
secure = secure)
}
# Appends a cookie header from the .cookieStore to the existing set of headers
# @param requestURL the parsed URL as returned from `parseHttpUrl`
# @param headers a named character vector containing the set of headers to be extended
appendCookieHeaders <- function(requestURL, headers) {
host <- getCookieHost(requestURL)
if (!exists(host, .cookieStore)) {
# Nothing to do
return(headers)
}
cookies <- get(host, envir = .cookieStore)
# If any cookies are expired, remove them from the cookie store
if (any(cookies$expires < as.integer(Sys.time()))) {
cookies <- cookies[cookies$expires >= as.integer(Sys.time()), ]
# Update the store, removing the expired cookies
assign(host, cookies, envir = .cookieStore)
}
if (nrow(cookies) == 0) {
# Short-circuit, return unmodified headers.
return(headers)
}
# Filter to only include cookies that match the path prefix
cookies <- cookies[substring(requestURL$path, 1, nchar(cookies$path)) == cookies$path, ]
# If insecure channel, filter out secure cookies
if (tolower(requestURL$protocol) != "https") {
cookies <- cookies[!cookies$secure, ]
}
# TODO: Technically per the RFC we're supposed to order these cookies by which
# paths most specifically match the request.
cookieHeader <- paste(
apply(cookies, 1, function(x) paste0(x["name"], "=", x["value"])),
collapse = "; "
)
c(headers, cookie = cookieHeader)
}
getCookieHost <- function(requestURL) {
host <- requestURL$host
port <- requestURL$port
if (!is.null(port) && nchar(port) > 0) {
port <- sub("^:", "", port)
# By my reading of the RFC, we technically only need to include the port #
# in the index if the host is an IP address. But here we're including the
# port number as a part of the host whether using a domain name or IP.
# Erring on the side of not sending the cookies to the wrong services
host <- paste(host, port, sep = ":")
}
host
}
showCookies <- function(urlstr) {
url <- parseHttpUrl(urlstr)
cat("Cookies:", "\n")
host <- getCookieHost(url)
if (exists(host, .cookieStore)) {
print(get(host, envir = .cookieStore))
} else {
print("None")
}
}
|