1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491
|
#' Interpolate Values
#'
#' @description This is the core function within the package for areal weighted
#' interpolation. It validates both data sources before interpolating one or more
#' listed values from the source data into the target data.
#'
#' @usage aw_interpolate(.data, tid, source, sid, weight = "sum", output = "sf", extensive,
#' intensive)
#'
#' @details Areal weighted interpolation can be used for generating demographic
#' estimates for overlapping but incongruent polygon features. It assumes that
#' individual members of a population are evenly dispersed within the source features
#' (an assumption not likely to hold in the real world). It also functions best
#' when data are in a projected coordinate system, like the UTM coordinate system.
#'
#' @param .data A \code{sf} object that data should be interpolated to (this is referred
#' to as the \code{target} elsewhere in the package).
#' @param tid A unique identification number within \code{target}
#' @param source A \code{sf} object with data to be interpolated
#' @param sid A unique identification number within \code{source}
#' @param weight For \code{"extensive"} interpolations, should be either \code{"total"} or
#' \code{"sum"}. For \code{"intensive"} interpolations, should be \code{"sum"}. For mixed
#' interpolations, this will only impact the calculation of the extensive variables.
#' @param output One of either \code{"sf"} or \code{"tibble"}
#' @param extensive A vector of quoted variable names to be treated as spatially extensive
#' (e.g. population counts); optional if \code{intensive} is specified
#' @param intensive A vector of quoted variable names to be treated as spatially intensive
#' (e.g. population density); optional if \code{extensive} is specified
#'
#' @return A \code{sf} object or a \code{tibble} with the value or values interpolated into
#' the \code{target} data.
#'
#' @seealso \link{c}
#'
#' @examples
#' aw_interpolate(ar_stl_wards, tid = WARD, source = ar_stl_race, sid = GEOID, weight = "sum",
#' output = "sf", extensive = "TOTAL_E")
#'
#' aw_interpolate(ar_stl_wards, tid = WARD, source = ar_stl_asthma, sid = GEOID, weight = "sum",
#' output = "tibble", intensive = "ASTHMA")
#'
#' @importFrom dplyr as_tibble
#' @importFrom dplyr left_join
#' @importFrom dplyr rename
#' @importFrom dplyr select
#' @importFrom glue glue
#' @importFrom rlang enquo
#' @importFrom rlang quo
#' @importFrom rlang quo_name
#' @importFrom rlang sym
#' @importFrom sf st_geometry
#'
#' @export
aw_interpolate <- function(.data, tid, source, sid, weight = "sum", output = "sf", extensive, intensive){
# save parameters to list
paramList <- as.list(match.call())
# check for missing parameters
if (missing(.data)) {
stop("A sf object containing target data must be specified for the '.data' argument.")
}
if (missing(tid)) {
stop("A variable name must be specified for the 'tid' argument.")
}
if (missing(source)) {
stop("A sf object must be specified for the 'source' argument.")
}
if (missing(sid)) {
stop("A variable name must be specified for the 'sid' argument.")
}
# determine extensive and intensive
if (missing(extensive) & missing(intensive)){
stop("Either 'extensive' or 'intenstive' must be specified with an accompanying list of variables to interpolate.")
}
if (missing(intensive) & !missing(extensive)){
type <- "extensive"
} else if (!missing(intensive) & missing(extensive)){
type <- "intensive"
} else if (!missing(intensive) & !missing(extensive)){
type <- "mixed"
}
# check for misspecified parameters
if (weight %in% c("sum", "total") == FALSE){
stop(glue::glue("The given weight type '{var}' is not valid. 'weight' must be either 'sum' or 'total'.",
var = weight))
}
if (type == "intensive" & weight == "total"){
stop("Spatially intensive interpolations should be caclulated using 'sum' for 'weight'.")
}
if (output %in% c("sf", "tibble") == FALSE){
stop(glue::glue("The given output type '{var}' is not valid. 'output' must be either 'sf' or 'tibble'.",
var = output))
}
# nse
if (!is.character(paramList$sid)) {
sidQ <- rlang::enquo(sid)
} else if (is.character(paramList$sid)) {
sidQ <- rlang::quo(!! rlang::sym(sid))
}
sidQN <- rlang::quo_name(rlang::enquo(sid))
if (!is.character(paramList$tid)) {
tidQ <- rlang::enquo(tid)
} else if (is.character(paramList$tid)) {
tidQ <- rlang::quo(!! rlang::sym(tid))
}
tidQN <- rlang::quo_name(rlang::enquo(tid))
# check variables
if(!!sidQN %in% colnames(source) == FALSE) {
stop(glue::glue("Variable '{var}', given for the source ID ('sid'), cannot be found in the given source object.",
var = sidQN))
}
if(!!tidQN %in% colnames(.data) == FALSE) {
stop(glue::glue("Variable '{var}', given for the target ID ('tid'), cannot be found in the given target object.",
var = tidQN))
}
# check for matching tid and sid variable names
if (tidQN == sidQN){
# store conflict indicator
nameConflict <- TRUE
# store original tid name for later
tidOrig <- tidQN
# rename tid to ...tid
.data <- dplyr::rename(.data, ...tid = !!tidQN)
tidQN <- "...tid"
tidQ <- rlang::quo(!! rlang::sym(tidQN))
} else {
# store conflict indicator
nameConflict <- FALSE
}
# create variable lists
if (type == "extensive"){
vars <- extensive
} else if (type == "intensive"){
vars <- intensive
} else if (type == "mixed"){
vars <- c(extensive, intensive)
}
# validate source and target data
if (ar_validate(source = source, target = .data, varList = vars, method = "aw") == FALSE){
stop("Data validation failed. Use ar_validate with verbose = TRUE to identify concerns.")
}
# call aw_interpolater
if ((type == "extensive" | type == "intensive") & length(vars) == 1) {
# nse
valueQ <- rlang::quo(!! rlang::sym(vars))
# interpolate
data <- aw_interpolate_single(source = source, sid = !!sidQ, value = !!valueQ, target = .data,
tid = !!tidQ, type = type, weight = weight)
} else if ((type == "extensive" | type == "intensive") & length(vars) > 1) {
# interpolate
data <- aw_interpolate_multiple(source = source, sid = !!sidQ, values = vars, target = .data,
tid = !!tidQ, type = type, weight = weight)
} else if (type == "mixed"){
# conduct spatially extensive interpolations
if (length(extensive) == 1){
# nse
valueQ <- rlang::quo(!! rlang::sym(extensive))
# interpolate
exresults <- aw_interpolate_single(source = source, sid = !!sidQ, value = !!valueQ, target = .data,
tid = !!tidQ, type = "extensive", weight = weight)
} else if (length(extensive) > 1){
# interpolate
exresults <- aw_interpolate_multiple(source = source, sid = !!sidQ, values = extensive, target = .data,
tid = !!tidQ, type = "extensive", weight = weight)
}
# conduct spatially intensive interpolations
if (length(intensive) == 1){
# nse
valueQ <- rlang::quo(!! rlang::sym(intensive))
# interpolate
inresults <- aw_interpolate_single(source = source, sid = !!sidQ, value = !!valueQ, target = .data,
tid = !!tidQ, type = "intensive", weight = "sum")
} else if (length(intensive) > 1){
# interpolate
inresults <- aw_interpolate_multiple(source = source, sid = !!sidQ, values = intensive, target = .data,
tid = !!tidQ, type = "intensive", weight = "sum")
}
# combine spatially extensive and intensive data
data <- dplyr::left_join(exresults, inresults, by = tidQN)
}
# structure output
if (output == "sf"){
# left join with target data
out <- dplyr::left_join(.data, data, by = tidQN)
} else if (output == "tibble"){
# left join with target data
data <- dplyr::left_join(.data, data, by = tidQN)
# remove geometry
sf::st_geometry(data) <- NULL
# convert to tibble
out <- dplyr::as_tibble(data)
}
# rename tid
if (nameConflict == TRUE){
out <- dplyr::rename(out, !!tidOrig := !!tidQN)
}
# return output
return(out)
}
# Intermediate Function - Single Value
#
# @description Intermediate function called when there is only one variable to be interpolated.
# This is used to simplify the code for \code{aw_interpolate}.
#
# @param source A \code{sf} object with data to be interpolated
# @param sid A unique identification number within \code{source}
# @param value A column within \code{source} to be interpolated
# @param target A \code{sf} object that data should be interpolated to
# @param tid A unique identification number within \code{target}
# @param type One of either \code{"extensive"} (if the data are spatially extensive e.g.
# population counts), \code{"intensive"} (if the data are spatially intensive e.g.
# population density), or \code{"mixed"} (if the data include both extensive and
# intensive values). If \code{"extensive"}, the sum is returned for the interpolated
# value. If \code{"intensive"}, the mean is returned for the interpolated value.
# If \code{"mixed"}, vectors named \code{"extensive"} and \code{"intensive"} containing
# the relevant variable names should be specified in the dots.
# @param weight For \code{"extensive"} interpolations; should be either \code{"total"} or
# \code{"sum"}.
#
# @return A tibble with interpolated data, ready for final merge with \code{target}.
#
aw_interpolate_single <- function(source, sid, value, target, tid, type, weight){
# save parameters to list
paramList <- as.list(match.call())
# nse
sidQ <- rlang::enquo(sid)
sidQN <- rlang::quo_name(rlang::enquo(sidQ))
valueQ <- rlang::enquo(value)
valueQN <- rlang::quo_name(rlang::enquo(value))
tidQ <- rlang::enquo(tid)
tidQN <- rlang::quo_name(rlang::enquo(tidQ))
# strip source and target dataframes
sourceS <- aw_strip_df(source, id = sidQN, value = valueQN)
targetS <- aw_strip_df(target, id = tidQN)
# interpolate
out <- aw_interpolater(source = sourceS, sid = !!sidQ, value = !!valueQ, target = targetS,
tid = !!tidQ, type = type, weight = weight)
# return output
return(out)
}
# Intermediate Function - Multiple Values (iteration)
#
# @description Intermediate function called when are more than one variables to be interpolated.
# This is used to simplify the code for \code{aw_interpolate}.
#
# @param source A \code{sf} object with data to be interpolated
# @param sid A unique identification number within \code{source}
# @param values A vector of columns within \code{source} to be interpolated
# @param target A \code{sf} object that data should be interpolated to
# @param tid A unique identification number within \code{target}
# @param type One of either \code{"extensive"} (if the data are spatially extensive e.g.
# population counts), \code{"intensive"} (if the data are spatially intensive e.g.
# population density), or \code{"mixed"} (if the data include both extensive and
# intensive values). If \code{"extensive"}, the sum is returned for the interpolated
# value. If \code{"intensive"}, the mean is returned for the interpolated value.
# If \code{"mixed"}, vectors named \code{"extensive"} and \code{"intensive"} containing
# the relevant variable names should be specified in the dots.
# @param weight For \code{"extensive"} interpolations; should be either \code{"total"} or
# \code{"sum"}.
#
# @importFrom dplyr %>%
# @importFrom dplyr bind_cols
# @importFrom dplyr one_of
# @importFrom dplyr select
# @importFrom purrr imap
# @importFrom purrr map
# @importFrom purrr reduce
# @importFrom rlang enquo
# @importFrom rlang quo_name
#
# @return A tibble with interpolated data, ready for final merge with \code{target}.
#
aw_interpolate_multiple <- function(source, sid, values, target, tid, type, weight){
# save parameters to list
paramList <- as.list(match.call())
# nse
sidQ <- rlang::enquo(sid)
sidQN <- rlang::quo_name(rlang::enquo(sidQ))
tidQ <- rlang::enquo(tid)
tidQN <- rlang::quo_name(rlang::enquo(tidQ))
# create column list
colNames <- c(tidQN, values)
# strip target dataframe
targetS <- aw_strip_df(target, id = tidQN)
# create list of sf objects
values %>%
split(values) %>%
purrr::map(~ aw_strip_df(source, id = !!sidQ, value = .x)) %>%
purrr::imap(~ aw_interpolater(source = .x, sid = !!sidQ, value = (!! rlang::quo(!! rlang::sym(.y))),
target = targetS, tid = !!tidQ, type = type, weight = weight,
multiple = TRUE)) %>%
purrr::reduce(.f = dplyr::bind_cols) -> out
# remove geometry
sf::st_geometry(targetS) <- NULL
# combine
out <- dplyr::bind_cols(targetS, out)
# return output
return(out)
}
# Strip dataframe of all non-essential variables
#
# @description \code{aw_strip_df} is called by \code{aw_interpolate}. It
# strips \code{sf} objects of nonessential variables but keeps
# variables listed in parameters.
#
# @param .data A \code{sf} object
# @param id A given source id field
# @param value Optional; the variable that estimations will be based on
#
# @return A \code{sf} object with only the \code{id} and, if provided, the
# \code{value} column as well.
#
aw_strip_df <- function(.data, id, value){
# save parameters to list
paramList <- as.list(match.call())
# nse
idQ <- rlang::enquo(id)
# strip variables
if (missing(value)){
# strip all but id
out <- dplyr::select(.data, !!idQ)
} else {
# additional nse for value
valsQ <- rlang::enquo(value)
# strip all but id and value
out <- dplyr::select(.data, !!idQ, !!valsQ)
}
# return output
return(out)
}
# Carry Out Interpolation
#
# @description \code{aw_interpolater} performs pipeline of interpolation specific
# calculations with \code{aw_intersect}, \code{aw_total}, \code{aw_weight},
# \code{aw_calculate}, and \code{aw_aggregate}. The interpolated total is then
# verified against the total calculated from the source data using \code{aw_verify}.
#
# @param source A \code{sf} object with data to be interpolated
# @param sid A unique identification number within \code{source}
# @param value A column within \code{source} to be interpolated
# @param target A \code{sf} object that data should be interpolated to
# @param tid A unique identification number within \code{target}
# @param type One of either \code{"extensive"} (if the data are spatially extensive e.g.
# population counts), \code{"intensive"} (if the data are spatially intensive e.g.
# population density), or \code{"mixed"} (if the data include both extensive and
# intensive values). If \code{"extensive"}, the sum is returned for the interpolated
# value. If \code{"intensive"}, the mean is returned for the interpolated value.
# If \code{"mixed"}, vectors named \code{"extensive"} and \code{"intensive"} containing
# the relevant variable names should be specified in the dots.
# @param weight For \code{"extensive"} interpolations; should be either \code{"total"} or
# \code{"sum"}.
#
# @return A \code{sf} object or tibble with \code{value} interpolated into
# the \code{target} data.
#
aw_interpolater <- function(source, sid, value, target, tid, type, weight, multiple = FALSE) {
# save parameters to list
paramList <- as.list(match.call())
# nse
sidQ <- rlang::enquo(sid)
valueQ <- rlang::enquo(value)
valueQN <- rlang::quo_name(rlang::enquo(value))
tidQ <- rlang::enquo(tid)
# intersect data
target %>%
aw_intersect(source = source, areaVar = "...area") -> intersected
# calculate total value for areal weight
if (type == "extensive"){
intersected %>%
aw_total(source = source, id = !!sidQ, areaVar = "...area", totalVar = "...totalArea",
type = "extensive", weight = weight) -> totaled
} else if (type == "intensive"){
intersected %>%
aw_total(source = source, id = !!tidQ, areaVar = "...area", totalVar = "...totalArea",
weight = weight, type = "intensive") -> totaled
}
# caclulate areal weight and estimated value; aggregate
totaled %>%
aw_weight(areaVar = "...area", totalVar = "...totalArea", areaWeight = "...areaWeight") %>%
aw_calculate(value = !!valueQ, areaWeight = "...areaWeight") %>%
aw_aggregate(target = target, tid = !!tidQ, interVar = !!valueQ) -> out
# remove sf from output
sf::st_geometry(out) <- NULL
# fix multiples
if (multiple == TRUE){
out <- dplyr::select(out, !!valueQ)
}
# return target output
return(out)
}
|