#' Parse, retaining comments
#'
#' Works very similarly to `parse()`, but also keeps the original formatting
#' and comments.
#'
#' @param x object to parse. Can be a string, a file connection, or a function.
#'   If a connection, it will be opened and closed again only if it was not
#'   already open.
#' @param filename string overriding the file name
#' @param allow_error whether to allow syntax errors in `x`
#' @return
#' A data frame with two columns, `src` and `expr`, and one row for each
#' complete input in `x`. A complete input is R code that would trigger
#' execution when typed at the console. This might consist of multiple
#' expressions separated by `;` or one expression spread over multiple lines
#' (like a function definition).
#'
#' `src` is a character vector of source code. Each element represents a
#' complete input expression (which might span multiple lines) and always has a
#' terminal `\n`.
#'
#' `expr` is a list-column of [expression]s. The expressions can be of any
#' length, depending on the structure of the complete input source:
#'
#' * If `src` consists of only whitespace and/or comments, `expr` will be
#'   length 0.
#' * If `src` is a single scalar (like `TRUE`, `1`, or `"x"`), name, or
#'   function call, `expr` will be length 1.
#' * If `src` contains multiple expressions separated by `;`, `expr` will
#'   have length two or more.
#'
#' The expressions have their srcrefs removed.
#'
#' If there are syntax errors in `x` and `allow_error = TRUE`, the data
#' frame will have an attribute `PARSE_ERROR` that stores the error object.
#' @export
#' @examples
#' # Each of these inputs is a single line, but generates a different number
#' # of expressions
#' source <- c(
#' "# a comment",
#' "x",
#' "x;y",
#' "x;y;z"
#' )
#' parsed <- parse_all(source)
#' lengths(parsed$expr)
#' str(parsed$expr)
#'
#' # Each of these inputs is a single expression, but spans a different number
#' # of lines
#' source <- c(
#' "function() {}",
#' "function() {",
#' " # Hello!",
#' "}",
#' "function() {",
#' " # Hello!",
#' " # Goodbye!",
#' "}"
#' )
#' parsed <- parse_all(source)
#' lengths(parsed$expr)
#' parsed$src
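#'
#' # With allow_error = TRUE a syntax error doesn't abort parsing; the error
#' # object is stored in the PARSE_ERROR attribute of the result
#' parsed <- parse_all("x + ", allow_error = TRUE)
#' attr(parsed, "PARSE_ERROR")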
parse_all <- function(x, filename = NULL, allow_error = FALSE) {
  UseMethod("parse_all")
}

#' @export
parse_all.character <- function(x, filename = NULL, allow_error = FALSE) {
  if (any(grepl("\n", x))) {
    # Ensure that empty lines are not dropped by strsplit()
    x[x == ""] <- "\n"
    # Standardise to a character vector with one line per element;
    # this is the input that parse() is documented to accept
    x <- unlist(strsplit(x, "\n"), recursive = FALSE, use.names = FALSE)
  }
  n <- length(x)
  filename <- filename %||% "<text>"
  src <- srcfilecopy(filename, x)

  if (allow_error) {
    exprs <- tryCatch(parse(text = x, srcfile = src), error = identity)
    if (inherits(exprs, "error")) {
      return(structure(
        data.frame(src = paste(x, collapse = "\n"), expr = empty_expr()),
        PARSE_ERROR = exprs
      ))
    }
  } else {
    exprs <- parse(text = x, srcfile = src)
  }

  srcref <- attr(exprs, "srcref", exact = TRUE)
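  # Elements 7 and 8 of a srcref are the first and last parsed lines, which
  # map each expression back to the corresponding elements of `x`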
  pos <- data.frame(
    start = vapply(srcref, `[[`, 7, FUN.VALUE = integer(1)),
    end = vapply(srcref, `[[`, 8, FUN.VALUE = integer(1))
  )
  pos$exprs <- exprs

  # parse() splits TLEs (top-level expressions) that use `;` into multiple
  # expressions, so we join together expressions that overlap on the same line(s)
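  # (e.g. "x; y" is a single complete input but parses into two expressions)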
  line_group <- cumsum(is_new_line(pos$start, pos$end))
  tles <- lapply(split(pos, line_group), function(p) {
    n <- nrow(p)
    data.frame(
      src = paste(x[p$start[1]:p$end[n]], collapse = "\n"),
      expr = I(list(p$exprs)),
      line = p$start[1]
    )
  })
  tles <- do.call(rbind, tles)

  # parse() drops comments and whitespace so we add them back in
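  # `gaps` holds the line ranges not covered by any parsed expression: before
  # the first, between consecutive ones, and after the last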
  gaps <- data.frame(start = c(1, pos$end + 1), end = c(pos$start - 1, n))
  gaps <- gaps[gaps$start <= gaps$end, , ]

  # some indexing magic in order to vectorise the extraction
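  # e.g. gaps with start = c(1, 5) and end = c(2, 7) give lengths = c(2, 3)
  # and lines = c(1, 2, 5, 6, 7)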
  lengths <- gaps$end - gaps$start + 1
  lines <- sequence(lengths) + rep(gaps$start, lengths) - 1
  comments <- data.frame(
    src = x[lines],
    expr = empty_expr(length(lines)),
    line = lines
  )

  res <- rbind(tles, comments)
  res <- res[order(res$line), c("src", "expr")]

  # Restore newlines stripped while converting to vector of lines
  if (length(res$src)) {
    res$src <- paste0(res$src, "\n")
  } else {
    res$src <- character()
  }

  res$expr <- lapply(res$expr, removeSource)
  rownames(res) <- NULL
  res
}

#' @export
parse_all.connection <- function(x, filename = NULL, ...) {
  if (!isOpen(x, "r")) {
    open(x, "r")
    defer(close(x))
  }
  text <- readLines(x)
  filename <- filename %||% summary(x)$description
  parse_all(text, filename, ...)
}

#' @export
parse_all.function <- function(x, filename = NULL, ...) {
  filename <- filename %||% "<filename>"
  parse_all(find_function_body(x), filename = filename, ...)
}

# Calls are already parsed and always length one
#' @export
parse_all.call <- function(x, filename = NULL, ...) {
  parse_all(deparse(x), filename = filename, ...)
}

# Helpers ---------------------------------------------------------------------
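# A list-column of `n` zero-length expressions, used for rows that have no
# parsed expression (comments, whitespace, or a syntax error)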
empty_expr <- function(n = 1) {
  I(rep(list(expression()), n))
}

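# For each expression, TRUE if it starts a new line group, i.e. it does not
# share a line with the end of the previous expression (as with `x; y`)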
is_new_line <- function(start, end) {
  if (length(start) == 0) {
    logical()
  } else if (length(start) == 1) {
    TRUE
  } else {
    c(TRUE, start[-1] != end[-length(end)])
  }
}

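# Extract the source lines of a function's body, dropping the enclosing braces
# (when present) and any common indentation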
find_function_body <- function(f) {
  if (is_call(body(f), "{")) {
    lines <- deparse(f, control = "useSource")
    expr <- parse(text = lines, keep.source = TRUE)
    data <- getParseData(expr)
    token_start <- which(data$token == "'{'")[[1]]
    token_end <- last(which(data$token == "'}'"))
    line_start <- data$line1[token_start] + 1
    line_end <- data$line2[token_end] - 1
    lines <- lines[seq2(line_start, line_end)]
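    # The smallest start column of any token between the braces gives the
    # common indentation to strip; including 1e3 keeps min() finite when the
    # body is empty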
    dedent <- min(data$col1[seq2(token_start + 1, token_end - 1)], 1e3)
    substr(lines, dedent, nchar(lines))
  } else {
    deparse(body(f))
  }
}