File: parse_all.R

package info (click to toggle)
r-cran-evaluate 1.0.5-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 444 kB
  • sloc: sh: 13; makefile: 2
file content (202 lines) | stat: -rw-r--r-- 6,010 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
#' Parse, retaining comments
#'
#' Works very similarly to parse, but also keeps original formatting and
#' comments.
#'
#' @param x object to parse.  Can be a string, a file connection, or a function.
#'   If a connection, will be opened and closed only if it was closed initially.
#' @param filename string overriding the file name
#' @param allow_error whether to allow syntax errors in `x`
#' @return
#' A data frame two columns, `src` and `expr`, and one row for each complete
#' input in `x`. A complete input is R code that would trigger execution when
#' typed at the console. This might consist of multiple expressions separated
#' by `;` or one expression spread over multiple lines (like a function
#' definition).
#'
#' `src` is a character vector of source code. Each element represents a
#' complete input expression (which might span multiple line) and always has a
#' terminal `\n`.
#'
#' `expr` is a list-column of [expression]s. The expressions can be of any
#' length, depending on the structure of the complete input source:
#'
#' * If `src` consists of only only whitespace and/or comments, `expr` will
#'   be length 0.
#' * If `src` a single scalar (like `TRUE`, `1`, or `"x"`), name, or
#'   function call, `expr` will be length 1.
#' * If `src` contains multiple expressions separated by `;`, `expr` will
#'   have length two or more.
#'
#' The expressions have their srcrefs removed.
#'
#' If there are syntax errors in `x` and `allow_error = TRUE`, the data
#' frame will have an attribute `PARSE_ERROR` that stores the error object.
#' @export
#' @examples
#' # Each of these inputs are single line, but generate different numbers of
#' # expressions
#' source <- c(
#'   "# a comment",
#'   "x",
#'   "x;y",
#'   "x;y;z"
#' )
#' parsed <- parse_all(source)
#' lengths(parsed$expr)
#' str(parsed$expr)
#'
#' # Each of these inputs are a single expression, but span different numbers
#' # of lines
#' source <- c(
#'   "function() {}",
#'   "function() {",
#'   "  # Hello!",
#'   "}",
#'   "function() {",
#'   "  # Hello!",
#'   "  # Goodbye!",
#'   "}"
#' )
#' parsed <- parse_all(source)
#' lengths(parsed$expr)
#' parsed$src
parse_all <- function(x, filename = NULL, allow_error = FALSE) {
  UseMethod("parse_all")
}


#' @export
parse_all.character <- function(x, filename = NULL, allow_error = FALSE) {
  if (any(grepl("\n", x))) {
    # Ensure that empty lines are not dropped by strsplit()
    x[x == ""] <- "\n"
    # Standardise to a character vector with one line per element;
    # this is the input that parse() is documented to accept
    x <- unlist(strsplit(x, "\n"), recursive = FALSE, use.names = FALSE)
  }
  n <- length(x)

  filename <- filename %||% "<text>"

  src <- srcfilecopy(filename, x)
  if (allow_error) {
    exprs <- tryCatch(parse(text = x, srcfile = src), error = identity)
    if (inherits(exprs, "error")) {
      return(structure(
        data.frame(src = paste(x, collapse = "\n"), expr = empty_expr()),
        PARSE_ERROR = exprs
      ))
    }
  } else {
    exprs <- parse(text = x, srcfile = src)
  }

  srcref <- attr(exprs, "srcref", exact = TRUE)
  pos <- data.frame(
    start = vapply(srcref, `[[`, 7, FUN.VALUE = integer(1)),
    end = vapply(srcref, `[[`, 8, FUN.VALUE = integer(1))
  )
  pos$exprs <- exprs

  # parse() splits TLEs that use ; into multiple expressions so we
  # join together expressions that overlaps on the same line(s)
  line_group <- cumsum(is_new_line(pos$start, pos$end))
  tles <- lapply(split(pos, line_group), function(p) {
    n <- nrow(p)
    data.frame(
      src = paste(x[p$start[1]:p$end[n]], collapse = "\n"),
      expr = I(list(p$exprs)),
      line = p$start[1]
    )
  })
  tles <- do.call(rbind, tles)

  # parse() drops comments and whitespace so we add them back in
  gaps <- data.frame(start = c(1, pos$end + 1), end = c(pos$start - 1, n))
  gaps <- gaps[gaps$start <= gaps$end, , ]
  # some indexing magic in order to vectorise the extraction
  lengths <- gaps$end - gaps$start + 1
  lines <- sequence(lengths) + rep(gaps$start, lengths) - 1

  comments <- data.frame(
    src = x[lines],
    expr = empty_expr(length(lines)),
    line = lines
  )

  res <- rbind(tles, comments)
  res <- res[order(res$line), c("src", "expr")]

  # Restore newlines stripped while converting to vector of lines
  if (length(res$src)) {
    res$src <- paste0(res$src, "\n")
  } else {
    res$src <- character()
  }

  res$expr <- lapply(res$expr, removeSource)

  rownames(res) <- NULL
  res
}

#' @export
parse_all.connection <- function(x, filename = NULL, ...) {
  if (!isOpen(x, "r")) {
    open(x, "r")
    defer(close(x))
  }
  text <- readLines(x)
  filename <- filename %||% summary(x)$description

  parse_all(text, filename, ...)
}

#' @export
parse_all.function <- function(x, filename = NULL, ...) {
  filename <- filename %||% "<filename>"
  parse_all(find_function_body(x), filename = filename, ...)
}

# Calls are already parsed and always length one
#' @export
parse_all.call <- function(x, filename = NULL, ...) {
  parse_all(deparse(x), filename = filename, ...)
}

# Helpers ---------------------------------------------------------------------

empty_expr <- function(n = 1) {
  I(rep(list(expression()), n))
}

is_new_line <- function(start, end) {
  if (length(start) == 0) {
    logical()
  } else if (length(start) == 1) {
    TRUE
  } else {
    c(TRUE, start[-1] != end[-length(end)])
  }
}

find_function_body <- function(f) {
  if (is_call(body(f), "{")) {
    lines <- deparse(f, control = "useSource")
    expr <- parse(text = lines, keep.source = TRUE)

    data <- getParseData(expr)
    token_start <- which(data$token == "'{'")[[1]]
    token_end <- last(which(data$token == "'}'"))

    line_start <- data$line1[token_start] + 1
    line_end <- data$line2[token_end] - 1
    lines <- lines[seq2(line_start, line_end)]

    dedent <- min(data$col1[seq2(token_start + 1, token_end - 1)], 1e3)
    substr(lines, dedent, nchar(lines))
  } else {
    deparse(body(f))
  }
}