File: tidy.R

package info (click to toggle)
r-cran-formatr 1.14-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 376 kB
  • sloc: javascript: 19; sh: 10; makefile: 2
file content (315 lines) | stat: -rw-r--r-- 13,599 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
#' Reformat R code
#'
#' Read R code from a file or the clipboard and reformat it. This function is
#' based on \code{\link{parse}()} and \code{\link{deparse}()}, but it does
#' several other things, such as preserving blank lines and comments,
#' substituting the assignment operator \code{=} with \code{<-}, and
#' re-indenting code with a specified number of spaces.
#'
#' A value of the argument \code{width.cutoff} wrapped in \code{\link{I}()}
#' (e.g., \code{I(60)}) will be treated as the \emph{upper bound} of the line
#' width. The corresponding argument to \code{deparse()} is a lower bound, so
#' the function will perform a binary search for a width value that can make
#' \code{deparse()} return code with line width smaller than or equal to the
#' \code{width.cutoff} value. If the search fails, a warning will signal,
#' suppressible by global option \code{options(formatR.width.warning = FALSE)}.
#' @param source A character string: file path to the source code (defaults to
#'   the clipboard).
#' @param comment Whether to keep comments.
#' @param blank Whether to keep blank lines.
#' @param arrow Whether to substitute the assignment operator \code{=} with
#'   \code{<-}.
#' @param pipe Whether to substitute the \pkg{magrittr} pipe \code{\%>\%} with
#'   R's native pipe operator \code{|>}.
#' @param brace.newline Whether to put the left brace \code{\{} to a new line.
#' @param indent Number of spaces to indent the code.
#' @param wrap Whether to wrap comments to the linewidth determined by
#'   \code{width.cutoff} (roxygen comments will never be wrapped).
#' @param width.cutoff An integer in \code{[20, 500]}: if a line's character
#'   length is at or over this number, the function will try to break it into a
#'   new line. In other words, this is the \emph{lower bound} of the line width.
#'   See \sQuote{Details} if an upper bound is desired instead.
#' @param args.newline Whether to start the arguments of a function call on a
#'   new line instead of after the function name and \code{(} when the arguments
#'   cannot fit one line.
#' @param output Whether to output to the console or a file using
#'   \code{\link{cat}()}.
#' @param text An alternative way to specify the input: if \code{NULL}, the
#'   function will use the \code{source} argument; if a character vector
#'   containing the source code, the function will use this and ignore the
#'   \code{source} argument.
#' @param ... Other arguments passed to \code{\link{cat}()}, e.g. \code{file}
#'   (this can be useful for batch-processing R scripts, e.g.
#'   \code{tidy_source(source = 'input.R', file = 'output.R')}).
#' @return A list with components \item{text.tidy}{the reformatted code as a
#'   character vector} \item{text.mask}{the code containing comments, which are
#'   masked in assignments or with the weird operator}.
#' @note Be sure to read the reference to know other limitations.
#' @author Yihui Xie <\url{https://yihui.org}> with substantial contribution
#'   from Yixuan Qiu <\url{https://yixuan.blog}>
#' @seealso \code{\link{parse}()}, \code{\link{deparse}()}
#' @references \url{https://yihui.org/formatR/} (an introduction to this
#'   package, with examples and further notes)
#' @import stats utils
#' @export
#' @example inst/examples/tidy.source.R
tidy_source = function(
  source = 'clipboard', comment = getOption('formatR.comment', TRUE),
  blank = getOption('formatR.blank', TRUE),
  arrow = getOption('formatR.arrow', FALSE),
  pipe = getOption('formatR.pipe', FALSE),
  brace.newline = getOption('formatR.brace.newline', FALSE),
  indent = getOption('formatR.indent', 4),
  wrap = getOption('formatR.wrap', TRUE),
  width.cutoff = getOption('formatR.width', getOption('width')),
  args.newline = getOption('formatR.args.newline', FALSE),
  output = TRUE, text = NULL, ...
) {
  if (is.null(text)) {
    if (source == 'clipboard' && Sys.info()['sysname'] == 'Darwin') {
      source = pipe('pbpaste'); on.exit(close(source), add = TRUE)
      # use readChar() instead of readLines() in case users didn't copy the last
      # \n into clipboard, e.g., https://github.com/yihui/formatR/issues/54
      text = readChar(source, getOption('formatR.clipboard.size', 1e5))
      text = unlist(strsplit(text, '\n'))
    } else {
      text = readLines(source, warn = FALSE)
    }
  }
  enc = special_encoding(text)
  if (length(text) == 0L || all(grepl('^\\s*$', text))) {
    if (output) cat('\n', ...)
    return(list(text.tidy = text, text.mask = text))
  }
  if (blank) {
    one = one_string(text) # record how many line breaks before/after
    n1 = attr(regexpr('^\n*', one), 'match.length')
    n2 = attr(regexpr('\n*$', one), 'match.length')
  }
  on.exit(.env$line_break <- NULL, add = TRUE)
  if (width.cutoff > 500) width.cutoff[1] = 500
  if (width.cutoff < 20) width.cutoff[1] = 20
  # insert enough spaces into infix operators such as %>% so the lines can be
  # broken after the operators
  spaces = rep_chars(width.cutoff)
  text = mask_comments(text, comment, blank, wrap, arrow, pipe, args.newline, spaces)
  text.mask = tidy_block(
    text, width.cutoff, rep_chars(indent), brace.newline, wrap, args.newline, spaces
  )
  text.tidy = if (comment) unmask_source(text.mask) else text.mask
  # restore new lines in the beginning and end
  if (blank) text.tidy = c(rep('', n1), text.tidy, rep('', n2))
  if (output) cat(text.tidy, sep = '\n', ...)
  invisible(list(
    text.tidy = restore_encoding(text.tidy, enc),
    text.mask = restore_encoding(text.mask, enc)
  ))
}

# some tokens that should be rare to appear in code from real world, mainly to
# protect comments and blank lines
begin.comment = '.BeGiN_TiDy_IdEnTiFiEr_HaHaHa'
end.comment = '.HaHaHa_EnD_TiDy_IdEnTiFiEr'
pat.comment = sprintf('invisible\\("\\%s|\\%s"\\)', begin.comment, end.comment)
mat.comment = sprintf('invisible\\("\\%s([^"]*)\\%s"\\)', begin.comment, end.comment)
inline.comment = ' %\b%[ ]*"([ ]*#[^"]*)"'
blank.comment = sprintf('invisible("%s%s")', begin.comment, end.comment)
blank.comment2 = paste0('^\\s*', gsub('\\(', '\\\\(', blank.comment), '\\s*$')

# first, perform a (semi-)binary search to find the greatest cutoff width such
# that the width of the longest line <= `width`; if the search fails, use
# brute-force to try all possible widths
deparse2 = function(
  expr, width, spaces = '', indent = '    ',
  warn = getOption('formatR.width.warning', TRUE)
) {
  wmin = 20  # if deparse() can't manage it with width.cutoff <= 20, issue a warning
  wmax = min(500, width + 10)  # +10 because a larger width may result in smaller actual width

  r = seq(wmin, wmax)
  k = setNames(rep(NA, length(r)), as.character(r))  # results of width checks
  d = p = list()  # deparsed results and lines exceeding desired width

  # pattern for pipe operators like %>%
  pat.infix = paste0('(%)(', infix_ops, ') {', width, '}(%)$')
  check_width = function(w) {
    i = as.character(w)
    if (!is.na(x <- k[i])) return(x)
    x = deparse(expr, w)
    x = trimws(x, 'right')
    d[[i]] <<- x
    x2 = grep(pat.comment, x, invert = TRUE, value = TRUE)  # don't check comments
    x2 = gsub(pat.infix, '\\1\\2\\3', x2)  # remove extra spaces in %>% operators
    x2 = restore_infix(x2)
    x2 = reindent_lines(x2, indent)
    x2 = restore_arg_breaks(x2, width, spaces, indent, split = TRUE)
    p[[i]] <<- x2[exceed_width(x2, width)]
    k[i] <<- length(p[[i]]) == 0
  }

  # if the desired width happens to just work, return the result
  if (check_width(w <- width)) return(d[[as.character(w)]])

  repeat {
    if (!any(is.na(k))) break  # has tried all possibilities
    if (wmin >= wmax) break
    w = ceiling((wmin + wmax)/2)
    if (check_width(w)) wmin = w else wmax = wmax - 2
  }

  # try all the rest of widths if no suitable width has been found
  if (!any(k, na.rm = TRUE)) for (i in r[is.na(k)]) check_width(i)
  r = r[which(k)]
  if ((n <- length(r)) > 0) return(d[[as.character(r[n])]])

  i = as.character(width)
  if (warn) warning(
    'Unable to find a suitable cut-off to make the line widths smaller than ',
    width, ' for the line(s) of code:\n', one_string('  ', p[[i]]),
    call. = FALSE
  )
  d[[i]]
}

# wrapper around parse() and deparse()
tidy_block = function(
  text, width = getOption('width'), indent = '    ',
  brace.newline = FALSE, wrap = TRUE, args.newline = FALSE, spaces = rep_chars(width)
) {
  exprs = parse_source(text)
  if (length(exprs) == 0) return(character(0))
  deparse = if (inherits(width, 'AsIs')) {
    function(x, width) deparse2(x, width, spaces, indent)
  } else base::deparse
  unlist(lapply(as.list(exprs), function(e) {
    x = deparse(e, width)
    x = trimws(x, 'right')
    x = reindent_lines(x, indent)
    # remove white spaces on blank lines
    x = gsub(blank.comment2, '', x)
    x = reflow_comments(x, width, wrap)
    if (brace.newline) x = move_leftbrace(x)
    x = restore_infix(x)
    x = one_string(x)
    # restore anonymous functions
    if (!brace.newline)
      x = gsub('( %\\\\\b%)\\s+(\\{)', '\\1 \\2', x)  # remove possible \n before {
    x = gsub('`\\\\\\\\`(\\(.*?\\)) %\\\\\b%', '\\\\\\1', x)
    if (args.newline) x = restore_arg_breaks(x, width, spaces, indent)
    x
  }))
}

# Restore the real source code from the masked text
unmask_source = function(x) {
  if (length(x) == 0) return(x)
  m = .env$line_break
  if (!is.null(m)) x = gsub(m, '\n', x)
  # if the comments were separated into the next line, then remove '\n' after
  # the identifier first to move the comments back to the same line
  x = gsub('(%\b%)[ ]*\n', '\\1', x)
  # move 'else ...' back to the last line
  x = gsub('\n\\s*else(\\s+|$)', ' else\\1', x)
  if (any(grepl('\\\\\\\\', x)) || any(grepl(inline.comment, x))) {
    m = gregexpr(inline.comment, x)
    regmatches(x, m) = lapply(regmatches(x, m), restore_bs)
  }
  # inline comments should be terminated by $ or \n
  x = gsub(paste(inline.comment, '(\n|$)', sep = ''), '  \\1\\2', x)
  # the rest of inline comments should be appended by \n
  gsub(inline.comment, '  \\1\n', x)
}


#' Format all R scripts under a directory, or specified R scripts
#'
#' Look for all R scripts under a directory (using the pattern
#' \code{"[.][RrSsQq]$"}), then tidy them with \code{\link{tidy_source}()}. If
#' successful, the original scripts will be overwritten with reformatted ones.
#' Please back up the original directory first if you do not fully understand
#' the tricks used by \code{\link{tidy_source}()}. \code{tidy_file()} formats
#' scripts specified by file names.
#' @param path The path to a directory containning R scripts.
#' @param recursive Whether to recursively look for R scripts under \code{path}.
#' @param ... Other arguments to be passed to \code{\link{tidy_source}()}.
#' @param file A vector of filenames.
#' @return Invisible \code{NULL}.
#' @author Yihui Xie (\code{tidy_dir}) and Ed Lee (\code{tidy_file})
#' @seealso \code{\link{tidy_source}()}
#' @export
#' @examples
#' library(formatR)
#'
#' path = tempdir()
#' file.copy(system.file('demo', package = 'base'), path, recursive=TRUE)
#' tidy_dir(path, recursive=TRUE)
tidy_dir = function(path = '.', recursive = FALSE, ...) {
  tidy_file(list.files(
    path, pattern = '[.][RrSsQq]$', full.names = TRUE, recursive = recursive
  ), ...)
}

#' @export
#' @rdname tidy_dir
tidy_file = function(file, ...) {
  for (f in file) {
    message("tidying ", f)
    try(tidy_source(f, file = f, ...))
  }
}

#' Reformat R code in RStudio IDE
#'
#' If any R code is selected in the RStudio source editor, this function
#' reformats the selected code; otherwise it reformats the current open file (if
#' it is unsaved, it will be automatically saved).
#' @param ... Arguments to be passed to \code{\link{tidy_source}()}, among which
#'   the \code{indent} argument will respect the value you set for the number of
#'   spaces for indentation in RStudio.
#' @note If the output is not what you want, you can undo the change in the
#'   editor (Ctrl + Z or Command + Z).
#' @export
#' @examplesIf interactive()
#' formatR::tidy_rstudio()
#' formatR::tidy_rstudio(args.newline = TRUE)
tidy_rstudio = function(...) {
  ctx = rstudio_context()
  if (is.null(getOption('formatR.indent'))) {
    opts = options(formatR.indent = rstudioapi::readRStudioPreference('num_spaces_for_tab', 4))
    on.exit(options(opts), add = TRUE)
  }
  if (length(ctx$selection) == 1 && !identical(txt <- ctx$selection[[1]]$text, '')) {
    res = tidy_source(text = txt, output = FALSE, ...)$text.tidy
    rstudioapi::modifyRange(ctx$selection[[1]]$range, one_string(res), ctx$id)
  } else {
    rstudioapi::documentSave(ctx$id)
    res = tidy_source(ctx$path, output = FALSE, ...)$text.tidy
    writeLines(enc2utf8(res), ctx$path, useBytes = TRUE)
  }
}

rstudio_context = function() {
  ctx = rstudioapi::getSourceEditorContext()
  if (is.null(ctx)) stop('There is no open document in the RStudio source editor.')
  ctx
}

#' Substitute the \pkg{magrittr} pipe with R's native pipe operator
#'
#' Parse the R code in the RStudio editor, identify \code{\%>\%}, and substitute
#' with \code{|>}.
#' @note Currently this function only works inside the RStudio IDE, and may be
#'   extended in future to deal with arbitrary R code elsewhere.
#' @export
#' @examplesIf interactive()
#' formatR::tidy_pipe()
tidy_pipe = function() {
  ctx = rstudio_context()
  d = parse_data(ctx$contents)
  i = d$token == 'SPECIAL' & d$text == '%>%'
  if (!any(i)) return(invisible())
  d = d[i, c('line1', 'col1', 'line2', 'col2')]
  d[, 4] = d[, 4] + 1
  r = unname(as.list(as.data.frame(t(d))))
  rstudioapi::modifyRange(r, '|>', ctx$id)
}