1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741
|
# Copyright (C) 2021 Brodie Gaslam
#
# This file is part of "diffobj - Diffs for R Objects"
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# Go to <https://www.r-project.org/Licenses/GPL-2> for a copy of the license.
#' Diffs for R Objects
#'
#' Generate a colorized diff of two R objects for an intuitive visualization of
#' their differences. See `vignette(package="diffobj", "diffobj")` for details.
#'
#' @import crayon
#' @import methods
#' @importFrom utils capture.output file_test packageVersion read.csv
#' @importFrom stats ave frequency is.ts setNames
#' @importFrom grDevices rgb
#' @name diffobj-package
#' @docType package
NULL
# All of the `diff*` methods share their formals and their argument processing
# and differ only in the capture step, so they are generated by this function
# factory instead of being written out one at a time.  This keeps the formals
# consistent across the methods while they have not been set in stone yet.
#
# `capt_fun` is the capture/diff function for the method being generated.  It
# is called as `capt_fun(target, current, etc=, err=, extra)` and whatever it
# returns is the value of the generated function.
make_diff_fun <- function(capt_fun) {
  # nocov start
  function(
    target, current,
    mode=gdo("mode"),
    context=gdo("context"),
    format=gdo("format"),
    brightness=gdo("brightness"),
    color.mode=gdo("color.mode"),
    word.diff=gdo("word.diff"),
    pager=gdo("pager"),
    guides=gdo("guides"),
    trim=gdo("trim"),
    rds=gdo("rds"),
    unwrap.atomic=gdo("unwrap.atomic"),
    max.diffs=gdo("max.diffs"),
    disp.width=gdo("disp.width"),
    ignore.white.space=gdo("ignore.white.space"),
    convert.hz.white.space=gdo("convert.hz.white.space"),
    tab.stops=gdo("tab.stops"),
    line.limit=gdo("line.limit"),
    hunk.limit=gdo("hunk.limit"),
    align=gdo("align"),
    style=gdo("style"),
    palette.of.styles=gdo("palette"),
    frame=par_frame(),
    interactive=gdo("interactive"),
    term.colors=gdo("term.colors"),
    tar.banner=NULL,
    cur.banner=NULL,
    strip.sgr=gdo("strip.sgr"),
    sgr.supported=gdo("sgr.supported"),
    extra=list()
  ) {
  # nocov end
    # Statement order in this prologue is deliberate: `frame` is touched
    # before anything else so that a default `par_frame()` is evaluated in
    # this context, and only then is the call extracted.
    frame
    call.info <- extract_call(sys.calls(), frame)

    # Touch `target` / `current` now so that if one is missing the error is
    # raised here and not somewhere further down.
    target
    current

    # Validate the arguments and resolve all the auto-selection arguments
    settings <- check_args(
      call=call.info$call,
      tar.exp=call.info$tar,
      cur.exp=call.info$cur,
      mode=mode,
      context=context,
      line.limit=line.limit,
      format=format,
      brightness=brightness,
      color.mode=color.mode,
      pager=pager,
      ignore.white.space=ignore.white.space,
      max.diffs=max.diffs,
      align=align,
      disp.width=disp.width,
      hunk.limit=hunk.limit,
      convert.hz.white.space=convert.hz.white.space,
      tab.stops=tab.stops,
      style=style,
      palette.of.styles=palette.of.styles,
      frame=frame,
      tar.banner=tar.banner,
      cur.banner=cur.banner,
      guides=guides,
      rds=rds,
      trim=trim,
      word.diff=word.diff,
      unwrap.atomic=unwrap.atomic,
      extra=extra,
      interactive=interactive,
      term.colors=term.colors,
      strip.sgr=strip.sgr,
      sgr.supported=sgr.supported,
      call.match=match.call()
    )
    # In rds mode, give `get_rds` a chance to swap each input for the object
    # stored in the RDS it references
    if(rds) {
      target <- get_rds(target)
      current <- get_rds(current)
    }
    # Force crayon to whatever ansi status we chose; note we must do this after
    # touching vars in case someone passes `options(crayon.enabled=...)` as one
    # of the arguments.  Currently disabled:

    # old.crayon.opt <- options(
    #   crayon.enabled=
    #     is(etc.proc@style, "StyleAnsi") ||
    #     (!is(etc.proc@style, "StyleHtml") && etc.proc@sgr.supported)
    # )
    # on.exit(options(old.crayon.opt), add=TRUE)

    err <- make_err_fun(sys.call())

    # The gutter has to be computed first because the widths used for capture
    # etc. are derived from it.  If not a base text type style, gutter and
    # column padding are assumed to be zero even though that may not always be
    # correct.
    settings@gutter <- gutter_dat(settings)
    gutter.width <- settings@gutter@width
    pad.width <- nchar2(
      settings@style@text@pad.col, sgr.supported=settings@sgr.supported
    )
    # Narrowest line allowed, gutter included
    min.line.width <- .min.width + gutter.width
    half.disp.width <- as.integer((settings@disp.width - pad.width) / 2)

    settings@line.width <- max(settings@disp.width, min.line.width)
    settings@text.width <- settings@line.width - gutter.width
    settings@line.width.half <- max(half.disp.width, min.line.width)
    settings@text.width.half <- settings@line.width.half - gutter.width

    # If in side by side mode already then we know we want half-width, and if
    # width is less than 80 we know we want unified
    if(settings@mode == "auto" && settings@disp.width < 80L) {
      settings@mode <- "unified"
    }
    if(settings@mode == "sidebyside") {
      settings <- sideBySide(settings)
    }
    # Capture and diff; assign then return so the value is returned visibly
    result <- capt_fun(target, current, etc=settings, err=err, extra)
    result
  }
}
#' Diff \code{print}ed Objects
#'
#' Runs the diff between the \code{print} or \code{show} output produced by
#' \code{target} and \code{current}. Given the extensive parameter list, this
#' documentation page is intended as a reference for all the \code{diff*}
#' methods. For a high level introduction see \code{vignette("diffobj")}.
#'
#' Almost all aspects of how the diffs are computed and displayed are
#' controllable through the \code{diff*} methods parameters. This results in a
#' lengthy parameter list, but in practice you should rarely need to adjust
#' anything past the \code{color.mode} parameter. Default values are specified
#' as options so that users may configure diffs in a persistent manner.
#' \code{\link{gdo}} is a shorthand function to access \code{diffobj} options.
#'
#' Parameter order after \code{color.mode} is not guaranteed. Future versions
#' of \code{diffobj} may add parameters and re-order existing parameters past
#' \code{color.mode}.
#'
#' This and other \code{diff*} functions are S4 generics that dispatch on the
#' \code{target} and \code{current} parameters. Methods with signature
#' \code{c("ANY", "ANY")} are defined and act as the default methods. You can
#' use this to set up methods to pre-process or set specific parameters for
#' selected classes that can then \code{callNextMethod} for the actual diff.
#' Note that while the generics include \code{...} as an argument, none of the
#' methods do.
#'
#' Strings are re-encoded to UTF-8 with \code{\link{enc2utf8}} prior to
#' comparison to avoid encoding-only differences.
#'
#' The text representation of `target` and `current` should each have no more
#' than ~INT_MAX/4 lines.
#'
#' @section Matrices and Data Frames:
#'
#' While \code{diffPrint} attempts to handle the default R behavior that wraps
#' wide tables, the results are often sub-optimal. A better approach is to set
#' the \code{disp.width} parameter to a large enough value such that wrapping is
#' not necessary, and to use a browser-based \code{pager}. In the future we
#' will add the capability to specify different capture widths and wrap widths
#' so that this is an option for terminal output (see
#' \href{https://github.com/brodieG/diffobj/issues/109}{issue 109}).
#'
#' One thing to keep in mind is that \code{diffPrint} is not designed to work
#' with very large data frames.
#'
#' @export
#' @seealso \code{\link{diffObj}}, \code{\link{diffStr}},
#' \code{\link{diffChr}} to compare character vectors directly,
#' \code{\link{diffDeparse}} to compare deparsed objects, \code{\link{ses}}
#' for a minimal and fast diff
#' @param target the reference object
#' @param current the object being compared to \code{target}
#' @param mode character(1L), one of:
#' \itemize{
#' \item \dQuote{unified}: diff mode used by \code{git diff}
#' \item \dQuote{sidebyside}: line up the differences side by side
#' \item \dQuote{context}: show the target and current hunks in their
#' entirety; this mode takes up a lot of screen space but makes it easier
#' to see what the objects actually look like
#' \item \dQuote{auto}: default mode; pick one of the above, will favor
#' \dQuote{sidebyside} unless \code{getOption("width")} is less than 80,
#' or in \code{diffPrint} and objects are dimensioned and do not fit side
#' by side, or in \code{diffChr}, \code{diffDeparse}, \code{diffFile} and
#' output does not fit in side by side without wrapping
#' }
#' @param context integer(1L) how many lines of context are shown on either side
#' of differences (defaults to 2). Set to \code{-1L} to allow as many as
#' there are. Set to \dQuote{auto} to display as many as 10 lines or as few
#' as 1 depending on whether total screen lines fit within the number of lines
#' specified in \code{line.limit}. Alternatively pass the return value of
#' \code{\link{auto_context}} to fine tune the parameters of the auto context
#' calculation.
#' @param format character(1L), controls the diff output format, one of:
#' \itemize{
#' \item \dQuote{auto}: to select output format based on terminal
#' capabilities; will attempt to use one of the ANSI formats if they
#' appear to be supported, and if not or if you are in the Rstudio console
#' it will attempt to use HTML and browser output if in interactive mode.
#' \item \dQuote{raw}: plain text
#' \item \dQuote{ansi8}: color and format diffs using basic ANSI escape
#' sequences
#' \item \dQuote{ansi256}: like \dQuote{ansi8}, except using the full range
#' of ANSI formatting options
#' \item \dQuote{html}: color and format using HTML markup; the resulting
#' string is processed with \code{\link{enc2utf8}} when output as a full
#' web page (see docs for \code{html.output} under \code{\link{Style}}).
#' }
#' Defaults to \dQuote{auto}. See \code{palette.of.styles} for details
#' on customization, \code{\link{style}} for full control of output format.
#' See `pager` parameter for more discussion of Rstudio behavior.
#' @param brightness character, one of \dQuote{light}, \dQuote{dark},
#' \dQuote{neutral}, useful for adjusting color scheme to light or dark
#' terminals. \dQuote{neutral} by default. See \code{\link{PaletteOfStyles}}
#' for details and limitations. Advanced: you may specify brightness as a
#' function of \code{format}. For example, if you typically wish to use a
#' \dQuote{dark} color scheme, except for when in \dQuote{html} format when
#' you prefer the \dQuote{light} scheme, you may use
#' \code{c("dark", html="light")} as the value for this parameter. This is
#' particularly useful if \code{format} is set to \dQuote{auto} or if you
#' want to specify a default value for this parameter via options. Any names
#' you use should correspond to a \code{format}. You must have one unnamed
#' value which will be used as the default for all \code{format}s that are
#' not explicitly specified.
#' @param color.mode character, one of \dQuote{rgb} or \dQuote{yb}.
#' Defaults to \dQuote{yb}. \dQuote{yb} stands for \dQuote{Yellow-Blue} for
#' color schemes that rely primarily on those colors to style diffs.
#' Those colors can be easily distinguished by individuals with
#' limited red-green color sensitivity. See \code{\link{PaletteOfStyles}} for
#' details and limitations. Also offers the same advanced usage as the
#' \code{brightness} parameter.
#' @param word.diff TRUE (default) or FALSE, whether to run a secondary word
#' diff on the in-hunk differences. For atomic vectors setting this to
#' FALSE could make the diff \emph{slower} (see the \code{unwrap.atomic}
#' parameter). For other uses, particularly with \code{\link{diffChr}}
#' setting this to FALSE can substantially improve performance.
#' @param pager one of \dQuote{auto} (default), \dQuote{on},
#' \dQuote{off}, a \code{\link{Pager}} object, or a list; controls whether and
#' how a pager is used to display the diff output. If you require a
#' particular pager behavior you must use a \code{\link{Pager}}
#' object, or \dQuote{off} to turn off the pager. All other settings will
#' interact with other parameters such as \code{format}, \code{style}, as well
#' as with your system capabilities in order to select the pager expected to
#' be most useful.
#'
#' \dQuote{auto} and \dQuote{on} are the same, except that in non-interactive
#' mode \dQuote{auto} is equivalent to \dQuote{off}. \dQuote{off} will always
#' send output to the console. If \dQuote{on}, whether the output
#' actually gets routed to the pager depends on the pager \code{threshold}
#' setting (see \code{\link{Pager}}). The default behavior is to use the
#' pager associated with the \code{Style} object. The \code{Style} object is
#' itself determined by the \code{format} or \code{style} parameters.
#'
#' Depending on your system configuration different styles and corresponding
#' pagers will get selected, unless you specify a \code{Pager} object
#' directly. On a system with a system pager that supports ANSI CSI SGR
#' colors, the pager will only trigger if the output is taller than one
#' window. If the system pager is not known to support ANSI colors then the
#' output will be sent as HTML to the IDE viewer if available or to the web
#' browser if not. Even though Rstudio now supports ANSI CSI SGR at the
#' console, output is still formatted as HTML and sent to the IDE viewer.
#' Partly this is for continuity of behavior, but also because the default
#' Rstudio pager does not support ANSI CSI SGR, at least as of this writing.
#'
#' If \code{pager} is a list, then the same as with \dQuote{on}, except that
#' the \code{Pager} object associated with the selected \code{Style} object is
#' re-instantiated with the union of the list elements and the existing
#' settings of that \code{Pager}. The list should contain named elements that
#' correspond to the \code{\link{Pager}} instantiation parameters. The names
#' must be specified in full as partial parameter matching will not be carried
#' out because the pager is re-instantiated with \code{\link{new}}.
#'
#' See \code{\link{Pager}}, \code{\link{Style}}, and
#' \code{\link{PaletteOfStyles}} for more details and for instructions on how
#' to modify the default behavior.
#' @param guides TRUE (default), FALSE, or a function that accepts at least two
#' arguments and requires no more than two arguments. Guides
#' are additional context lines that are not strictly part of a hunk, but
#' provide important contextual data (e.g. column headers). If TRUE, the
#' context lines are shown in addition to the normal diff output, typically
#' in a different color to indicate they are not part of the hunk. If a
#' function, the function should accept as the first argument the object
#' being diffed, and the second the character representation of the object.
#' The function should return the indices of the elements of the
#' character representation that should be treated as guides. See
#' \code{\link{guides}} for more details.
#' @param trim TRUE (default), FALSE, or a function that accepts at least two
#' arguments and requires no more than two arguments. Function should compute
#' for each line in captured output what portion of those lines should be
#' diffed. By default, this is used to remove row meta data differences
#' (e.g. \code{[1,]}) so they alone do not show up as differences in the
#' diff. See \code{\link{trim}} for more details.
#' @param rds TRUE (default) or FALSE, if TRUE will check whether
#' \code{target} and/or \code{current} point to a file that can be read with
#' \code{\link{readRDS}} and if so, loads the R object contained in the file
#' and carries out the diff on the object instead of the original argument.
#' Currently there is no mechanism for specifying additional arguments to
#' \code{readRDS}
#' @param unwrap.atomic TRUE (default) or FALSE. Relevant primarily for
#' \code{diffPrint}, if TRUE, and \code{word.diff} is also TRUE, and both
#' \code{target} and \code{current} are \emph{unnamed} one-dimension atomics,
#' the vectors are unwrapped and diffed element by element, and then
#' re-wrapped. Since \code{diffPrint} is fundamentally a line diff, the
#' re-wrapped lines are lined up in a manner that is as consistent as possible
#' with the unwrapped diff. Lines that contain the location of the word
#' differences will be paired up. Since the vectors may well be wrapped with
#' different periodicities this will result in lines that are paired up that
#' look like they should not be paired up, though the locations of the
#' differences should be. It is entirely possible that setting this parameter
#' to FALSE will result in a slower diff. This happens if two vectors are
#' actually fairly similar, but their line representations are not. For
#' example, in comparing \code{1:100} to \code{c(100, 1:99)}, there is really
#' only one difference at the \dQuote{word} level, but every screen line is
#' different. \code{diffChr} will also do the unwrapping if it is given a
#' character vector that contains output that looks like the atomic vectors
#' described above. This is a bug, but as the functionality could be useful
#' when diffing e.g. \code{capture.output} data, we now declare it a feature.
#' @param line.limit integer(2L) or integer(1L), if length 1 how many lines of
#' output to show, where \code{-1} means no limit. If length 2, the first
#' value indicates the threshold of screen lines to begin truncating output,
#' and the second the number of lines to truncate to, which should be fewer
#' than the threshold. Note that this parameter is implemented on a
#' best-efforts basis and should not be relied on to produce the exact
#' number of lines requested. In particular do not expect it to work well for
#' values small enough that the banner portion of the diff would have to
#' be trimmed. If you want a specific number of lines use \code{[} or
#' \code{head} / \code{tail}. One advantage of \code{line.limit} over these
#' other options is that you can combine it with \code{context="auto"} and
#' auto \code{max.level} selection (the latter for \code{diffStr}), which
#' allows the diff to dynamically adjust to make best use of the available
#' display lines. \code{[}, \code{head}, and \code{tail} just subset the text
#' of the output.
#' @param hunk.limit integer(2L) or integer (1L), how many diff hunks to show.
#' Behaves similarly to \code{line.limit}. How many hunks are in a
#' particular diff is a function of how many differences, and also how much
#' \code{context} is used since context can cause two hunks to bleed into
#' each other and become one.
#' @param max.diffs integer(1L), number of \emph{differences} (default 50000L)
#' after which we abandon the \code{O(n^2)} diff algorithm in favor of a naive
#' \code{O(n)} one. Set to \code{-1L} to stick to the original algorithm up to
#' the maximum allowed (~INT_MAX/4).
#' @param disp.width integer(1L) number of display columns to take up; note that
#' in \dQuote{sidebyside} \code{mode} the effective display width is half this
#' number (set to 0L to use default widths which are \code{getOption("width")}
#' for normal styles and \code{80L} for HTML styles). Future versions of
#' \code{diffobj} may change this to larger values for two dimensional objects
#' for better diffs (see details).
#' @param ignore.white.space TRUE or FALSE, whether to ignore differences in
#' horizontal whitespace (i.e. spaces and tabs) instead of reporting them as
#' differences (defaults to TRUE).
#' @param convert.hz.white.space TRUE or FALSE, whether to modify input strings
#' that contain tabs and carriage returns in such a way that they display as
#' they would \bold{with} those characters, but without using those
#' characters (defaults to TRUE). The conversion assumes that tab stops are
#' spaced evenly eight characters apart on the terminal. If this is not the
#' case you may specify the tab stops explicitly with \code{tab.stops}.
#' @param tab.stops integer, what tab stops to use when converting hard tabs to
#' spaces. If not integer will be coerced to integer (defaults to 8L). You
#' may specify more than one tab stop. If display width exceeds that
#' addressable by your tab stops the last tab stop will be repeated.
#' @param align numeric(1L) between 0 and 1, proportion of
#' words in a line of \code{target} that must be matched in a line of
#' \code{current} in the same hunk for those lines to be paired up when
#' displayed (defaults to 0.25), or an \code{\link{AlignThreshold}} object.
#' Set to \code{1} to turn off alignment which will cause all lines in a hunk
#' from \code{target} to show up first, followed by all lines from
#' \code{current}. Note that in order to be aligned lines must meet the
#' threshold and have at least 3 matching alphanumeric characters (see
#' \code{\link{AlignThreshold}} for details).
#' @param style \dQuote{auto}, a \code{\link{Style}} object, or a list.
#' \dQuote{auto} by default. If a \code{Style} object, will override the
#' \code{format}, \code{brightness}, and \code{color.mode} parameters.
#' The \code{Style} object provides full control of diff output styling.
#' If a list, then the same as \dQuote{auto}, except that if the auto-selected
#' \code{Style} requires instantiation (see \code{\link{PaletteOfStyles}}),
#' then the list contents will be used as arguments when instantiating the
#' style object. See \code{\link{Style}} for more details, in particular the
#' examples.
#' @param palette.of.styles \code{\link{PaletteOfStyles}} object; advanced
#' usage, contains all the \code{\link{Style}} objects or
#' \dQuote{classRepresentation} objects extending \code{\link{Style}} that are
#' selected by specifying the \code{format}, \code{brightness}, and
#' \code{color.mode} parameters. See \code{\link{PaletteOfStyles}} for more
#' details.
#' @param frame an environment to use as the evaluation frame for the
#' \code{print/show/str} calls, and for \code{diffObj}, the evaluation frame
#' for the \code{diffPrint} / \code{diffStr} calls. Defaults to the return
#' value of \code{\link{par_frame}}.
#' @param interactive TRUE or FALSE whether the function is being run in
#' interactive mode, defaults to the return value of
#' \code{\link{interactive}}. If in interactive mode, pager will be used if
#' \code{pager} is \dQuote{auto}, and if ANSI styles are not supported and
#' \code{style} is \dQuote{auto}, output will be sent to viewer/browser as
#' HTML.
#' @param term.colors integer(1L) how many ANSI colors are supported by the
#' terminal. This variable is provided for when
#' \code{\link[=num_colors]{crayon::num_colors}} does not properly detect how
#' many ANSI colors are supported by your terminal. Defaults to return value
#' of \code{\link[=num_colors]{crayon::num_colors}} and should be 8 or 256 to
#' allow ANSI colors, or any other number to disallow them. This only
#' impacts output format selection when \code{style} and \code{format} are
#' both set to \dQuote{auto}.
#' @param tar.banner character(1L), language, or NULL, used to generate the
#' text to display ahead of the diff section representing the target output.
#' If NULL will use the deparsed \code{target} expression, if language, will
#' use the language as it would the \code{target} expression, if
#' character(1L), will use the string with no modifications. The language
#' mode is provided because \code{diffStr} modifies the expression prior to
#' display (e.g. by wrapping it in a call to \code{str}). Note that it is
#' possible in some cases that the substituted value of \code{target} actually
#' is character(1L), but if you provide a character(1L) value here it will be
#' assumed you intend to use that value literally.
#' @param cur.banner character(1L) like \code{tar.banner}, but for
#' \code{current}
#' @param strip.sgr TRUE, FALSE, or NULL (default), whether to strip ANSI CSI
#' SGR sequences prior to comparison and for display of diff. If NULL,
#' resolves to TRUE if `style` resolves to an ANSI formatted diff, and
#' FALSE otherwise. The default behavior is to avoid confusing diffs where
#' the original SGR and the SGR added by the diff are mixed together.
#' @param sgr.supported TRUE, FALSE, or NULL (default), whether to assume the
#' standard output device supports ANSI CSI SGR sequences. If TRUE, strings
#' will be manipulated accounting for the SGR sequences. If NULL,
#' resolves to TRUE if `style` resolves to an ANSI formatted diff, and
#' to `crayon::has_color()` otherwise. This only controls how the strings are
#' manipulated, not whether SGR is added to format the diff, which is
#' controlled by the `style` parameter. This parameter is exposed for the
#' rare cases where you might wish to control string manipulation behavior
#' directly.
#' @param extra list additional arguments to pass on to the functions used to
#' create text representation of the objects to diff (e.g. \code{print},
#' \code{str}, etc.)
#' @param ... unused, for compatibility of methods with generics
#' @return a \code{Diff} object; this object has a \code{show}
#' method that will display the diff to screen or pager, as well as
#' \code{summary}, \code{any}, and \code{as.character} methods.
#' If you store the return value instead of displaying it to screen, and
#' display it later, it is possible for the display to be thrown off if
#' there are environment changes (e.g. display width changes) in between
#' the time you compute the diff and the time you display it.
#' @rdname diffPrint
#' @name diffPrint
#' @export
#' @examples
#' ## `pager="off"` for CRAN compliance; you may omit in normal use
#' diffPrint(letters, letters[-5], pager="off")
setGeneric(
  name="diffPrint",
  # only `target` and `current` are named in the generic; everything else
  # reaches the methods through `...`
  def=function(target, current, ...) standardGeneric("diffPrint")
)
#' @rdname diffPrint
setMethod(
  f="diffPrint",
  signature=c("ANY", "ANY"),
  # method body is generated by the shared factory around `capt_print`
  definition=make_diff_fun(capt_print)
)
#' Diff Object Structures
#'
#' Compares the \code{str} output of \code{target} and \code{current}. If
#' the \code{max.level} parameter to \code{str} is left unspecified, will
#' attempt to find the largest \code{max.level} that fits within
#' \code{line.limit} and shows at least one difference.
#'
#' Due to the seemingly inconsistent nature of \code{max.level} when used with
#' objects with nested attributes, and also due to the relative slowness of
#' \code{str}, this function simulates the effect of \code{max.level} by hiding
#' nested lines instead of repeatedly calling \code{str} with varying values of
#' \code{max.level}.
#'
#' @inheritParams diffPrint
#' @seealso \code{\link{diffPrint}} for details on the \code{diff*} functions,
#' \code{\link{diffObj}}, \code{\link{diffStr}},
#' \code{\link{diffChr}} to compare character vectors directly,
#' \code{\link{diffDeparse}} to compare deparsed objects,
#' \code{\link{ses}} for a minimal and fast diff
#' @return a \code{Diff} object; see \code{\link{diffPrint}}.
#' @rdname diffStr
#' @export
#' @examples
#' ## `pager="off"` for CRAN compliance; you may omit in normal use
#' with(mtcars, diffStr(lm(mpg ~ hp)$qr, lm(mpg ~ disp)$qr, pager="off"))
setGeneric(
  name="diffStr",
  # only `target` and `current` are named in the generic; everything else
  # reaches the methods through `...`
  def=function(target, current, ...) standardGeneric("diffStr")
)
#' @rdname diffStr
setMethod(
  f="diffStr",
  signature=c("ANY", "ANY"),
  # method body is generated by the shared factory around `capt_str`
  definition=make_diff_fun(capt_str)
)
#' Diff Character Vectors Element By Element
#'
#' Will perform the diff on the actual string values of the character vectors
#' instead of capturing the printed screen output. Each vector element is
#' treated as a line of text. NA elements are treated as the string
#' \dQuote{NA}. Non character inputs are coerced to character and attributes
#' are dropped with \code{\link{c}}.
#'
#' @inheritParams diffPrint
#' @seealso \code{\link{diffPrint}} for details on the \code{diff*} functions,
#' \code{\link{diffObj}}, \code{\link{diffStr}},
#' \code{\link{diffDeparse}} to compare deparsed objects,
#' \code{\link{ses}} for a minimal and fast diff
#' @return a \code{Diff} object; see \code{\link{diffPrint}}.
#' @export
#' @rdname diffChr
#' @examples
#' ## `pager="off"` for CRAN compliance; you may omit in normal use
#' diffChr(LETTERS[1:5], LETTERS[2:6], pager="off")
setGeneric(
  name="diffChr",
  # only `target` and `current` are named in the generic; everything else
  # reaches the methods through `...`
  def=function(target, current, ...) standardGeneric("diffChr")
)
#' @rdname diffChr
setMethod(
  f="diffChr",
  signature=c("ANY", "ANY"),
  # method body is generated by the shared factory around `capt_chr`
  definition=make_diff_fun(capt_chr)
)
#' Diff Deparsed Objects
#'
#' Perform diff on the character vectors produced by \code{\link{deparse}}ing
#' the objects. Each element counts as a line. If an element contains newlines
#' it will be split into separate elements at the newlines.
#'
#' @export
#' @inheritParams diffPrint
#' @seealso \code{\link{diffPrint}} for details on the \code{diff*} functions,
#' \code{\link{diffObj}}, \code{\link{diffStr}},
#' \code{\link{diffChr}} to compare character vectors directly,
#' \code{\link{ses}} for a minimal and fast diff
#' @return a \code{Diff} object; see \code{\link{diffPrint}}.
#' @export
#' @rdname diffDeparse
#' @examples
#' ## `pager="off"` for CRAN compliance; you may omit in normal use
#' diffDeparse(matrix(1:9, 3), 1:9, pager="off")
setGeneric(
  name="diffDeparse",
  # only `target` and `current` are named in the generic; everything else
  # reaches the methods through `...`
  def=function(target, current, ...) standardGeneric("diffDeparse")
)
#' @rdname diffDeparse
setMethod(
  f="diffDeparse",
  signature=c("ANY", "ANY"),
  # method body is generated by the shared factory around `capt_deparse`
  definition=make_diff_fun(capt_deparse)
)
#' Diff Files
#'
#' Reads text files with \code{\link{readLines}} and performs a diff on the
#' resulting character vectors.
#'
#' @export
#' @param target character(1L) or file connection with read capability; if
#' character should point to a text file
#' @param current like \code{target}
#' @inheritParams diffPrint
#' @seealso \code{\link{diffPrint}} for details on the \code{diff*} functions,
#' \code{\link{diffObj}}, \code{\link{diffStr}},
#' \code{\link{diffChr}} to compare character vectors directly,
#' \code{\link{ses}} for a minimal and fast diff
#' @return a \code{Diff} object; see \code{\link{diffPrint}}.
#' @export
#' @rdname diffFile
#' @examples
#' \dontrun{
#' url.base <- "https://raw.githubusercontent.com/wch/r-source"
#' f1 <- file.path(url.base, "29f013d1570e1df5dc047fb7ee304ff57c99ea68/README")
#' f2 <- file.path(url.base, "daf0b5f6c728bd3dbcd0a3c976a7be9beee731d9/README")
#' diffFile(f1, f2)
#' }
setGeneric(
  # Standard S4 generic; methods are selected on `target` and `current`
  name="diffFile",
  def=function(target, current, ...) standardGeneric("diffFile")
)
#' @rdname diffFile
setMethod(
  # Catch-all method: the implementation is generated by `make_diff_fun`
  # from the `capt_file` capture function
  f="diffFile",
  signature=c("ANY", "ANY"),
  definition=make_diff_fun(capt_file)
)
#' Diff CSV Files
#'
#' Reads CSV files with \code{\link{read.csv}} and passes the resulting data
#' frames onto \code{\link{diffPrint}}. \code{extra} values are passed as
#' arguments to both \code{read.csv} and \code{print}. To the
#' extent you wish to use different \code{extra} arguments for each of those
#' functions you will need to \code{read.csv} the files and pass them to
#' \code{diffPrint} yourself.
#'
#' @export
#' @param target character(1L) or file connection with read capability;
#' if character should point to a CSV file
#' @param current like \code{target}
#' @inheritParams diffPrint
#' @seealso \code{\link{diffPrint}} for details on the \code{diff*} functions,
#' \code{\link{diffObj}}, \code{\link{diffStr}},
#' \code{\link{diffChr}} to compare character vectors directly,
#' \code{\link{ses}} for a minimal and fast diff
#' @return a \code{Diff} object; see \code{\link{diffPrint}}.
#' @export
#' @rdname diffCsv
#' @examples
#' iris.2 <- iris
#' iris.2$Sepal.Length[5] <- 99
#' f1 <- tempfile()
#' f2 <- tempfile()
#' write.csv(iris, f1, row.names=FALSE)
#' write.csv(iris.2, f2, row.names=FALSE)
#' ## `pager="off"` for CRAN compliance; you may omit in normal use
#' diffCsv(f1, f2, pager="off")
#' unlink(c(f1, f2))
setGeneric(
  # Standard S4 generic; methods are selected on `target` and `current`
  name="diffCsv",
  def=function(target, current, ...) standardGeneric("diffCsv")
)
#' @rdname diffCsv
setMethod(
  # Catch-all method: the implementation is generated by `make_diff_fun`
  # from the `capt_csv` capture function
  f="diffCsv",
  signature=c("ANY", "ANY"),
  definition=make_diff_fun(capt_csv)
)
#' Diff Objects
#'
#' Compare either the \code{print}ed or \code{str} screen representation of
#' R objects depending on which is estimated to produce the most useful
#' diff. The selection process tries to minimize screen lines while maximizing
#' differences shown subject to display constraints. The decision algorithm is
#' likely to evolve over time, so do not rely on this function making
#' a particular selection under specific circumstances. Instead, use
#' \code{\link{diffPrint}} or \code{\link{diffStr}} if you require one or the
#' other output.
#'
#' @inheritParams diffPrint
#' @seealso \code{\link{diffPrint}} for details on the \code{diff*} methods,
#' \code{\link{diffStr}},
#' \code{\link{diffChr}} to compare character vectors directly,
#' \code{\link{diffDeparse}} to compare deparsed objects,
#' \code{\link{ses}} for a minimal and fast diff
#' @return a \code{Diff} object; see \code{\link{diffPrint}}.
#' @export
#' @examples
#' ## `pager="off"` for CRAN compliance; you may omit in normal use
#' diffObj(letters, c(letters[1:10], LETTERS[11:26]), pager="off")
#' with(mtcars, diffObj(lm(mpg ~ hp)$qr, lm(mpg ~ disp)$qr, pager="off"))
setGeneric(
  # Standard S4 generic; methods are selected on `target` and `current`
  name="diffObj",
  def=function(target, current, ...) standardGeneric("diffObj")
)
# Implementation behind the `diffObj` method.  `make_diff_fun` is used only to
# obtain a function template; the body it generates is discarded and replaced
# by the one below, which runs both `diffPrint` and `diffStr` and returns
# whichever of the two results the heuristics at the end select.
diff_obj <- make_diff_fun(identity)
body(diff_obj) <- quote({
  # `extra` is set internally for the `diffStr` run, so user values are refused
  if(length(extra))
    stop("Argument `extra` must be empty in `diffObj`.")

  # Capture the bindings of the function environment BEFORE creating any local
  # variable, so the list spliced into the generated calls is not polluted by
  # locals.  (Historically this ran inside a `local` call, which caused issues
  # once JIT compilation arrived.)
  arg.list <- as.list(environment())

  call.info <- extract_call(sys.calls(), frame)
  fail <- make_err_fun(call.info$call)

  # Banners not supplied by the caller default to the quoted `tar` / `cur`
  # components reported by `extract_call`
  if(is.null(arg.list$tar.banner))
    arg.list$tar.banner <- call("quote", call.info$tar)
  if(is.null(arg.list$cur.banner))
    arg.list$cur.banner <- call("quote", call.info$cur)

  # Build one call per candidate; only the `diffStr` call gets an `extra`
  p.call <- as.call(c(list(quote(diffobj::diffPrint)), arg.list))
  s.call <- as.call(c(list(quote(diffobj::diffStr)), arg.list))
  s.call[["extra"]] <- list(max.level="auto")

  # Evaluate both candidates in `frame`, deferring error reporting so that
  # failures are re-signalled through `fail`; the `diffStr` failure is
  # reported first if both fail
  p.diff <- try(eval(p.call, frame), silent=TRUE)
  s.diff <- try(eval(s.call, frame), silent=TRUE)

  if(inherits(s.diff, "try-error")) {
    fail(
      "Error in calling `diffStr`: ",
      conditionMessage(attr(s.diff, "condition"))
    )
  }
  if(inherits(p.diff, "try-error")) {
    fail(
      "Error in calling `diffPrint`: ",
      conditionMessage(attr(p.diff, "condition"))
    )
  }
  # Metrics used to weigh the two candidates against each other: number of
  # diff hunks, displayed diff length, and input line coverage
  p.hunks <- count_diff_hunks(p.diff@diffs)
  s.hunks <- count_diff_hunks(s.diff@diffs)

  p.len <- diff_line_len(
    p.diff@diffs, p.diff@etc,
    tar.capt=p.diff@tar.dat$raw, cur.capt=p.diff@cur.dat$raw
  )
  s.len <- diff_line_len(
    s.diff@diffs, s.diff@etc,
    tar.capt=s.diff@tar.dat$raw, cur.capt=s.diff@cur.dat$raw
  )
  p.cover <- lineCoverage(p.diff)
  s.cover <- lineCoverage(s.diff)

  # First entry of the `lines` trim data; used below as a "was trimmed" flag
  lines_trimmed <- function(x) x@trim.dat$lines[[1L]]

  # Decide whether the print (TRUE) or the str (FALSE) version is returned
  use.print <- if(!s.hunks && p.hunks) {
    # Only the print version shows differences
    TRUE
  } else if(!p.hunks && s.hunks) {
    # Only the str version shows differences
    FALSE
  } else if(!lines_trimmed(s.diff) && lines_trimmed(p.diff)) {
    # str fits in full, print does not
    FALSE
  } else if(lines_trimmed(s.diff) && !lines_trimmed(p.diff)) {
    # print fits in full, str does not
    TRUE
  } else if(p.len <= console_lines() / 2) {
    # print output is of reasonable size, so prefer it
    TRUE
  } else {
    # Otherwise score each option; ties go to print
    s.score <- s.hunks / s.len * s.cover
    p.score <- p.hunks / p.len * p.cover
    p.score >= s.score
  }
  if(use.print) p.diff else s.diff
})
#' @export
setMethod(
  # Catch-all method backed by `diff_obj`
  f="diffObj",
  signature=c("ANY", "ANY"),
  definition=diff_obj
)
|