File: readText.R

package info (click to toggle)
r-cran-r.rsp 0.46.0%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,472 kB
  • sloc: javascript: 612; tcl: 304; sh: 18; makefile: 16
file content (131 lines) | stat: -rw-r--r-- 3,532 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
###########################################################################/**
# @RdocFunction .readText
#
# @title "Reads the content of a local or an online text file"
#
# \description{
#  @get "title".
# }
#
# @synopsis
#
# \arguments{
#   \item{con}{A @character string specifying a local file or a URL,
#      or a @connection.}
#   \item{...}{Not used.}
#   \item{maxAge}{A @numeric scalar specifying the maximum age (in seconds)
#      of cached URL contents before downloading and recaching.
#      If zero or less, the URL will always be downloaded.}
# }
#
# \value{
#   Returns a @character string.
# }
#
# \section{Caching URL}{
#   When reading online URLs, it may take time a significant time to
#   read its content.  If the content is changing rarely, it is possible
#   to cache the content locally.  If a cached version is found, then it
#   is read instead.
#   It is possible to control how often a file should be recached.  If the
#   cache is older than argument \code{maxAge} (in seconds), then the file
#   is redownloaded and recached.
# }
#
# \section{Newline substitution}{
#   All occurrences of \code{\\r\\n} and \code{\\r} are replaced with
#   \code{\\n} such that all lines are ending in \code{\\n} regardless
#   of encoding.
# }
#
# @author
#
# @keyword file
# @keyword IO
# @keyword internal
#*/###########################################################################
.readText <- function(con, ..., maxAge=getOption("R.rsp::downloadIfOlderThan", -Inf)) {
  if (is.character(con)) {
    file <- con

    # Is the file local and an URL?
    isUrl <- isUrl(file)


    # (a) If URL, download to temporary directory
    if (isUrl) {
      url <- file
      path <- tempdir()
      filename <- getChecksum(url)
      pathname <- file.path(path, filename)

      # By default, download URL
      download <- TRUE

      # Unless...
      if (isFile(pathname)) {
        # Age (in seconds) when downloaded file is considered too old
        maxAge <- as.double(maxAge)
        if (is.na(maxAge)) maxAge <- -Inf
        maxAge <- Arguments$getDouble(maxAge)
        # Time when file was downloaded
        mtime <- file.info(pathname)$mtime
        # Age of downloaded file in seconds
        dtime <- Sys.time() - mtime
        units(dtime) <- "secs"
        download <- isTRUE(dtime > maxAge)
      }

      if (download) {
        withoutGString({
          pathname <- downloadFile(url, filename=pathname, skip=FALSE)
        })
      }

      if (isFile(pathname)) file <- pathname
    } # if (isUrl)


    # (b) Try to open file connection
    con <- tryCatch({
      suppressWarnings({
        file(file, open="rb")
      })
    }, error = function(ex) {
      # (b) If failed, try to download file first
      if (regexpr("^https://", file, ignore.case=TRUE) == -1L) {
        throw(ex)
      }
      url <- file
      withoutGString({
        pathname <- downloadFile(url, path=tempdir())
      })
      file(pathname, open="rb")
    })
    on.exit(close(con))
  }

  # Sanity check
  stop_if_not(inherits(con, "connection"))


  bfr <- NULL
  while (TRUE) {
    bfrT <- readChar(con, nchars=1e6)
    if (length(bfrT) == 0L) break
    bfrT <- gsub("\r\n", "\n", bfrT, fixed=TRUE)
    bfrT <- gsub("\r", "\n", bfrT, fixed=TRUE)
    bfr <- c(bfr, bfrT)
  }
  bfr <- paste(bfr, collapse="")
  
  if (FALSE) {
    bfr <- strsplit(bfr, split="\n", fixed=TRUE)
    bfr <- unlist(bfr, use.names=FALSE)
  }

  ## Sanity check
  stop_if_not(is.character(bfr), length(bfr) == 1L)

  bfr
} # .readText()