File: prep_dat.r

package info (click to toggle)
r-cran-metadat 1.2-0-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 2,108 kB
  • sloc: sh: 13; makefile: 2
file content (143 lines) | stat: -rw-r--r-- 5,427 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
prep_dat <- function(rebuild=FALSE, overwrite, pkgdir) {

   cat("\n")

   # if 'pkgdir' argument is unspecified, assume it is the current working directory
   if (missing(pkgdir)) {
      pkgdir <- normalizePath(".")
      cat("Package root directory:", pkgdir, "\n\n")
   }

   # check if package root directory actually exists and that it has
   # a DESCRIPTION file with "Package: metadat" in the first line
   if (dir.exists(pkgdir)) {
      if (!file.exists(paste0(pkgdir, "/DESCRIPTION")))
         stop("No DESCRIPTION file in the package root directory.")
      if (readLines(paste0(pkgdir, "/DESCRIPTION"), n=1) != "Package: metadat")
         stop("DESCRIPTION file in the package root directory is not for the 'metadat' package.")
   } else {
      stop("Specified 'pkgdir' directory does not exist.")
   }

   data_raw.dir <- paste0(pkgdir, "/data-raw/")
   data.dir     <- paste0(pkgdir, "/data/")
   man.dir      <- paste0(pkgdir, "/man/")

   # check that directories actually exist
   if (!dir.exists(data_raw.dir))
      stop("Cannot find 'data-raw' directory.")
   if (!dir.exists(data.dir))
      stop("Cannot find 'data' directory.")
   if (!dir.exists(man.dir))
      stop("Cannot find 'man' directory.")

   # load .rfiles.txt (if it exists)
   if (file.exists(paste0(data_raw.dir, ".rfiles.txt"))) {
      .rfiles <- read.table(paste0(data_raw.dir, ".rfiles.txt"), header=FALSE, as.is=TRUE)[[1]]
   } else {
      .rfiles <- NULL
   }

   # try running Rscript
   tmp <- try(suppressWarnings(system2("Rscript", args="-e 1", stdout=TRUE, stderr=TRUE)), silent=TRUE)
   if (inherits(tmp, "try-error"))
      stop("Cannot run 'Rscript'. Make sure that Rscript/Rscript.exe is on the system path.")

   # get names of all data preparation scripts (.r/.R files) in the 'data-raw' directory
   rfiles <- list.files(path=data_raw.dir, pattern=".[rR]$")

   # paste header for output
   cat("File", paste0(rep(" ", max(nchar(rfiles))-4), collapse=""), "new ", "rda ", "build ", "Rd ", "create", "\n")
   cat(paste0(rep("-", max(nchar(rfiles))+29), collapse=""), "\n")

   # process all data preparation scripts

   for (i in seq_along(rfiles)) {

      # paste name of the data preparation script (and enough space to align everything)
      cat(rfiles[i], paste0(rep(" ", max(nchar(rfiles))-nchar(rfiles[i])+1), collapse=""))

      # check if the data preparation file is already in .rfiles.txt
      rfile.exists <- rfiles[i] %in% .rfiles

      # [new]: paste T/F if it is a new script or not
      cat(ifelse(!rfile.exists, "T    ", "F    "))

      # get 'root' name of the data preparation script (remove .r/.R extension)
      root <- substr(rfiles[i], 1, nchar(rfiles[i])-2)

      # get names of all .rda files in the 'data' directory
      rda.files <- list.files(path=data.dir, pattern=".rda$")

      # grep names of all <root>.<anything>.rda files (could be one or multiple)
      rda.files <- grep(paste0("^", root, ".*.rda"), rda.files, value=TRUE)

      # check if <root>.<anything>.rda files already exist
      rda.files.exist <- ifelse(length(rda.files) > 0, TRUE, FALSE)

      # [rda]: paste T/F if the rda files already exist or not
      cat(ifelse(rda.files.exist, "T    ", "F    "))

      # if rebuild=TRUE or if the rda files do not exist, try running the data preparation script
      # [build]: paste T/F if the data processing script was run (without error)
      if (rebuild || !rda.files.exist) {

         #rfilerun <- try(source(paste0(data_raw.dir, rfiles[i])), silent=TRUE)
         #if (inherits(rfilerun, "try-error")) {

         # run each script in its own independent process
         cmd <- paste0(data_raw.dir, rfiles[i])
         out <- suppressWarnings(system2("Rscript", cmd, stdout=TRUE, stderr=TRUE))

         if (isTRUE(attributes(out)$status == 1)) {
            warning("Error while running ", rfiles[i], ".", call.=FALSE)
            cat("F      ")
         } else {
            cat("T      ")
         }

      } else {
         cat("F      ")
      }

      # list all files in the 'data' directory (possibly includes non-.rda files)
      data.files <- list.files(path=data.dir)

      # check if there are now any non-.rda files in 'data'; if so, throw an error
      if (any(tools::file_ext(data.files) != "rda"))
         stop("\n\nThere are non-.rda files in the 'data' directory.\nData preparation scripts must create only .rda files.")

      # check if <root>.Rd file exists in 'man' directory
      rd.exists <- file.exists(paste0(man.dir, root, ".Rd"))

      # [Rd]: paste T/F if <root>.Rd file exists or not
      if (rd.exists) {
         cat("T   ")
      } else {
         cat("F   ")
      }

      # if it doesn't exist or if it is in 'overwrite' vector, create template <root>.Rd file
      # [create]: paste T/F if template <root>.Rd file is created
      if (!rd.exists || !missing(overwrite) && paste0(root, ".Rd") %in% overwrite) {
         cat("T      ")
         .rd_generator(root, pkgdir, overwrite)
      } else {
         cat("F      ")
      }

      cat("\n")

   }

   # make sure every .rda file in 'data' is lower case
   for (data.file in data.files) {
      file.rename(paste0(data.dir, data.file), paste0(data.dir, tolower(data.file)))
   }

   # write updated .rfiles.txt file to 'data-raw' directory
   write.table(rfiles, file=paste0(data_raw.dir, ".rfiles.txt"), row.names=FALSE, col.names=FALSE)

   cat("\n")

}