File: Import.R

package info (click to toggle)
car 3.1-5-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,496 kB
  • sloc: makefile: 2
file content (43 lines) | stat: -rw-r--r-- 2,325 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# function Import March 14, 2017
# This add two arguments to the `import` file in the rio package
# import is just a front end to a number of file reading files and packages in R
# 3/14/2017:  S. Weisberg, wrote the file, that adds
#             row.names=TRUE, default, will select the left-most column of character data in the data file as
#             row names subject to length(x) == length(unique(x))
#             charAsFactor=TRUE converts character to factor if length(x) > length(unique(x))
#             logicalAsFactor=charAsFactor converts logical to factor
#             These arguments are read only if format %in% c("txt", "csv", "xls", "xlsx", "ods").
# 4/2/2017:  S. Weisberg changed and simplified arguments.
# 5/22/2017: S. Weisberg, fixed bug reading files with one character column (added drop=FALSE)
# 5/6/2020: S. Weisberg, changed default for stringsAsFactors to FALSE.
# 11/2/2021: A. Zeileis, check for rio availability (so that rio can be in Suggests only)

Import <- function(file, format, ..., row.names=TRUE,
                   stringsAsFactors = FALSE){
  if(!requireNamespace("rio")) stop("Import() relies on rio::import(), please install package 'rio'")
  d <- rio::import(file, format, ...)
  fmt <- if(!missing(format)) format else{
    pos <- regexpr("\\.([[:alnum:]]+)$", file)
    ifelse(pos > -1L, substring(file, pos + 1L), "")
  }
# check for rows with no data
  d <- d[!apply(d, 1, function(row) all(is.na(row))), ]
  if(fmt %in% c("txt", "csv", "xls", "xlsx", "ods")){
    classes <- unlist(lapply(as.list(d), class))
    char <- classes %in% c("character", "logical")
    if(!any(char)) return(d)
    allUnique <- rep(FALSE, dim(d)[2])
      allUnique[char] <- apply(d[, char, drop=FALSE], 2, function(x) length(x) == length(unique(x)))
      if(row.names == TRUE){
        if(any(allUnique)){
          row.namesCol <- which(allUnique)[1] # use first non-repeating character col as row.names
          row.names(d) <- d[[row.namesCol]]   # set the row.names
          d <- d[, -row.namesCol]             # delete row.names column from data.frame
          allUnique <- allUnique[-row.namesCol]
          char <- char[-row.namesCol]}
      }
      if(stringsAsFactors & any(!allUnique)){
        for(j in which(char & !allUnique)) d[, j] <- factor(d[, j])
      }}
  return(d)
}