1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
|
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/data_read.R
\name{data_read}
\alias{data_read}
\title{Read (import) data files from various sources}
\usage{
data_read(
path,
path_catalog = NULL,
encoding = NULL,
convert_factors = TRUE,
verbose = TRUE,
...
)
}
\arguments{
\item{path}{Character string, the file path to the data file.}
\item{path_catalog}{Character string, path to the catalog file. Only relevant
for SAS data files.}
\item{encoding}{The character encoding used for the file. Usually not needed.}
\item{convert_factors}{If \code{TRUE} (default), numeric variables, where all
values have a value label, are assumed to be categorical and converted
into factors. If \code{FALSE}, no variable types are guessed and no conversion
of numeric variables into factors will be performed. See also section
'Differences to other packages that read foreign data formats'.}
\item{verbose}{Toggle warnings and messages.}
\item{...}{Arguments passed to the related \verb{read_*()} function.}
}
\value{
A data frame.
}
\description{
This function imports data from various file types. It is a small wrapper
around \code{haven::read_spss()}, \code{haven::read_stata()}, \code{haven::read_sas()},
\code{readxl::read_excel()} and \code{data.table::fread()} or \code{readr::read_delim()}
(the latter if package \strong{data.table} is not installed). Thus, supported file
types for importing data are data files from SPSS, SAS or Stata, Excel files
or text files (like '.csv' files). All non-supported file types are passed
to \code{rio::import()}.
}
\section{Supported file types}{
\code{data_read()} is a wrapper around the \strong{haven}, \strong{data.table}, \strong{readr},
\strong{readxl} and \strong{rio} packages. Currently supported file types are \code{.txt},
\code{.csv}, \code{.xls}, \code{.xlsx}, \code{.sav}, \code{.por}, \code{.dta} and \code{.sas} (and related
files). All other file types are passed to \code{rio::import()}.
}
\section{Compressed files (zip) and URLs}{
\code{data_read()} can also read the above-mentioned files from URLs or from
inside zip-compressed files. Thus, \code{path} can also be a URL to a file like
\code{"http://www.url.com/file.csv"}. When \code{path} points to a zip-compressed file,
and there are multiple files inside the zip-archive, then the first supported
file is extracted and loaded.
}
\section{General behaviour}{
\code{data_read()} detects the appropriate \verb{read_*()} function based on the
file-extension of the data file. Thus, in most cases it should be enough to
only specify the \code{path} argument. However, if more control is needed, all
arguments in \code{...} are passed down to the related \verb{read_*()} function.
}
\section{Differences to other packages that read foreign data formats}{
\code{data_read()} is most comparable to \code{rio::import()}. For data files from
SPSS, SAS or Stata, which support labelled data, variables are converted into
their most appropriate type. The major difference to \code{rio::import()} is that
\code{data_read()} automatically converts fully labelled numeric variables into
factors, where imported value labels will be set as factor levels. If a
numeric variable has \emph{no} value labels or fewer value labels than values, it
is not converted to a factor. In this case, value labels are preserved as the
\code{"labels"} attribute. Character vectors are preserved. Use
\code{convert_factors = FALSE} to disable the automatic conversion of numeric
variables to factors.
}
|