File: read_excel.Rd

package info (click to toggle)
r-cran-readxl 1.4.5-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 4,044 kB
  • sloc: ansic: 4,854; cpp: 3,213; makefile: 2
file content (192 lines) | stat: -rw-r--r-- 6,096 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/read_excel.R
\name{read_excel}
\alias{read_excel}
\alias{read_xls}
\alias{read_xlsx}
\title{Read xls and xlsx files}
\usage{
read_excel(
  path,
  sheet = NULL,
  range = NULL,
  col_names = TRUE,
  col_types = NULL,
  na = "",
  trim_ws = TRUE,
  skip = 0,
  n_max = Inf,
  guess_max = min(1000, n_max),
  progress = readxl_progress(),
  .name_repair = "unique"
)

read_xls(
  path,
  sheet = NULL,
  range = NULL,
  col_names = TRUE,
  col_types = NULL,
  na = "",
  trim_ws = TRUE,
  skip = 0,
  n_max = Inf,
  guess_max = min(1000, n_max),
  progress = readxl_progress(),
  .name_repair = "unique"
)

read_xlsx(
  path,
  sheet = NULL,
  range = NULL,
  col_names = TRUE,
  col_types = NULL,
  na = "",
  trim_ws = TRUE,
  skip = 0,
  n_max = Inf,
  guess_max = min(1000, n_max),
  progress = readxl_progress(),
  .name_repair = "unique"
)
}
\arguments{
\item{path}{Path to the xls/xlsx file.}

\item{sheet}{Sheet to read. Either a string (the name of a sheet), or an
integer (the position of the sheet). Ignored if the sheet is specified via
\code{range}. If neither argument specifies the sheet, defaults to the first
sheet.}

\item{range}{A cell range to read from, as described in \link{cell-specification}.
Includes typical Excel ranges like "B3:D87", possibly including the sheet
name like "Budget!B2:G14", and more. Interpreted strictly, even if the
range forces the inclusion of leading or trailing empty rows or columns.
Takes precedence over \code{skip}, \code{n_max} and \code{sheet}.}

\item{col_names}{\code{TRUE} to use the first row as column names, \code{FALSE} to get
default names, or a character vector giving a name for each column. If user
provides \code{col_types} as a vector, \code{col_names} can have one entry per
column, i.e. have the same length as \code{col_types}, or one entry per
unskipped column.}

\item{col_types}{Either \code{NULL} to guess all from the spreadsheet or a
character vector containing one entry per column from these options:
"skip", "guess", "logical", "numeric", "date", "text" or "list". If exactly
one \code{col_type} is specified, it will be recycled. The content of a cell in
a skipped column is never read and that column will not appear in the data
frame output. A list cell loads a column as a list of length 1 vectors,
which are typed using the type guessing logic from \code{col_types = NULL}, but
on a cell-by-cell basis.}

\item{na}{Character vector of strings to interpret as missing values. By
default, readxl treats blank cells as missing data.}

\item{trim_ws}{Should leading and trailing whitespace be trimmed?}

\item{skip}{Minimum number of rows to skip before reading anything, be it
column names or data. Leading empty rows are automatically skipped, so this
is a lower bound. Ignored if \code{range} is given.}

\item{n_max}{Maximum number of data rows to read. Trailing empty rows are
automatically skipped, so this is an upper bound on the number of rows in
the returned tibble. Ignored if \code{range} is given.}

\item{guess_max}{Maximum number of data rows to use for guessing column
types.}

\item{progress}{Display a progress spinner? By default, the spinner appears
only in an interactive session, outside the context of knitting a document,
and when the call is likely to run for several seconds or more. See
\code{\link[=readxl_progress]{readxl_progress()}} for more details.}

\item{.name_repair}{Handling of column names. Passed along to
\code{\link[tibble:as_tibble]{tibble::as_tibble()}}. readxl's default is `.name_repair = "unique", which
ensures column names are not empty and are unique.}
}
\value{
A \link[tibble:tibble-package]{tibble}
}
\description{
Read xls and xlsx files

\code{read_excel()} calls \code{\link[=excel_format]{excel_format()}} to determine if \code{path} is xls or xlsx,
based on the file extension and the file itself, in that order. Use
\code{read_xls()} and \code{read_xlsx()} directly if you know better and want to
prevent such guessing.
}
\examples{
datasets <- readxl_example("datasets.xlsx")
read_excel(datasets)

# Specify sheet either by position or by name
read_excel(datasets, 2)
read_excel(datasets, "mtcars")

# Skip rows and use default column names
read_excel(datasets, skip = 10, col_names = FALSE)

# Recycle a single column type
read_excel(datasets, col_types = "text")

# Specify some col_types and guess others
read_excel(
  readxl_example("deaths.xlsx"),
  skip = 4, n_max = 10, col_names = TRUE,
  col_types = c("text", "text", "guess", "guess", "guess", "guess")
)

# Accomodate a column with disparate types via col_type = "list"
df <- read_excel(readxl_example("clippy.xlsx"), col_types = c("text", "list"))
df
df$value
sapply(df$value, class)

# Limit the number of data rows read
read_excel(datasets, n_max = 3)

# Read from an Excel range using A1 or R1C1 notation
read_excel(datasets, range = "C1:E7")
read_excel(datasets, range = "R1C2:R2C5")

# Specify the sheet as part of the range
read_excel(datasets, range = "mtcars!B1:D5")

# Read only specific rows or columns
read_excel(datasets, range = cell_rows(102:151), col_names = FALSE)
read_excel(datasets, range = cell_cols("B:D"))

# Get a preview of column names
names(read_excel(readxl_example("datasets.xlsx"), n_max = 0))

# exploit full .name_repair flexibility from tibble

# "universal" names are unique and syntactic
read_excel(
  readxl_example("deaths.xlsx"),
  range = "arts!A5:F15",
  .name_repair = "universal"
)

# specify name repair as a built-in function
read_excel(readxl_example("clippy.xlsx"), .name_repair = toupper)

# specify name repair as a custom function
my_custom_name_repair <- function(nms) tolower(gsub("[.]", "_", nms))
read_excel(
  readxl_example("datasets.xlsx"),
  .name_repair = my_custom_name_repair
)

# specify name repair as an anonymous function
read_excel(
  readxl_example("datasets.xlsx"),
  sheet = "chickwts",
  .name_repair = ~ substr(.x, start = 1, stop = 3)
)
}
\seealso{
\link{cell-specification} for more details on targetting cells with the
\code{range} argument
}