File: csv_convert_options.Rd

package info (click to toggle)
apache-arrow 23.0.1-1
  • links: PTS
  • area: main
  • in suites: sid
  • size: 76,220 kB
  • sloc: cpp: 654,608; python: 70,522; ruby: 45,964; ansic: 18,742; sh: 7,365; makefile: 669; javascript: 125; xml: 41
file content (74 lines) | stat: -rw-r--r-- 3,014 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/csv.R
\name{csv_convert_options}
\alias{csv_convert_options}
\title{CSV Convert Options}
\usage{
csv_convert_options(
  check_utf8 = TRUE,
  null_values = c("", "NA"),
  true_values = c("T", "true", "TRUE"),
  false_values = c("F", "false", "FALSE"),
  strings_can_be_null = FALSE,
  col_types = NULL,
  auto_dict_encode = FALSE,
  auto_dict_max_cardinality = 50L,
  include_columns = character(),
  include_missing_columns = FALSE,
  timestamp_parsers = NULL,
  decimal_point = "."
)
}
\arguments{
\item{check_utf8}{Logical: check UTF-8 validity of string columns?}

\item{null_values}{Character vector of recognized spellings for null values.
Analogous to the \code{na.strings} argument to
\code{\link[utils:read.table]{read.csv()}} or \code{na} in \code{\link[readr:read_delim]{readr::read_csv()}}.}

\item{true_values}{Character vector of recognized spellings for \code{TRUE} values.}

\item{false_values}{Character vector of recognized spellings for \code{FALSE} values.}

\item{strings_can_be_null}{Logical: can string / binary columns have
null values? Similar to the \code{quoted_na} argument to \code{\link[readr:read_delim]{readr::read_csv()}}.}

\item{col_types}{A \code{Schema} or \code{NULL} to infer types}

\item{auto_dict_encode}{Logical: Whether to try to automatically
dictionary-encode string / binary data (think \code{stringsAsFactors}).
This setting is ignored for non-inferred columns (those in \code{col_types}).}

\item{auto_dict_max_cardinality}{If \code{auto_dict_encode} is \code{TRUE}, string/binary columns
are dictionary-encoded up to this number of unique values (default 50),
after which the column switches to regular encoding.}

\item{include_columns}{If non-empty, indicates the names of columns from the
CSV file that should actually be read and converted (in the vector's order).}

\item{include_missing_columns}{Logical: if \code{include_columns} is provided, should
columns named in it but not found in the data be included as a column of
type \code{null()}? The default (\code{FALSE}) means that the reader will instead
raise an error.}

\item{timestamp_parsers}{User-defined timestamp parsers. If more than one
parser is specified, the CSV conversion logic will try parsing values
starting from the beginning of this vector. Possible values are
(a) \code{NULL}, the default, which uses the ISO-8601 parser;
(b) a character vector of \link[base:strptime]{strptime} parse strings; or
(c) a list of \link{TimestampParser} objects.}

\item{decimal_point}{Character to use for decimal point in floating point numbers.}
}
\description{
CSV Convert Options
}
\examples{
\dontshow{if (arrow_with_dataset()) withAutoprint(\{ # examplesIf}
tf <- tempfile()
on.exit(unlink(tf))
writeLines("x\n1\nNULL\n2\nNA", tf)
read_csv_arrow(tf, convert_options = csv_convert_options(null_values = c("", "NA", "NULL")))
open_csv_dataset(tf, convert_options = csv_convert_options(null_values = c("", "NA", "NULL")))
\dontshow{\}) # examplesIf}
}