File: test_read_csv.test

package info (click to toggle)

duckdb 1.5.1-2

links: PTS, VCS
area: main
in suites:
size: 299,196 kB
sloc: cpp: 865,414; ansic: 57,292; python: 18,871; sql: 12,663; lisp: 11,751; yacc: 7,412; lex: 1,682; sh: 747; makefile: 558

file content (121 lines) | stat: -rw-r--r-- 3,326 bytes

parent folder | download | duplicates (4)

# name: test/sql/copy/csv/test_read_csv.test
# description: Test read CSV function
# group: [csv]

statement ok
PRAGMA enable_verification

query II
select delimiter, quote FROM sniff_csv('{DATA_DIR}/csv/stats_3_muta_10_21.csv')
----
,	"

query III
FROM read_csv('{DATA_DIR}/csv/comments/comment_skip.csv', comment = '#', delim = ',', skip=1, auto_detect = false, header = 1, columns = {'x':'varchar','y':'varchar','z':'varchar'})
----
1	2	3
4	5	6

query II
FROM read_csv('{DATA_DIR}/csv/multi_quote.csv', null_padding = true)
----
2019-01-01	text
2019-02-01	text
2019-03-01	text
2019-04-01	text
2019-05-01	text
2019-06-01	text, with comma
2019-09-01	'text'

query II
FROM read_csv('{DATA_DIR}/csv/bad_escape.csv')
----
332	Surname, Firstname
123	foo ("foo")

query I
FROM read_csv('{DATA_DIR}/csv/quoted_values_delimited.csv')
----
value1;value2;value3
value1;value2;value3
value1;value2;value3

query I
FROM read_csv('{DATA_DIR}/csv/quoted_values_delimited.csv', ignore_errors = true)
----
value1;value2;value3
value1;value2;value3
value1;value2;value3

query III
FROM read_csv('{DATA_DIR}/csv/quoted_values_delimited.csv', quote = '')
----
"value1	value2	value3"
"value1	value2	value3"
"value1	value2	value3"

query I
FROM read_csv_auto('{DATA_DIR}/csv/test/dateformat.csv')
----
2019-06-05

# dateformat
statement ok
CREATE TABLE dates (d DATE);

# base date format does not work here
statement ok
INSERT INTO dates SELECT * FROM read_csv('{DATA_DIR}/csv/test/dateformat.csv', columns=STRUCT_PACK(d := 'DATE'), header=0)

query I
SELECT * FROM dates
----
2019-06-05

# dateformat should also work with auto format
statement ok
INSERT INTO dates SELECT * FROM read_csv_auto('{DATA_DIR}/csv/test/dateformat.csv', dateformat='%m/%d/%Y')

query I
SELECT * FROM dates ORDER BY 1
----
2019-05-06
2019-06-05

# we can also do this for timestamps
# as long as we make the date format fail
statement ok
CREATE TABLE timestamps AS SELECT * FROM read_csv_auto('{DATA_DIR}/csv/test/dateformat.csv', timestampformat='%m/%d/%Y', columns=STRUCT_PACK(d := 'TIMESTAMP'))

query I
SELECT * FROM timestamps
----
2019-05-06 00:00:00

# create a view using the read_csv function
statement ok
CREATE VIEW lineitem AS SELECT * FROM read_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', sep='|', columns=STRUCT_PACK(l_orderkey := 'INT', l_partkey := 'INT', l_suppkey := 'INT', l_linenumber := 'INT', l_quantity := 'INTEGER', l_extendedprice := 'DOUBLE', l_discount := 'DOUBLE', l_tax := 'DOUBLE', l_returnflag := 'VARCHAR', l_linestatus := 'VARCHAR', l_shipdate := 'DATE', l_commitdate := 'DATE', l_receiptdate := 'DATE', l_shipinstruct := 'VARCHAR', l_shipmode := 'VARCHAR', l_comment := 'VARCHAR'));

# each of these will read the CSV again through the view
query I
SELECT COUNT(*) FROM lineitem
----
10

query IT
SELECT l_partkey, RTRIM(l_comment) FROM lineitem WHERE l_orderkey=1 ORDER BY l_linenumber;
----
15519	egular courts above the
6731	ly final dependencies: slyly bold
6370	riously. regular, express dep
214	lites. fluffily even de
2403	 pending foxes. slyly re
1564	arefully slyly ex

# test incorrect usage of read_csv function
# wrong argument type
statement error
SELECT * FROM read_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', sep='|', columns=STRUCT_PACK(l_orderkey := 5))
----
read_csv requires a type specification as string