File: test_read_csv.test

package info (click to toggle)
duckdb 1.5.1-2
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 299,196 kB
  • sloc: cpp: 865,414; ansic: 57,292; python: 18,871; sql: 12,663; lisp: 11,751; yacc: 7,412; lex: 1,682; sh: 747; makefile: 558
file content (121 lines) | stat: -rw-r--r-- 3,326 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# name: test/sql/copy/csv/test_read_csv.test
# description: Test read CSV function
# group: [csv]

statement ok
PRAGMA enable_verification

query II
select delimiter, quote FROM sniff_csv('{DATA_DIR}/csv/stats_3_muta_10_21.csv')
----
,	"

query III
FROM read_csv('{DATA_DIR}/csv/comments/comment_skip.csv', comment = '#', delim = ',', skip=1, auto_detect = false, header = 1, columns = {'x':'varchar','y':'varchar','z':'varchar'})
----
1	2	3
4	5	6

query II
FROM read_csv('{DATA_DIR}/csv/multi_quote.csv', null_padding = true)
----
2019-01-01	text
2019-02-01	text
2019-03-01	text
2019-04-01	text
2019-05-01	text
2019-06-01	text, with comma
2019-09-01	'text'

query II
FROM read_csv('{DATA_DIR}/csv/bad_escape.csv')
----
332	Surname, Firstname
123	foo ("foo")

query I
FROM read_csv('{DATA_DIR}/csv/quoted_values_delimited.csv')
----
value1;value2;value3
value1;value2;value3
value1;value2;value3

query I
FROM read_csv('{DATA_DIR}/csv/quoted_values_delimited.csv', ignore_errors = true)
----
value1;value2;value3
value1;value2;value3
value1;value2;value3

query III
FROM read_csv('{DATA_DIR}/csv/quoted_values_delimited.csv', quote = '')
----
"value1	value2	value3"
"value1	value2	value3"
"value1	value2	value3"

query I
FROM read_csv_auto('{DATA_DIR}/csv/test/dateformat.csv')
----
2019-06-05

# dateformat
statement ok
CREATE TABLE dates (d DATE);

# base date format does not work here
statement ok
INSERT INTO dates SELECT * FROM read_csv('{DATA_DIR}/csv/test/dateformat.csv', columns=STRUCT_PACK(d := 'DATE'), header=0)

query I
SELECT * FROM dates
----
2019-06-05

# dateformat should also work with auto format
statement ok
INSERT INTO dates SELECT * FROM read_csv_auto('{DATA_DIR}/csv/test/dateformat.csv', dateformat='%m/%d/%Y')

query I
SELECT * FROM dates ORDER BY 1
----
2019-05-06
2019-06-05

# we can also do this for timestamps
# as long as we make the date format fail
statement ok
CREATE TABLE timestamps AS SELECT * FROM read_csv_auto('{DATA_DIR}/csv/test/dateformat.csv', timestampformat='%m/%d/%Y', columns=STRUCT_PACK(d := 'TIMESTAMP'))

query I
SELECT * FROM timestamps
----
2019-05-06 00:00:00

# create a view using the read_csv function
statement ok
CREATE VIEW lineitem AS SELECT * FROM read_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', sep='|', columns=STRUCT_PACK(l_orderkey := 'INT', l_partkey := 'INT', l_suppkey := 'INT', l_linenumber := 'INT', l_quantity := 'INTEGER', l_extendedprice := 'DOUBLE', l_discount := 'DOUBLE', l_tax := 'DOUBLE', l_returnflag := 'VARCHAR', l_linestatus := 'VARCHAR', l_shipdate := 'DATE', l_commitdate := 'DATE', l_receiptdate := 'DATE', l_shipinstruct := 'VARCHAR', l_shipmode := 'VARCHAR', l_comment := 'VARCHAR'));

# each of these will read the CSV again through the view
query I
SELECT COUNT(*) FROM lineitem
----
10

query IT
SELECT l_partkey, RTRIM(l_comment) FROM lineitem WHERE l_orderkey=1 ORDER BY l_linenumber;
----
15519	egular courts above the
6731	ly final dependencies: slyly bold
6370	riously. regular, express dep
214	lites. fluffily even de
2403	 pending foxes. slyly re
1564	arefully slyly ex

# test incorrect usage of read_csv function
# wrong argument type
statement error
SELECT * FROM read_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', sep='|', columns=STRUCT_PACK(l_orderkey := 5))
----
read_csv requires a type specification as string