File: test_csv_auto.test

package info (click to toggle)
duckdb 1.5.1-2
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 299,196 kB
  • sloc: cpp: 865,414; ansic: 57,292; python: 18,871; sql: 12,663; lisp: 11,751; yacc: 7,412; lex: 1,682; sh: 747; makefile: 558
file content (309 lines) | stat: -rw-r--r-- 6,338 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
# name: test/sql/copy/csv/auto/test_csv_auto.test
# description: Test csv dialect detection
# group: [auto]

statement ok
PRAGMA enable_verification

statement ok 
PRAGMA verify_parallelism

query II
FROM read_csv('{DATA_DIR}/csv/repromarket.csv',
   columns={
      'email': 'varchar',
      'password': 'varchar'
   },
   all_varchar=true,
   delim=':',
   header=false,
   skip=0,
   null_padding=true,
   ignore_errors=true,
   strict_mode=false
);
----
nemanja.krpovic@gmail.com	krlleta
vega@example.combogus	NULL
Vega-Inject	bogus
mirkofoto@gmail.com	mirko

query I
FROM read_csv('{DATA_DIR}/csv/pipe_delim.csv', columns={'a': 'VARCHAR'}, auto_detect=False)
----
one|two|three|four
1|2|3|4

query I
FROM read_csv('{DATA_DIR}/csv/nullterm.csv')
----
\0world\0

query I
FROM read_csv('{DATA_DIR}/csv/nullterm.csv', quote = '"', escape = '"')
----
\0world\0

query I
FROM read_csv('{DATA_DIR}/csv/single_quote.csv', quote = '"')
----
'Doc'

query I
select columns FROM sniff_csv('{DATA_DIR}/csv/auto/mock_duckdb_test_data.csv', ignore_errors = true);
----
[{'name': id, 'type': BIGINT}, {'name': name, 'type': VARCHAR}, {'name': age, 'type': BIGINT}, {'name': sex, 'type': VARCHAR}, {'name': state, 'type': VARCHAR}]

query IIIII
FROM read_csv('{DATA_DIR}/csv/auto/mock_duckdb_test_data.csv', ignore_errors = true,
   strict_mode=true)
----
1	James	30	M	AL
2	Jill	32	F	CO
4	John	34	M	AS
5	Matthew	31	M	NULL
7	Olivia	36	F	OR
8	James	37	M	AZ
9	Titus	38	M	WY

statement error
select * from read_csv_auto('{DATA_DIR}/csv/dates.csv', auto_detect=false, delim=',', quote='"', columns={'a': 'VARCHAR'},
   strict_mode=true)
----
Expected Number of Columns: 1 Found: 2

query II
select * from read_csv_auto('{DATA_DIR}/csv/dates.csv')
----
919 304 6161	2008-08-10

query II
select * from read_csv_auto('{DATA_DIR}/csv/from_df.csv', quote='''')
----
'a,b,c'	45
NULL	234
hello	234
bye	2

# CSV file with RFC-conform dialect
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('{DATA_DIR}/csv/auto/rfc_conform.csv');

query ITT
SELECT * FROM test ORDER BY column0;
----
123	TEST1	one space
345	TEST1	trailing_space 
567	TEST1	no_space

statement ok
DROP TABLE test;

# CSV file with RFC-conform dialect quote
# read_csv is an alias to read_csv_auto when no extra parameters are supplied
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('{DATA_DIR}/csv/auto/rfc_conform_quote.csv');

query ITT
SELECT * FROM test ORDER BY column0;
----
123	TEST2	one space
345	TEST2	trailing_space, 
567	TEST2	no"space

statement ok
DROP TABLE test;

# CSV file with RFC-conform dialect quote/leading space of numerics
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('{DATA_DIR}/csv/auto/leading_space_numerics.csv');

query ITT
SELECT * FROM test ORDER BY column0;
----
123	TEST3	text1
345	TEST3	text2
567	TEST3	text3

statement ok
DROP TABLE test;

# CSV file with bar delimiter
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('{DATA_DIR}/csv/auto/pipe_delim.csv');

query ITT
SELECT * FROM test ORDER BY column0;
----
123	TEST4	text1
345	TEST4	text2
567	TEST4	text3

statement ok
DROP TABLE test;

# CSV file with bar delimiter and double quotes
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('{DATA_DIR}/csv/auto/pipe_delim_quote.csv');

query ITT
SELECT * FROM test ORDER BY column0;
----
123	TEST5	text1
345	TEST5	text2|
567	TEST5	text3

statement ok
DROP TABLE test;

# CSV file with bar delimiter and double quotes and double escape
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('{DATA_DIR}/csv/auto/quote_escape.csv');

query ITT
SELECT * FROM test ORDER BY column0;
----
123	TEST6	text1
345	TEST6	text"2"text
567	TEST6	text3

statement ok
DROP TABLE test;

# CSV file with bar delimiter and double quotes and backslash escape
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('{DATA_DIR}/csv/auto/backslash_escape.csv');

query ITT
SELECT * FROM test ORDER BY column0;
----
123	TEST7	text1
345	TEST7	text"2"
567	TEST7	text3

statement ok
DROP TABLE test;

# CSV file with bar delimiter and single quotes and backslash escape
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('{DATA_DIR}/csv/auto/single_quote_backslash.csv');

query ITT
SELECT * FROM test ORDER BY column0;
----
123	TEST8	text1
345	TEST8	text'2'text
567	TEST8	text3

statement ok
DROP TABLE test;

# CSV file with semicolon delimiter
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('{DATA_DIR}/csv/auto/semicolon_delim.csv');

query ITT
SELECT * FROM test ORDER BY column0;
----
123	TEST9	text1
345	TEST9	text2
567	TEST9	text3

statement ok
DROP TABLE test;

# CSV file with semicolon delimiter and double quotes
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('{DATA_DIR}/csv/auto/semicolon_quote.csv');

query ITT
SELECT * FROM test ORDER BY column0;
----
123	TEST10	text1
345	TEST10	text2
567	TEST10	te;xt3

statement ok
DROP TABLE test;

# CSV file with semicolon delimiter, double quotes and RFC escape
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('{DATA_DIR}/csv/auto/semicolon_escape.csv');

query ITT
SELECT * FROM test ORDER BY column0;
----
123	TEST11	text1
345	TEST11	text2
567	TEST11	te"xt3

statement ok
DROP TABLE test;

# CSV file with tab delimiter
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('{DATA_DIR}/csv/auto/tab.csv');

query ITT
SELECT * FROM test ORDER BY column0;
----
123	TEST12	text1
345	TEST12	text2
567	TEST12	text3

statement ok
DROP TABLE test;

# CSV file with tab delimiter and single quotes
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('{DATA_DIR}/csv/auto/tab_single_quote.csv');

query ITT
SELECT * FROM test ORDER BY column0;
----
123
TEST13
text1
345
TEST13
te	xt2
567
TEST13
text3

statement ok
DROP TABLE test;

# CSV file with tab delimiter and single quotes without type-hint
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('{DATA_DIR}/csv/auto/tab_single_quote_varchar.csv');

query ITT
SELECT * FROM test ORDER BY column0;
----
123
TEST14
text1
345
TEST14
te	xt2
567
TEST14
text3

statement ok
DROP TABLE test;


# CSV file with trailing empty lines 
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('{DATA_DIR}/csv/auto/issue_1254.csv');

query II
SELECT a, b FROM test;
----
1	2
1	2

statement ok
DROP TABLE test;