File: test_comment_option.test

package info (click to toggle)

duckdb 1.5.1-2

links: PTS, VCS
area: main
in suites:
size: 299,196 kB
sloc: cpp: 865,414; ansic: 57,292; python: 18,871; sql: 12,663; lisp: 11,751; yacc: 7,412; lex: 1,682; sh: 747; makefile: 558

file content (144 lines) | stat: -rw-r--r-- 3,031 bytes

parent folder | download | duplicates (3)

# name: test/sql/copy/csv/test_comment_option.test
# description: Test that the comment option of csv reading works properly
# group: [csv]

# Turn on query verification for the statements that follow.
statement ok
PRAGMA enable_verification

# Comment option combined with skip = 0 and ignore_errors, reading every
# column as VARCHAR: two rows of three columns are expected.
query III
FROM read_csv(
    '{DATA_DIR}/csv/comments/17226.csv',
    comment = '#',
    all_varchar = True,
    skip = 0,
    ignore_errors = True
)
----
1	2	3
4	5	6

# The comment character has to differ from the delimiter ...
statement error
FROM read_csv(
    '{DATA_DIR}/csv/comments/mixed_options.csv',
    delim = ',',
    comment = ',',
    auto_detect = false,
    columns = {'a': 'integer'}
)
----
COMMENT must not appear in the DELIMITER specification and vice versa

# ... and from the quote character.
statement error
FROM read_csv(
    '{DATA_DIR}/csv/comments/mixed_options.csv',
    quote = ',',
    comment = ',',
    escape = '',
    delim = ';',
    auto_detect = false,
    columns = {'a': 'integer'}
)
----
COMMENT must not appear in the QUOTE specification and vice versa

# Reading simple.csv with no explicit options returns only the two data rows.
query II
FROM '{DATA_DIR}/csv/comments/simple.csv';
----
1	3
6	7

# Reading simple_comma.csv with no explicit options returns one column that
# holds every raw line, including the lines that start with ','.
query I
FROM '{DATA_DIR}/csv/comments/simple_comma.csv';
----
, I'm a csv file
a;b
, This is also a baddy
1;3
6;7
, You better skip me

# With comment = ',' the same file yields the two data rows in two columns.
query II
FROM read_csv(
    '{DATA_DIR}/csv/comments/simple_comma.csv',
    comment = ','
);
----
1	3
6	7

# Larger input (original intent: go past one vector size); check the first
# five rows.
query II
FROM '{DATA_DIR}/csv/comments/big.csv'
LIMIT 5;
----
1	3
6	7
1	3
6	7
1	3

# Data points that sit in commented lines must not show up in the result:
# no row with a = 20 is expected.
query II
FROM '{DATA_DIR}/csv/comments/big.csv'
WHERE a = 20
----

# Total number of rows read from big.csv.
query I
SELECT count(*)
FROM '{DATA_DIR}/csv/comments/big.csv'
----
1448

# Test empty spaces: reading empty_space.csv with no explicit options must
# return exactly four two-column rows.
query II
FROM '{DATA_DIR}/csv/comments/empty_space.csv';
----
1	3
1	3
1	3
1	3


# Read simple.csv again with an explicit buffer_size, once per value of the
# loop variable; the result must be the same two rows every time.

loop buffer_size 30 35

query II
FROM read_csv(
    '{DATA_DIR}/csv/comments/simple.csv',
    buffer_size = ${buffer_size}
)
LIMIT 5;
----
1	3
6	7

endloop

# Fully commented lines are ignored when looking for the header, but they are
# not ignored by skip: with skip = 2 a single data row is left.
query II
FROM read_csv(
    '{DATA_DIR}/csv/comments/simple.csv',
    skip = 2
);
----
6	7

# Comment detection and row skipping have to work together on the same file.
query II
FROM '{DATA_DIR}/csv/comments/invalid_rows.csv';
----
1	3
6	7

# sniff_csv reports what was detected for that file: SkipRows = 2 and
# Comment = '#'.
query II
SELECT SkipRows, Comment
FROM sniff_csv('{DATA_DIR}/csv/comments/invalid_rows.csv');
----
2	#

# Test ignore errors
# A plain read of error.csv fails with a column-count error.
statement error
SELECT count(*)
FROM '{DATA_DIR}/csv/comments/error.csv';
----
Expected Number of Columns: 2 Found: 1

# With ignore_errors the read succeeds ...
query I
SELECT count(*)
FROM read_csv(
    '{DATA_DIR}/csv/comments/error.csv',
    ignore_errors = true
);
----
2726

# ... and the row count is the same when the comment character is passed
# explicitly.
query I
SELECT count(*)
FROM read_csv(
    '{DATA_DIR}/csv/comments/error.csv',
    ignore_errors = true,
    comment = '#'
);
----
2726

# The sniffer, run with ignore_errors, reports '#' as the comment character
# and two BIGINT columns.
# NOTE(review): sniff_csv with an explicit comment = '#' is not exercised in
# this section; add a record for it if that case was intended.
query II
SELECT comment, columns
FROM sniff_csv(
    '{DATA_DIR}/csv/comments/error.csv',
    ignore_errors = true
);
----
#	[{'name': a, 'type': BIGINT}, {'name': b, 'type': BIGINT}]

# Test set skip row: for every skip value produced by the loop, simple.csv
# must still return both data rows.
loop i 0 2

query II
FROM read_csv(
    '{DATA_DIR}/csv/comments/simple.csv',
    skip = ${i}
);
----
1	3
6	7

endloop