1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
|
# name: test/sql/copy/csv/test_comment_option.test
# description: Test that the comment option of csv reading works properly
# group: [csv]
statement ok
PRAGMA enable_verification
# Test comment and skip option
query III
FROM read_csv('{DATA_DIR}/csv/comments/17226.csv', comment='#', all_varchar=True, skip=0, ignore_errors=True)
----
1 2 3
4 5 6
# Comment must be different than quote and delimiter options
statement error
FROM read_csv('{DATA_DIR}/csv/comments/mixed_options.csv', delim = ',', comment = ',', auto_detect = false, columns= {'a':'integer'})
----
COMMENT must not appear in the DELIMITER specification and vice versa
statement error
FROM read_csv('{DATA_DIR}/csv/comments/mixed_options.csv', quote = ',', comment = ',', escape = '', delim = ';', auto_detect = false, columns= {'a':'integer'})
----
COMMENT must not appear in the QUOTE specification and vice versa
query II
FROM '{DATA_DIR}/csv/comments/simple.csv';
----
1 3
6 7
query I
FROM '{DATA_DIR}/csv/comments/simple_comma.csv';
----
, I'm a csv file
a;b
, This is also a baddy
1;3
6;7
, You better skip me
query II
FROM read_csv('{DATA_DIR}/csv/comments/simple_comma.csv', comment = ',');
----
1 3
6 7
# Lets try over a vector size
query II
FROM '{DATA_DIR}/csv/comments/big.csv' limit 5;
----
1 3
6 7
1 3
6 7
1 3
# Check commented data-points are not in the data
query II
FROM '{DATA_DIR}/csv/comments/big.csv' where a = 20
----
query I
SELECT count(*) FROM '{DATA_DIR}/csv/comments/big.csv'
----
1448
# Test empty spaces
query II
FROM '{DATA_DIR}/csv/comments/empty_space.csv';
----
1 3
1 3
1 3
1 3
# Lets try with a buffer limit
loop buffer_size 30 35
query II
FROM read_csv('{DATA_DIR}/csv/comments/simple.csv', buffer_size = ${buffer_size}) limit 5;
----
1 3
6 7
endloop
# Test that fully commented lines are ignored by the parameter header but not by skiprows.
query II
FROM read_csv('{DATA_DIR}/csv/comments/simple.csv', skip = 2);
----
6 7
# Test we can detect comments and skip rows at the same time
query II
FROM '{DATA_DIR}/csv/comments/invalid_rows.csv';
----
1 3
6 7
query II
select SkipRows, Comment FROM sniff_csv('{DATA_DIR}/csv/comments/invalid_rows.csv');
----
2 #
# Test ignore errors
statement error
select count(*) FROM '{DATA_DIR}/csv/comments/error.csv';
----
Expected Number of Columns: 2 Found: 1
query I
select count(*) FROM read_csv('{DATA_DIR}/csv/comments/error.csv', ignore_errors = true);
----
2726
query I
select count(*) FROM read_csv('{DATA_DIR}/csv/comments/error.csv', ignore_errors = true, comment = '#');
----
2726
query II
select comment, columns from sniff_csv('{DATA_DIR}/csv/comments/error.csv', ignore_errors = true);
----
# [{'name': a, 'type': BIGINT}, {'name': b, 'type': BIGINT}]
query II
select comment, columns from sniff_csv('{DATA_DIR}/csv/comments/error.csv', ignore_errors = true);
----
# [{'name': a, 'type': BIGINT}, {'name': b, 'type': BIGINT}]
# Test set skip row
loop i 0 2
query II
FROM read_csv('{DATA_DIR}/csv/comments/simple.csv',skip=${i});
----
1 3
6 7
endloop
|