File: test_comment_option.test

package info (click to toggle)
duckdb 1.5.1-2
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 299,196 kB
  • sloc: cpp: 865,414; ansic: 57,292; python: 18,871; sql: 12,663; lisp: 11,751; yacc: 7,412; lex: 1,682; sh: 747; makefile: 558
file content (144 lines) | stat: -rw-r--r-- 3,031 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# name: test/sql/copy/csv/test_comment_option.test
# description: Test that the comment option of csv reading works properly
# group: [csv]

statement ok
PRAGMA enable_verification

# Test comment and skip option
query III
FROM read_csv('{DATA_DIR}/csv/comments/17226.csv', comment='#', all_varchar=True, skip=0, ignore_errors=True)
----
1	2	3
4	5	6

# Comment must be different than quote and delimiter options
statement error
FROM  read_csv('{DATA_DIR}/csv/comments/mixed_options.csv', delim = ',', comment = ',', auto_detect = false, columns= {'a':'integer'})
----
COMMENT must not appear in the DELIMITER specification and vice versa

statement error
FROM  read_csv('{DATA_DIR}/csv/comments/mixed_options.csv', quote = ',', comment = ',', escape = '', delim = ';', auto_detect = false, columns= {'a':'integer'})
----
COMMENT must not appear in the QUOTE specification and vice versa

query II
FROM '{DATA_DIR}/csv/comments/simple.csv';
----
1	3
6	7

query I
FROM '{DATA_DIR}/csv/comments/simple_comma.csv';
----
, I'm a csv file
a;b
, This is also a baddy
1;3
6;7
, You better skip me

query II
FROM read_csv('{DATA_DIR}/csv/comments/simple_comma.csv', comment = ',');
----
1	3
6	7

# Lets try over a vector size
query II
FROM '{DATA_DIR}/csv/comments/big.csv' limit 5;
----
1	3
6	7
1	3
6	7
1	3

# Check commented data-points are not in the data
query II
FROM '{DATA_DIR}/csv/comments/big.csv' where  a = 20
----

query I
SELECT count(*) FROM '{DATA_DIR}/csv/comments/big.csv'
----
1448

# Test empty spaces
query II
FROM '{DATA_DIR}/csv/comments/empty_space.csv';
----
1	3
1	3
1	3
1	3


# Lets try with a buffer limit

loop buffer_size 30 35

query II
FROM read_csv('{DATA_DIR}/csv/comments/simple.csv', buffer_size = ${buffer_size}) limit 5;
----
1	3
6	7

endloop

# Test that fully commented lines are ignored by the parameter header but not by skiprows.
query II
FROM read_csv('{DATA_DIR}/csv/comments/simple.csv', skip = 2);
----
6	7

# Test we can detect comments and skip rows at the same time
query II
FROM '{DATA_DIR}/csv/comments/invalid_rows.csv';
----
1	3
6	7

query II
select SkipRows, Comment FROM sniff_csv('{DATA_DIR}/csv/comments/invalid_rows.csv');
----
2	#

# Test ignore errors
statement error
select count(*) FROM '{DATA_DIR}/csv/comments/error.csv';
----
Expected Number of Columns: 2 Found: 1

query I
select count(*) FROM read_csv('{DATA_DIR}/csv/comments/error.csv', ignore_errors = true);
----
2726

query I
select count(*) FROM read_csv('{DATA_DIR}/csv/comments/error.csv', ignore_errors = true, comment = '#');
----
2726

query II
select comment, columns from sniff_csv('{DATA_DIR}/csv/comments/error.csv', ignore_errors = true);
----
#	[{'name': a, 'type': BIGINT}, {'name': b, 'type': BIGINT}]

query II
select comment, columns from sniff_csv('{DATA_DIR}/csv/comments/error.csv', ignore_errors = true);
----
#	[{'name': a, 'type': BIGINT}, {'name': b, 'type': BIGINT}]

# Test set skip row
loop i 0 2

query II
FROM read_csv('{DATA_DIR}/csv/comments/simple.csv',skip=${i});
----
1	3
6	7

endloop