File: test_thousands_separator.test

package info (click to toggle)
duckdb 1.5.1-2
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 299,196 kB
  • sloc: cpp: 865,414; ansic: 57,292; python: 18,871; sql: 12,663; lisp: 11,751; yacc: 7,412; lex: 1,682; sh: 747; makefile: 558
file content (162 lines) | stat: -rw-r--r-- 4,082 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# name: test/sql/copy/csv/test_thousands_separator.test
# description: Test the thousands separator option
# group: [csv]

statement ok
PRAGMA enable_verification

# Test NULL
statement ok
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = NULL, delim = NULL)

# Test Empty
statement ok
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = '', delim = NULL)

# Test more than one char
statement error
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = 'bla', delim = NULL)
----
Unsupported parameter for THOUSANDS: should be max one character

# Must be different from decimal separator
statement error
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = ',', decimal_separator = ',')
----
THOUSANDS must not appear in the DECIMAL_SEPARATOR specification and vice versa

statement error
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = '.')
----
THOUSANDS must not appear in the DECIMAL_SEPARATOR specification and vice versa

# Test sniffer prompt
query I
SELECT COUNT(*) > 0 AS has_match
FROM sniff_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = ',')
WHERE prompt LIKE '%thousands='',''%';
----
TRUE

query I
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = ',', delim = ';', columns = {'a':'double'}, header = False, auto_detect = false)
----
100000.0
300000.2
300000000.4
50.0
9999999999.2

query I
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = ',', delim = ';', columns = {'a':'float'}, header = False, auto_detect = false)
----
100000.0
300000.2
300000000.4
50.0
9999999999.2

query I
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = ',', delim = ';', columns = {'a':'decimal(32,3)'}, header = False, auto_detect = false)
----
100000.0
300000.2
300000000.4
50.0
9999999999.2

# Now try with auto-detect
query I
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = ',', delim = ';', header = False)
----
100000.0
300000.2
300000000.4
50.0
9999999999.2

query I
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple_quoted.csv', thousands = ',', header = False)
----
100000.0
300000.2
300000000.4
50.0
9999999999.2

# Check auto-detect doesn't reject this is foat or decimal
query I
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = ',', delim = ';', header = False, columns = {'a':'float'})
----
100000.0
300000.2
300000000.4
50.0
9999999999.2

query I
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = ',', delim = ';', header = False, columns = {'a':'decimal(32,3)'})
----
100000.0
300000.2
300000000.4
50.0
9999999999.2

query III
FROM read_csv('{DATA_DIR}/csv/thousands_separator/multi_column.csv', thousands = ',')
----
rick	100000.0	amsterdam
lorenzo	300000.2	amsterdam
lorry	300000000.4	amsterdam
helly	50.0	amsterdam
patrick	9999999999.2	amsterdam


query III
FROM read_csv('{DATA_DIR}/csv/thousands_separator/multi_column_quote.csv', thousands = ',')
----
rick	100000.0	amsterdam
lorenzo	300000.2	amsterdam
lorry	300000000.4	amsterdam
helly	50.0	amsterdam
patrick	9999999999.2	amsterdam

# COPY statement
statement ok
CREATE TABLE T (name varchar, money double, city varchar);

statement ok
COPY T FROM '{DATA_DIR}/csv/thousands_separator/multi_column_quote.csv' (THOUSANDS ',') ;

query III
FROM T;
----
rick	100000.0	amsterdam
lorenzo	300000.2	amsterdam
lorry	300000000.4	amsterdam
helly	50.0	amsterdam
patrick	9999999999.2	amsterdam

# Test with non-sensical thousands separator - Pandas just removes them, so we do the same.
query I
FROM read_csv('{DATA_DIR}/csv/thousands_separator/thousands_broken.csv', thousands = ',')
----
100000.0
300000.2
300000.4
3000.406

# Test an integer
query I
FROM read_csv('{DATA_DIR}/csv/thousands_separator/integers.csv', thousands = ',')
----
100000000000
140000000000
900000000000
900

query I
select columns from sniff_csv('{DATA_DIR}/csv/thousands_separator/integers.csv', thousands = ',')
----
[{'name': bigvalues, 'type': BIGINT}]