File: test_thousands_separator.test

package info (click to toggle)
duckdb 1.5.1-2
links: PTS, VCS
area: main
in suites:
size: 299,196 kB
sloc: cpp: 865,414; ansic: 57,292; python: 18,871; sql: 12,663; lisp: 11,751; yacc: 7,412; lex: 1,682; sh: 747; makefile: 558
file content (162 lines) | stat: -rw-r--r-- 4,082 bytes
parent folder | download | duplicates (3)
# name: test/sql/copy/csv/test_thousands_separator.test
# description: Test the thousands separator option
# group: [csv]

statement ok
PRAGMA enable_verification

# Test NULL
statement ok
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = NULL, delim = NULL)

# Test Empty
statement ok
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = '', delim = NULL)

# Test more than one char
statement error
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = 'bla', delim = NULL)
----
Unsupported parameter for THOUSANDS: should be max one character

# Must be different from decimal separator
statement error
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = ',', decimal_separator = ',')
----
THOUSANDS must not appear in the DECIMAL_SEPARATOR specification and vice versa

statement error
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = '.')
----
THOUSANDS must not appear in the DECIMAL_SEPARATOR specification and vice versa

# Test sniffer prompt
query I
SELECT COUNT(*) > 0 AS has_match
FROM sniff_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = ',')
WHERE prompt LIKE '%thousands='',''%';
----
TRUE

query I
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = ',', delim = ';', columns = {'a':'double'}, header = False, auto_detect = false)
----
100000.0
300000.2
300000000.4
50.0
9999999999.2

query I
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = ',', delim = ';', columns = {'a':'float'}, header = False, auto_detect = false)
----
100000.0
300000.2
300000000.4
50.0
9999999999.2

query I
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = ',', delim = ';', columns = {'a':'decimal(32,3)'}, header = False, auto_detect = false)
----
100000.0
300000.2
300000000.4
50.0
9999999999.2

# Now try with auto-detect
query I
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = ',', delim = ';', header = False)
----
100000.0
300000.2
300000000.4
50.0
9999999999.2

query I
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple_quoted.csv', thousands = ',', header = False)
----
100000.0
300000.2
300000000.4
50.0
9999999999.2

# Check auto-detect doesn't reject this is foat or decimal
query I
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = ',', delim = ';', header = False, columns = {'a':'float'})
----
100000.0
300000.2
300000000.4
50.0
9999999999.2

query I
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = ',', delim = ';', header = False, columns = {'a':'decimal(32,3)'})
----
100000.0
300000.2
300000000.4
50.0
9999999999.2

query III
FROM read_csv('{DATA_DIR}/csv/thousands_separator/multi_column.csv', thousands = ',')
----
rick	100000.0	amsterdam
lorenzo	300000.2	amsterdam
lorry	300000000.4	amsterdam
helly	50.0	amsterdam
patrick	9999999999.2	amsterdam


query III
FROM read_csv('{DATA_DIR}/csv/thousands_separator/multi_column_quote.csv', thousands = ',')
----
rick	100000.0	amsterdam
lorenzo	300000.2	amsterdam
lorry	300000000.4	amsterdam
helly	50.0	amsterdam
patrick	9999999999.2	amsterdam

# COPY statement
statement ok
CREATE TABLE T (name varchar, money double, city varchar);

statement ok
COPY T FROM '{DATA_DIR}/csv/thousands_separator/multi_column_quote.csv' (THOUSANDS ',') ;

query III
FROM T;
----
rick	100000.0	amsterdam
lorenzo	300000.2	amsterdam
lorry	300000000.4	amsterdam
helly	50.0	amsterdam
patrick	9999999999.2	amsterdam

# Test with non-sensical thousands separator - Pandas just removes them, so we do the same.
query I
FROM read_csv('{DATA_DIR}/csv/thousands_separator/thousands_broken.csv', thousands = ',')
----
100000.0
300000.2
300000.4
3000.406

# Test an integer
query I
FROM read_csv('{DATA_DIR}/csv/thousands_separator/integers.csv', thousands = ',')
----
100000000000
140000000000
900000000000
900

query I
select columns from sniff_csv('{DATA_DIR}/csv/thousands_separator/integers.csv', thousands = ',')
----
[{'name': bigvalues, 'type': BIGINT}]