1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
|
# name: test/sql/copy/csv/test_thousands_separator.test
# description: Test the thousands separator option
# group: [csv]
statement ok
PRAGMA enable_verification
# Test NULL
statement ok
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = NULL, delim = NULL)
# Test Empty
statement ok
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = '', delim = NULL)
# Test more than one char
statement error
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = 'bla', delim = NULL)
----
Unsupported parameter for THOUSANDS: should be max one character
# Must be different from decimal separator
statement error
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = ',', decimal_separator = ',')
----
THOUSANDS must not appear in the DECIMAL_SEPARATOR specification and vice versa
statement error
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = '.')
----
THOUSANDS must not appear in the DECIMAL_SEPARATOR specification and vice versa
# Test sniffer prompt
query I
SELECT COUNT(*) > 0 AS has_match
FROM sniff_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = ',')
WHERE prompt LIKE '%thousands='',''%';
----
TRUE
query I
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = ',', delim = ';', columns = {'a':'double'}, header = False, auto_detect = false)
----
100000.0
300000.2
300000000.4
50.0
9999999999.2
query I
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = ',', delim = ';', columns = {'a':'float'}, header = False, auto_detect = false)
----
100000.0
300000.2
300000000.4
50.0
9999999999.2
query I
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = ',', delim = ';', columns = {'a':'decimal(32,3)'}, header = False, auto_detect = false)
----
100000.0
300000.2
300000000.4
50.0
9999999999.2
# Now try with auto-detect
query I
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = ',', delim = ';', header = False)
----
100000.0
300000.2
300000000.4
50.0
9999999999.2
query I
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple_quoted.csv', thousands = ',', header = False)
----
100000.0
300000.2
300000000.4
50.0
9999999999.2
# Check auto-detect doesn't reject this is foat or decimal
query I
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = ',', delim = ';', header = False, columns = {'a':'float'})
----
100000.0
300000.2
300000000.4
50.0
9999999999.2
query I
FROM read_csv('{DATA_DIR}/csv/thousands_separator/simple.csv', thousands = ',', delim = ';', header = False, columns = {'a':'decimal(32,3)'})
----
100000.0
300000.2
300000000.4
50.0
9999999999.2
query III
FROM read_csv('{DATA_DIR}/csv/thousands_separator/multi_column.csv', thousands = ',')
----
rick 100000.0 amsterdam
lorenzo 300000.2 amsterdam
lorry 300000000.4 amsterdam
helly 50.0 amsterdam
patrick 9999999999.2 amsterdam
query III
FROM read_csv('{DATA_DIR}/csv/thousands_separator/multi_column_quote.csv', thousands = ',')
----
rick 100000.0 amsterdam
lorenzo 300000.2 amsterdam
lorry 300000000.4 amsterdam
helly 50.0 amsterdam
patrick 9999999999.2 amsterdam
# COPY statement
statement ok
CREATE TABLE T (name varchar, money double, city varchar);
statement ok
COPY T FROM '{DATA_DIR}/csv/thousands_separator/multi_column_quote.csv' (THOUSANDS ',') ;
query III
FROM T;
----
rick 100000.0 amsterdam
lorenzo 300000.2 amsterdam
lorry 300000000.4 amsterdam
helly 50.0 amsterdam
patrick 9999999999.2 amsterdam
# Test with non-sensical thousands separator - Pandas just removes them, so we do the same.
query I
FROM read_csv('{DATA_DIR}/csv/thousands_separator/thousands_broken.csv', thousands = ',')
----
100000.0
300000.2
300000.4
3000.406
# Test an integer
query I
FROM read_csv('{DATA_DIR}/csv/thousands_separator/integers.csv', thousands = ',')
----
100000000000
140000000000
900000000000
900
query I
select columns from sniff_csv('{DATA_DIR}/csv/thousands_separator/integers.csv', thousands = ',')
----
[{'name': bigvalues, 'type': BIGINT}]
|