1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
|
# name: test/sql/copy/csv/test_sniff_csv_options.test
# description: Test sniff_csv function with user set options
# group: [csv]
statement ok
PRAGMA enable_verification
# requires notwindows because tests will return \r\n to be used in the parameters
require notwindows
# Test user giving wrong option
statement error
FROM sniff_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', delim = ',');
----
It was not possible to automatically detect the CSV parsing dialect
# Test user giving options that are also sniffed
# delimiter
query IIIIIIIIIIII
FROM sniff_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', delim='|');
----
| (empty) (empty) \n (empty) 0 0 [{'name': column00, 'type': BIGINT}, {'name': column01, 'type': BIGINT}, {'name': column02, 'type': BIGINT}, {'name': column03, 'type': BIGINT}, {'name': column04, 'type': BIGINT}, {'name': column05, 'type': DOUBLE}, {'name': column06, 'type': DOUBLE}, {'name': column07, 'type': DOUBLE}, {'name': column08, 'type': VARCHAR}, {'name': column09, 'type': VARCHAR}, {'name': column10, 'type': DATE}, {'name': column11, 'type': DATE}, {'name': column12, 'type': DATE}, {'name': column13, 'type': VARCHAR}, {'name': column14, 'type': VARCHAR}, {'name': column15, 'type': VARCHAR}] %Y-%m-%d NULL delim='|' FROM read_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', auto_detect=false, quote='', escape='', new_line='\n', skip=0, comment='', header=false, columns={'column00': 'BIGINT', 'column01': 'BIGINT', 'column02': 'BIGINT', 'column03': 'BIGINT', 'column04': 'BIGINT', 'column05': 'DOUBLE', 'column06': 'DOUBLE', 'column07': 'DOUBLE', 'column08': 'VARCHAR', 'column09': 'VARCHAR', 'column10': 'DATE', 'column11': 'DATE', 'column12': 'DATE', 'column13': 'VARCHAR', 'column14': 'VARCHAR', 'column15': 'VARCHAR'}, dateformat='%Y-%m-%d', delim='|');
# quote
query IIIIIIIIIIII
FROM sniff_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', quote='"');
----
| " (empty) \n (empty) 0 0 [{'name': column00, 'type': BIGINT}, {'name': column01, 'type': BIGINT}, {'name': column02, 'type': BIGINT}, {'name': column03, 'type': BIGINT}, {'name': column04, 'type': BIGINT}, {'name': column05, 'type': DOUBLE}, {'name': column06, 'type': DOUBLE}, {'name': column07, 'type': DOUBLE}, {'name': column08, 'type': VARCHAR}, {'name': column09, 'type': VARCHAR}, {'name': column10, 'type': DATE}, {'name': column11, 'type': DATE}, {'name': column12, 'type': DATE}, {'name': column13, 'type': VARCHAR}, {'name': column14, 'type': VARCHAR}, {'name': column15, 'type': VARCHAR}] %Y-%m-%d NULL quote='"' FROM read_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', auto_detect=false, delim='|', escape='', new_line='\n', skip=0, comment='', header=false, columns={'column00': 'BIGINT', 'column01': 'BIGINT', 'column02': 'BIGINT', 'column03': 'BIGINT', 'column04': 'BIGINT', 'column05': 'DOUBLE', 'column06': 'DOUBLE', 'column07': 'DOUBLE', 'column08': 'VARCHAR', 'column09': 'VARCHAR', 'column10': 'DATE', 'column11': 'DATE', 'column12': 'DATE', 'column13': 'VARCHAR', 'column14': 'VARCHAR', 'column15': 'VARCHAR'}, dateformat='%Y-%m-%d', quote='"');
# escape
query IIIIIIIIIIII
FROM sniff_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', escape='"');
----
| " " \n (empty) 0 0 [{'name': column00, 'type': BIGINT}, {'name': column01, 'type': BIGINT}, {'name': column02, 'type': BIGINT}, {'name': column03, 'type': BIGINT}, {'name': column04, 'type': BIGINT}, {'name': column05, 'type': DOUBLE}, {'name': column06, 'type': DOUBLE}, {'name': column07, 'type': DOUBLE}, {'name': column08, 'type': VARCHAR}, {'name': column09, 'type': VARCHAR}, {'name': column10, 'type': DATE}, {'name': column11, 'type': DATE}, {'name': column12, 'type': DATE}, {'name': column13, 'type': VARCHAR}, {'name': column14, 'type': VARCHAR}, {'name': column15, 'type': VARCHAR}] %Y-%m-%d NULL escape='"' FROM read_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', auto_detect=false, delim='|', quote='"', new_line='\n', skip=0, comment='', header=false, columns={'column00': 'BIGINT', 'column01': 'BIGINT', 'column02': 'BIGINT', 'column03': 'BIGINT', 'column04': 'BIGINT', 'column05': 'DOUBLE', 'column06': 'DOUBLE', 'column07': 'DOUBLE', 'column08': 'VARCHAR', 'column09': 'VARCHAR', 'column10': 'DATE', 'column11': 'DATE', 'column12': 'DATE', 'column13': 'VARCHAR', 'column14': 'VARCHAR', 'column15': 'VARCHAR'}, dateformat='%Y-%m-%d', escape='"');
# column names
query IIIIIIIIIIII
FROM sniff_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', escape='"');
----
| " " \n (empty) 0 0 [{'name': column00, 'type': BIGINT}, {'name': column01, 'type': BIGINT}, {'name': column02, 'type': BIGINT}, {'name': column03, 'type': BIGINT}, {'name': column04, 'type': BIGINT}, {'name': column05, 'type': DOUBLE}, {'name': column06, 'type': DOUBLE}, {'name': column07, 'type': DOUBLE}, {'name': column08, 'type': VARCHAR}, {'name': column09, 'type': VARCHAR}, {'name': column10, 'type': DATE}, {'name': column11, 'type': DATE}, {'name': column12, 'type': DATE}, {'name': column13, 'type': VARCHAR}, {'name': column14, 'type': VARCHAR}, {'name': column15, 'type': VARCHAR}] %Y-%m-%d NULL escape='"' FROM read_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', auto_detect=false, delim='|', quote='"', new_line='\n', skip=0, comment='', header=false, columns={'column00': 'BIGINT', 'column01': 'BIGINT', 'column02': 'BIGINT', 'column03': 'BIGINT', 'column04': 'BIGINT', 'column05': 'DOUBLE', 'column06': 'DOUBLE', 'column07': 'DOUBLE', 'column08': 'VARCHAR', 'column09': 'VARCHAR', 'column10': 'DATE', 'column11': 'DATE', 'column12': 'DATE', 'column13': 'VARCHAR', 'column14': 'VARCHAR', 'column15': 'VARCHAR'}, dateformat='%Y-%m-%d', escape='"');
# column names and types
query IIIIIIIIIIII
FROM sniff_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', names=['c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9', 'c10', 'c11', 'c12', 'c13', 'c14', 'c15', 'c16']);
----
| (empty) (empty) \n (empty) 0 0 [{'name': c1, 'type': BIGINT}, {'name': c2, 'type': BIGINT}, {'name': c3, 'type': BIGINT}, {'name': c4, 'type': BIGINT}, {'name': c5, 'type': BIGINT}, {'name': c6, 'type': DOUBLE}, {'name': c7, 'type': DOUBLE}, {'name': c8, 'type': DOUBLE}, {'name': c9, 'type': VARCHAR}, {'name': c10, 'type': VARCHAR}, {'name': c11, 'type': DATE}, {'name': c12, 'type': DATE}, {'name': c13, 'type': DATE}, {'name': c14, 'type': VARCHAR}, {'name': c15, 'type': VARCHAR}, {'name': c16, 'type': VARCHAR}] %Y-%m-%d NULL NULL FROM read_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', auto_detect=false, delim='|', quote='', escape='', new_line='\n', skip=0, comment='', header=false, columns={'c1': 'BIGINT', 'c2': 'BIGINT', 'c3': 'BIGINT', 'c4': 'BIGINT', 'c5': 'BIGINT', 'c6': 'DOUBLE', 'c7': 'DOUBLE', 'c8': 'DOUBLE', 'c9': 'VARCHAR', 'c10': 'VARCHAR', 'c11': 'DATE', 'c12': 'DATE', 'c13': 'DATE', 'c14': 'VARCHAR', 'c15': 'VARCHAR', 'c16': 'VARCHAR'}, dateformat='%Y-%m-%d');
query IIIIIIIIIIIIIIII
FROM read_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', auto_detect=false, delim='|', quote='"', escape='"', new_line='\n', skip=0, header=false, columns={'c1': 'BIGINT', 'c2': 'BIGINT', 'c3': 'BIGINT', 'c4': 'BIGINT', 'c5': 'BIGINT', 'c6': 'DOUBLE', 'c7': 'DOUBLE', 'c8': 'DOUBLE', 'c9': 'VARCHAR', 'c10': 'VARCHAR', 'c11': 'DATE', 'c12': 'DATE', 'c13': 'DATE', 'c14': 'VARCHAR', 'c15': 'VARCHAR', 'c16': 'VARCHAR'}, dateformat='%Y-%m-%d') limit 1;
----
1 15519 785 1 17 24386.67 0.04 0.02 N O 1996-03-13 1996-02-12 1996-03-22 DELIVER IN PERSON TRUCK egular courts above the
query IIIIIIIIIIII
FROM sniff_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', columns={'c1': 'BIGINT', 'c2': 'BIGINT', 'c3': 'BIGINT', 'c4': 'BIGINT', 'c5': 'BIGINT', 'c6': 'DOUBLE', 'c7': 'DOUBLE', 'c8': 'DOUBLE', 'c9': 'VARCHAR', 'c10': 'VARCHAR', 'c11': 'DATE', 'c12': 'DATE', 'c13': 'DATE', 'c14': 'VARCHAR', 'c15': 'VARCHAR', 'c16': 'VARCHAR'});
----
| (empty) (empty) \n (empty) 0 0 [{'name': c1, 'type': BIGINT}, {'name': c2, 'type': BIGINT}, {'name': c3, 'type': BIGINT}, {'name': c4, 'type': BIGINT}, {'name': c5, 'type': BIGINT}, {'name': c6, 'type': DOUBLE}, {'name': c7, 'type': DOUBLE}, {'name': c8, 'type': DOUBLE}, {'name': c9, 'type': VARCHAR}, {'name': c10, 'type': VARCHAR}, {'name': c11, 'type': DATE}, {'name': c12, 'type': DATE}, {'name': c13, 'type': DATE}, {'name': c14, 'type': VARCHAR}, {'name': c15, 'type': VARCHAR}, {'name': c16, 'type': VARCHAR}] %Y-%m-%d NULL NULL FROM read_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', auto_detect=false, delim='|', quote='', escape='', new_line='\n', skip=0, comment='', header=false, columns={'c1': 'BIGINT', 'c2': 'BIGINT', 'c3': 'BIGINT', 'c4': 'BIGINT', 'c5': 'BIGINT', 'c6': 'DOUBLE', 'c7': 'DOUBLE', 'c8': 'DOUBLE', 'c9': 'VARCHAR', 'c10': 'VARCHAR', 'c11': 'DATE', 'c12': 'DATE', 'c13': 'DATE', 'c14': 'VARCHAR', 'c15': 'VARCHAR', 'c16': 'VARCHAR'}, dateformat='%Y-%m-%d');
# skip rows
query IIIIIIIIIIII
FROM sniff_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', skip=1);
----
| (empty) (empty) \n (empty) 1 0 [{'name': column00, 'type': BIGINT}, {'name': column01, 'type': BIGINT}, {'name': column02, 'type': BIGINT}, {'name': column03, 'type': BIGINT}, {'name': column04, 'type': BIGINT}, {'name': column05, 'type': DOUBLE}, {'name': column06, 'type': DOUBLE}, {'name': column07, 'type': DOUBLE}, {'name': column08, 'type': VARCHAR}, {'name': column09, 'type': VARCHAR}, {'name': column10, 'type': DATE}, {'name': column11, 'type': DATE}, {'name': column12, 'type': DATE}, {'name': column13, 'type': VARCHAR}, {'name': column14, 'type': VARCHAR}, {'name': column15, 'type': VARCHAR}] %Y-%m-%d NULL skip=1 FROM read_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', auto_detect=false, delim='|', quote='', escape='', new_line='\n', comment='', header=false, columns={'column00': 'BIGINT', 'column01': 'BIGINT', 'column02': 'BIGINT', 'column03': 'BIGINT', 'column04': 'BIGINT', 'column05': 'DOUBLE', 'column06': 'DOUBLE', 'column07': 'DOUBLE', 'column08': 'VARCHAR', 'column09': 'VARCHAR', 'column10': 'DATE', 'column11': 'DATE', 'column12': 'DATE', 'column13': 'VARCHAR', 'column14': 'VARCHAR', 'column15': 'VARCHAR'}, dateformat='%Y-%m-%d', skip=1);
# header exists
query IIIIIIIIIIII
FROM sniff_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', header=true);
----
| (empty) (empty) \n (empty) 0 1 [{'name': 1, 'type': BIGINT}, {'name': 15519, 'type': BIGINT}, {'name': 785, 'type': BIGINT}, {'name': 1_1, 'type': BIGINT}, {'name': 17, 'type': BIGINT}, {'name': 24386.670000, 'type': DOUBLE}, {'name': 0.040000, 'type': DOUBLE}, {'name': 0.020000, 'type': DOUBLE}, {'name': N, 'type': VARCHAR}, {'name': O, 'type': VARCHAR}, {'name': 1996-03-13, 'type': DATE}, {'name': 1996-02-12, 'type': DATE}, {'name': 1996-03-22, 'type': DATE}, {'name': DELIVER IN PERSON, 'type': VARCHAR}, {'name': TRUCK, 'type': VARCHAR}, {'name': egular courts above the, 'type': VARCHAR}] %Y-%m-%d NULL header=true FROM read_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', auto_detect=false, delim='|', quote='', escape='', new_line='\n', skip=0, comment='', columns={'1': 'BIGINT', '15519': 'BIGINT', '785': 'BIGINT', '1_1': 'BIGINT', '17': 'BIGINT', '24386.670000': 'DOUBLE', '0.040000': 'DOUBLE', '0.020000': 'DOUBLE', 'N': 'VARCHAR', 'O': 'VARCHAR', '1996-03-13': 'DATE', '1996-02-12': 'DATE', '1996-03-22': 'DATE', 'DELIVER IN PERSON': 'VARCHAR', 'TRUCK': 'VARCHAR', 'egular courts above the': 'VARCHAR'}, dateformat='%Y-%m-%d', header=true);
# timestampformat
query IIIIIIIIIIII
FROM sniff_csv('{DATA_DIR}/csv/auto/time_date_timestamp_yyyy.mm.dd.csv', dateformat='%Y.%m.%d')
----
, (empty) (empty) \n (empty) 0 1 [{'name': a, 'type': BIGINT}, {'name': b, 'type': VARCHAR}, {'name': t, 'type': TIME}, {'name': d, 'type': DATE}, {'name': ts, 'type': TIMESTAMP}] %Y.%m.%d %Y.%m.%d %H:%M:%S dateformat='%Y.%m.%d' FROM read_csv('{DATA_DIR}/csv/auto/time_date_timestamp_yyyy.mm.dd.csv', auto_detect=false, delim=',', quote='', escape='', new_line='\n', skip=0, comment='', header=true, columns={'a': 'BIGINT', 'b': 'VARCHAR', 't': 'TIME', 'd': 'DATE', 'ts': 'TIMESTAMP'}, timestampformat='%Y.%m.%d %H:%M:%S', dateformat='%Y.%m.%d');
# dateformat
query IIIIIIIIIIII
FROM sniff_csv('{DATA_DIR}/csv/auto/time_date_timestamp_yyyy.mm.dd.csv', timestampformat='%Y.%m.%d %H:%M:%S')
----
, (empty) (empty) \n (empty) 0 1 [{'name': a, 'type': BIGINT}, {'name': b, 'type': VARCHAR}, {'name': t, 'type': TIME}, {'name': d, 'type': DATE}, {'name': ts, 'type': TIMESTAMP}] %Y.%m.%d %Y.%m.%d %H:%M:%S timestampformat='%Y.%m.%d %H:%M:%S' FROM read_csv('{DATA_DIR}/csv/auto/time_date_timestamp_yyyy.mm.dd.csv', auto_detect=false, delim=',', quote='', escape='', new_line='\n', skip=0, comment='', header=true, columns={'a': 'BIGINT', 'b': 'VARCHAR', 't': 'TIME', 'd': 'DATE', 'ts': 'TIMESTAMP'}, dateformat='%Y.%m.%d', timestampformat='%Y.%m.%d %H:%M:%S');
# Test a combination
query IIIIIIIIIIII
FROM sniff_csv('{DATA_DIR}/csv/auto/time_date_timestamp_yyyy.mm.dd.csv', dateformat='%Y.%m.%d', timestampformat='%Y.%m.%d %H:%M:%S')
----
, (empty) (empty) \n (empty) 0 1 [{'name': a, 'type': BIGINT}, {'name': b, 'type': VARCHAR}, {'name': t, 'type': TIME}, {'name': d, 'type': DATE}, {'name': ts, 'type': TIMESTAMP}] %Y.%m.%d %Y.%m.%d %H:%M:%S dateformat='%Y.%m.%d', timestampformat='%Y.%m.%d %H:%M:%S' FROM read_csv('{DATA_DIR}/csv/auto/time_date_timestamp_yyyy.mm.dd.csv', auto_detect=false, delim=',', quote='', escape='', new_line='\n', skip=0, comment='', header=true, columns={'a': 'BIGINT', 'b': 'VARCHAR', 't': 'TIME', 'd': 'DATE', 'ts': 'TIMESTAMP'}, dateformat='%Y.%m.%d', timestampformat='%Y.%m.%d %H:%M:%S');
query IIIII
FROM read_csv('{DATA_DIR}/csv/auto/time_date_timestamp_yyyy.mm.dd.csv', auto_detect=false, delim=',', quote='"', escape='"', new_line='\n', skip=0, header=true, columns={'a': 'BIGINT', 'b': 'VARCHAR', 't': 'TIME', 'd': 'DATE', 'ts': 'TIMESTAMP'}, timestampformat='%Y.%m.%d %H:%M:%S', dateformat='%Y.%m.%d') order by all limit 1;
----
123 TEST2 12:12:12 2000-01-01 2000-01-01 12:12:00
# test auto_detect=false (that's illegal sir)
statement error
FROM sniff_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', columns={'c1': 'BIGINT', 'c2': 'BIGINT', 'c3': 'BIGINT', 'c4': 'BIGINT', 'c5': 'BIGINT', 'c6': 'DOUBLE', 'c7': 'DOUBLE', 'c8': 'DOUBLE', 'c9': 'VARCHAR', 'c10': 'VARCHAR', 'c11': 'DATE', 'c12': 'DATE', 'c13': 'DATE', 'c14': 'VARCHAR', 'c15': 'VARCHAR', 'c16': 'VARCHAR'}, auto_detect = false);
----
sniff_csv function does not accept auto_detect variable set to false
query IIIIIIIIIIII
FROM sniff_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', columns={'c1': 'BIGINT', 'c2': 'BIGINT', 'c3': 'BIGINT', 'c4': 'BIGINT', 'c5': 'BIGINT', 'c6': 'DOUBLE', 'c7': 'DOUBLE', 'c8': 'DOUBLE', 'c9': 'VARCHAR', 'c10': 'VARCHAR', 'c11': 'DATE', 'c12': 'DATE', 'c13': 'DATE', 'c14': 'VARCHAR', 'c15': 'VARCHAR', 'c16': 'VARCHAR'}, auto_detect = true);
----
| (empty) (empty) \n (empty) 0 0 [{'name': c1, 'type': BIGINT}, {'name': c2, 'type': BIGINT}, {'name': c3, 'type': BIGINT}, {'name': c4, 'type': BIGINT}, {'name': c5, 'type': BIGINT}, {'name': c6, 'type': DOUBLE}, {'name': c7, 'type': DOUBLE}, {'name': c8, 'type': DOUBLE}, {'name': c9, 'type': VARCHAR}, {'name': c10, 'type': VARCHAR}, {'name': c11, 'type': DATE}, {'name': c12, 'type': DATE}, {'name': c13, 'type': DATE}, {'name': c14, 'type': VARCHAR}, {'name': c15, 'type': VARCHAR}, {'name': c16, 'type': VARCHAR}] %Y-%m-%d NULL NULL FROM read_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', auto_detect=false, delim='|', quote='', escape='', new_line='\n', skip=0, comment='', header=false, columns={'c1': 'BIGINT', 'c2': 'BIGINT', 'c3': 'BIGINT', 'c4': 'BIGINT', 'c5': 'BIGINT', 'c6': 'DOUBLE', 'c7': 'DOUBLE', 'c8': 'DOUBLE', 'c9': 'VARCHAR', 'c10': 'VARCHAR', 'c11': 'DATE', 'c12': 'DATE', 'c13': 'DATE', 'c14': 'VARCHAR', 'c15': 'VARCHAR', 'c16': 'VARCHAR'}, dateformat='%Y-%m-%d');
# auto_type_candidates
query IIIIIIIIIIII
FROM sniff_csv('{DATA_DIR}/csv/autotypecandidates.csv', auto_type_candidates=['SMALLINT','BIGINT', 'DOUBLE', 'FLOAT','VARCHAR']);
----
| (empty) (empty) \n (empty) 0 0 [{'name': column0, 'type': SMALLINT}, {'name': column1, 'type': FLOAT}, {'name': column2, 'type': VARCHAR}] NULL NULL NULL FROM read_csv('{DATA_DIR}/csv/autotypecandidates.csv', auto_detect=false, delim='|', quote='', escape='', new_line='\n', skip=0, comment='', header=false, columns={'column0': 'SMALLINT', 'column1': 'FLOAT', 'column2': 'VARCHAR'});
query III
FROM read_csv('{DATA_DIR}/csv/autotypecandidates.csv', auto_detect=false, delim='|', quote='"', escape='"', new_line='\n', skip=0, header=false, columns={'column0': 'SMALLINT', 'column1': 'FLOAT', 'column2': 'VARCHAR'});
----
1 1.1 bla
# don't accept globs
statement error
FROM sniff_csv('{DATA_DIR}/csv/hive-partitioning/simple/*/*/test.csv');
----
Not implemented Error: sniff_csv does not operate on more than one file yet
# don't accept madeup options
statement error
FROM sniff_csv('{DATA_DIR}/csv/autotypecandidates.csv', oop = True);
----
Invalid named parameter "oop" for function sniff_csv
# Ignore multi-partitioning options (maybe even error on them?)
query IIIIIIIIIIII
FROM sniff_csv('{DATA_DIR}/csv/autotypecandidates.csv', HIVE_PARTITIONING=1);
----
| (empty) (empty) \n (empty) 0 0 [{'name': column0, 'type': BIGINT}, {'name': column1, 'type': DOUBLE}, {'name': column2, 'type': VARCHAR}] NULL NULL NULL FROM read_csv('{DATA_DIR}/csv/autotypecandidates.csv', auto_detect=false, delim='|', quote='', escape='', new_line='\n', skip=0, comment='', header=false, columns={'column0': 'BIGINT', 'column1': 'DOUBLE', 'column2': 'VARCHAR'});
query IIIIIIIIIIII
FROM sniff_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', types=['INTEGER','BIGINT','BIGINT','BIGINT','BIGINT', 'DOUBLE','DOUBLE','DOUBLE','VARCHAR', 'VARCHAR','DATE', 'DATE', 'DATE', 'VARCHAR', 'VARCHAR', 'VARCHAR']);
----
| (empty) (empty) \n (empty) 0 0 [{'name': column00, 'type': INTEGER}, {'name': column01, 'type': BIGINT}, {'name': column02, 'type': BIGINT}, {'name': column03, 'type': BIGINT}, {'name': column04, 'type': BIGINT}, {'name': column05, 'type': DOUBLE}, {'name': column06, 'type': DOUBLE}, {'name': column07, 'type': DOUBLE}, {'name': column08, 'type': VARCHAR}, {'name': column09, 'type': VARCHAR}, {'name': column10, 'type': DATE}, {'name': column11, 'type': DATE}, {'name': column12, 'type': DATE}, {'name': column13, 'type': VARCHAR}, {'name': column14, 'type': VARCHAR}, {'name': column15, 'type': VARCHAR}] %Y-%m-%d NULL NULL FROM read_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', auto_detect=false, delim='|', quote='', escape='', new_line='\n', skip=0, comment='', header=false, columns={'column00': 'INTEGER', 'column01': 'BIGINT', 'column02': 'BIGINT', 'column03': 'BIGINT', 'column04': 'BIGINT', 'column05': 'DOUBLE', 'column06': 'DOUBLE', 'column07': 'DOUBLE', 'column08': 'VARCHAR', 'column09': 'VARCHAR', 'column10': 'DATE', 'column11': 'DATE', 'column12': 'DATE', 'column13': 'VARCHAR', 'column14': 'VARCHAR', 'column15': 'VARCHAR'}, dateformat='%Y-%m-%d');
query IIIIIIIIIIII
FROM sniff_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', dtypes=['INTEGER','BIGINT','BIGINT','BIGINT','BIGINT', 'DOUBLE','DOUBLE','DOUBLE','VARCHAR', 'VARCHAR','DATE', 'DATE', 'DATE', 'VARCHAR', 'VARCHAR', 'VARCHAR']);
----
| (empty) (empty) \n (empty) 0 0 [{'name': column00, 'type': INTEGER}, {'name': column01, 'type': BIGINT}, {'name': column02, 'type': BIGINT}, {'name': column03, 'type': BIGINT}, {'name': column04, 'type': BIGINT}, {'name': column05, 'type': DOUBLE}, {'name': column06, 'type': DOUBLE}, {'name': column07, 'type': DOUBLE}, {'name': column08, 'type': VARCHAR}, {'name': column09, 'type': VARCHAR}, {'name': column10, 'type': DATE}, {'name': column11, 'type': DATE}, {'name': column12, 'type': DATE}, {'name': column13, 'type': VARCHAR}, {'name': column14, 'type': VARCHAR}, {'name': column15, 'type': VARCHAR}] %Y-%m-%d NULL NULL FROM read_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', auto_detect=false, delim='|', quote='', escape='', new_line='\n', skip=0, comment='', header=false, columns={'column00': 'INTEGER', 'column01': 'BIGINT', 'column02': 'BIGINT', 'column03': 'BIGINT', 'column04': 'BIGINT', 'column05': 'DOUBLE', 'column06': 'DOUBLE', 'column07': 'DOUBLE', 'column08': 'VARCHAR', 'column09': 'VARCHAR', 'column10': 'DATE', 'column11': 'DATE', 'column12': 'DATE', 'column13': 'VARCHAR', 'column14': 'VARCHAR', 'column15': 'VARCHAR'}, dateformat='%Y-%m-%d');
query IIIIIIIIIIII
FROM sniff_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', column_types=['INTEGER','BIGINT','BIGINT','BIGINT','BIGINT', 'DOUBLE','DOUBLE','DOUBLE','VARCHAR', 'VARCHAR','DATE', 'DATE', 'DATE', 'VARCHAR', 'VARCHAR', 'VARCHAR']);
----
| (empty) (empty) \n (empty) 0 0 [{'name': column00, 'type': INTEGER}, {'name': column01, 'type': BIGINT}, {'name': column02, 'type': BIGINT}, {'name': column03, 'type': BIGINT}, {'name': column04, 'type': BIGINT}, {'name': column05, 'type': DOUBLE}, {'name': column06, 'type': DOUBLE}, {'name': column07, 'type': DOUBLE}, {'name': column08, 'type': VARCHAR}, {'name': column09, 'type': VARCHAR}, {'name': column10, 'type': DATE}, {'name': column11, 'type': DATE}, {'name': column12, 'type': DATE}, {'name': column13, 'type': VARCHAR}, {'name': column14, 'type': VARCHAR}, {'name': column15, 'type': VARCHAR}] %Y-%m-%d NULL NULL FROM read_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', auto_detect=false, delim='|', quote='', escape='', new_line='\n', skip=0, comment='', header=false, columns={'column00': 'INTEGER', 'column01': 'BIGINT', 'column02': 'BIGINT', 'column03': 'BIGINT', 'column04': 'BIGINT', 'column05': 'DOUBLE', 'column06': 'DOUBLE', 'column07': 'DOUBLE', 'column08': 'VARCHAR', 'column09': 'VARCHAR', 'column10': 'DATE', 'column11': 'DATE', 'column12': 'DATE', 'column13': 'VARCHAR', 'column14': 'VARCHAR', 'column15': 'VARCHAR'}, dateformat='%Y-%m-%d');
query IIIIIIIIIIII
FROM sniff_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', names=['c01','c02','c03','c04','c5', 'c06','c07','c08','c09', 'c10','c11', 'c12', 'c13', 'c14', 'c15', 'c16']);
----
| (empty) (empty) \n (empty) 0 0 [{'name': c01, 'type': BIGINT}, {'name': c02, 'type': BIGINT}, {'name': c03, 'type': BIGINT}, {'name': c04, 'type': BIGINT}, {'name': c5, 'type': BIGINT}, {'name': c06, 'type': DOUBLE}, {'name': c07, 'type': DOUBLE}, {'name': c08, 'type': DOUBLE}, {'name': c09, 'type': VARCHAR}, {'name': c10, 'type': VARCHAR}, {'name': c11, 'type': DATE}, {'name': c12, 'type': DATE}, {'name': c13, 'type': DATE}, {'name': c14, 'type': VARCHAR}, {'name': c15, 'type': VARCHAR}, {'name': c16, 'type': VARCHAR}] %Y-%m-%d NULL NULL FROM read_csv('{DATA_DIR}/csv/real/lineitem_sample.csv', auto_detect=false, delim='|', quote='', escape='', new_line='\n', skip=0, comment='', header=false, columns={'c01': 'BIGINT', 'c02': 'BIGINT', 'c03': 'BIGINT', 'c04': 'BIGINT', 'c5': 'BIGINT', 'c06': 'DOUBLE', 'c07': 'DOUBLE', 'c08': 'DOUBLE', 'c09': 'VARCHAR', 'c10': 'VARCHAR', 'c11': 'DATE', 'c12': 'DATE', 'c13': 'DATE', 'c14': 'VARCHAR', 'c15': 'VARCHAR', 'c16': 'VARCHAR'}, dateformat='%Y-%m-%d');
# Test that rejects returs correct values
query I
SELECT delimiter FROM sniff_csv('{DATA_DIR}/csv/ignore_errors.csv');
----
;
query I
SELECT delimiter FROM sniff_csv('{DATA_DIR}/csv/ignore_errors.csv', ignore_errors=true);
----
;
query I
SELECT delimiter FROM sniff_csv('{DATA_DIR}/csv/ignore_errors.csv', store_rejects=true);
----
;
query I
SELECT delimiter FROM sniff_csv('{DATA_DIR}/csv/ignore_errors.csv', rejects_scan='a_1');
----
;
query I
SELECT delimiter FROM sniff_csv('{DATA_DIR}/csv/ignore_errors.csv', rejects_table='a_2');
----
;
|