File: return_stats_truncate.test

package info (click to toggle)
duckdb 1.5.1-3
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 299,196 kB
  • sloc: cpp: 865,414; ansic: 57,292; python: 18,871; sql: 12,663; lisp: 11,751; yacc: 7,412; lex: 1,682; sh: 747; makefile: 564
file content (49 lines) | stat: -rw-r--r-- 2,786 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# name: test/sql/copy/return_stats_truncate.test
# description: test truncation of large strings/blobs in parquet stats
# group: [copy]

# every case below writes a Parquet file with COPY ... (RETURN_STATS) and then
# inspects it with parquet_metadata(), so the parquet extension is needed
require parquet

# we cannot truncate blobs that are all 0xFF - so stats are omitted
# input: one row holding 300 repetitions of the \xFF escape, cast to BLOB.
# the returned per-column stats are expected to list only column_size_bytes,
# null_count and num_values - there is no min or max entry.
# NOTE(review): presumably no shorter byte string can act as an upper bound for
# an all-0xFF value, which is why the bounds are dropped - confirm in the writer
query IIIIII
COPY (SELECT repeat('\xFF', 300)::BLOB as blob) TO '__TEST_DIR__/test_truncate_blob.parquet' (RETURN_STATS);
----
<REGEX>:.*test_truncate_blob.parquet	1	<REGEX>:\d+	<REGEX>:\d+	<REGEX>:{'"blob"'={column_size_bytes=\d+, null_count=0, num_values=1}}	NULL

# the file that was just written agrees: with no bounds stored, neither
# exactness flag is set (both come back as NULL)
query II
SELECT min_is_exact, max_is_exact FROM parquet_metadata('__TEST_DIR__/test_truncate_blob.parquet');
----
NULL	NULL

# if there is any byte that is less than 0xFF we can truncate
# input: one row holding the \xFE escape followed by 300 repetitions of \xFF,
# cast to BLOB (this overwrites the file produced by the previous case).
# the expected stats now contain both bounds: min is FE followed by a run of FF
# bytes, and max is the single byte FF.
query IIIIII
COPY (SELECT ('\xFE' || repeat('\xFF', 300))::BLOB as blob) TO '__TEST_DIR__/test_truncate_blob.parquet' (RETURN_STATS);
----
<REGEX>:.*test_truncate_blob.parquet	1	<REGEX>:\d+	<REGEX>:\d+	<REGEX>:{'"blob"'={column_size_bytes=\d+, max=FF, min=FEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, null_count=0, num_values=1}}	NULL

# the bounds stored for the truncatable blob are not the original value, so the
# file metadata must report both min and max as not exact
query II
SELECT min_is_exact, max_is_exact FROM parquet_metadata('__TEST_DIR__/test_truncate_blob.parquet');
----
false	false

# for strings we don't truncate unicode characters
# input: one row holding 300 repetitions of the duck emoji, with no other
# characters in the string.
# as in the all-0xFF blob case, the returned stats are expected to list only
# column_size_bytes, null_count and num_values - there is no min or max entry.
query IIIIII
COPY (SELECT repeat('🦆', 300) as str) TO '__TEST_DIR__/test_truncate_string.parquet' (RETURN_STATS);
----
<REGEX>:.*test_truncate_string.parquet	1	<REGEX>:\d+	<REGEX>:\d+	<REGEX>:{'"str"'={column_size_bytes=\d+, null_count=0, num_values=1}}	NULL

# with no bounds stored for the string column, neither exactness flag is set
# (both come back as NULL)
query II
SELECT min_is_exact, max_is_exact FROM parquet_metadata('__TEST_DIR__/test_truncate_string.parquet');
----
NULL	NULL

# but we can truncate if there is a single ascii character in the string
# input: one row holding 'B' followed by 300 repetitions of the duck emoji
# (this overwrites the file produced by the previous case).
# the expected stats now contain both bounds: min is 'B' followed by a run of
# whole duck emoji, and max is the single character 'C'.
query IIIIII
COPY (SELECT 'B' || repeat('🦆', 300) as str) TO '__TEST_DIR__/test_truncate_string.parquet' (RETURN_STATS);
----
<REGEX>:.*test_truncate_string.parquet	1	<REGEX>:\d+	<REGEX>:\d+	<REGEX>:{'"str"'={column_size_bytes=\d+, max=C, min=B🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆🦆, null_count=0, num_values=1}}	NULL

# the bounds stored for the truncatable string are not the original value, so
# the file metadata must report both min and max as not exact
query II
SELECT min_is_exact, max_is_exact FROM parquet_metadata('__TEST_DIR__/test_truncate_string.parquet');
----
false	false