File: test_window_distinct.test_slow

package info (click to toggle)
duckdb 1.5.1-2
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 299,196 kB
  • sloc: cpp: 865,414; ansic: 57,292; python: 18,871; sql: 12,663; lisp: 11,751; yacc: 7,412; lex: 1,682; sh: 747; makefile: 558
file content (43 lines) | stat: -rw-r--r-- 995 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# name: test/sql/window/test_window_distinct.test_slow
# description: Windowed distinct aggregates at scale
# group: [window]

require tpch

statement ok
CALL dbgen(sf=1);

statement ok
PRAGMA temp_directory='__TEST_DIR__/window_distinct'

# DISTINCT aggregate over a larger data set (e.g. lineitem or so)
query I
SELECT 
	l_orderkey, 
	l_linenumber,
	l_returnflag,
	COUNT(DISTINCT l_returnflag) OVER(PARTITION BY l_orderkey ORDER BY l_linenumber)
FROM lineitem
ORDER BY ALL
----
24004860 values hashing to d39903ddf93011916ff3354d8bded7ff

# Trigger external sorting of a partition
statement ok
PRAGMA threads=4

# The actual memory consumption of this query is 1.5GB
# So this is a serious level of stress.
statement ok
PRAGMA memory_limit='500MB'

query I
WITH t AS (
	SELECT range AS i, CHR((65 + i % 26)::INTEGER) AS c
	FROM range(1e6::BIGINT) 
)
SELECT i, c, COUNT(DISTINCT c) OVER (ORDER BY i DESC) AS n
FROM t
ORDER BY ALL
----
3000000 values hashing to f56f9b4e62101f52951e1f01a9b5c862