File: large_sample.test_slow

package info (click to toggle)
duckdb 1.5.1-3
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 299,196 kB
  • sloc: cpp: 865,414; ansic: 57,292; python: 18,871; sql: 12,663; lisp: 11,751; yacc: 7,412; lex: 1,682; sh: 747; makefile: 564
file content (40 lines) | stat: -rw-r--r-- 877 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# name: test/sql/sample/large_sample.test_slow
# description: Test sampling of larger relations
# group: [sample]

# Switch on query verification for every statement that follows in this file.
statement ok
PRAGMA enable_verification;

# Fixed-size sample over a larger input: asking for 5 rows out of a
# 10000-row range must yield exactly 5 rows.
query I
SELECT COUNT(*)
FROM range(10000)
USING SAMPLE 5;
----
5

# Sampling a table with several columns.
# All 1000 rows of test2 hold identical values (built from the constant 1),
# so whichever 3 rows the sample picks, the output is the same.
statement ok
CREATE TABLE test2 AS
SELECT
    i AS a,
    CAST(i AS VARCHAR) AS b,
    CONCAT(i, ' - ', i) AS c
FROM repeat(1, 1000) AS tbl(i);

query III
SELECT a, b, c
FROM test2
USING SAMPLE 3;
----
1	1	1 - 1
1	1	1 - 1
1	1	1 - 1

# Percentage-based reservoir sample over a larger input:
# 90% of a 200000-row range is expected to be exactly 180000 rows.
query I
SELECT COUNT(*)
FROM range(200000)
TABLESAMPLE reservoir(90%);
----
180000

# Repeat the same reservoir-sample count several times (loop bounds 0 and 3).
# The query below lists no expected value after `----`; it carries the label
# `reservoirlarge` instead.
# NOTE(review): presumably the shared label makes the runner compare the result
# of every iteration against the first one, so the sampled row count must be
# reproducible from run to run - confirm against the sqllogictest runner docs.
loop i 0 3

# percentage-based (80%) reservoir sampling with a large reservoir;
# the input is a filtered subquery that drops every multiple of 997
query I nosort reservoirlarge
select count(*) from (select * from range(200000) tbl(i) where i % 997 != 0) tbl(i) using sample 80% (reservoir);
----

endloop