1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
|
# name: test/sql/sample/large_sample.test_slow
# description: Test sampling of larger relations
# group: [sample]

# NOTE: records are separated by blank lines; a blank line ends a statement's
# SQL text and a query's list of expected values, so do not remove them.

statement ok
PRAGMA enable_verification;

# sample on a larger data set
# a fixed-size sample of 5 rows must return exactly 5 of the 10000 input rows
query I
SELECT COUNT(*) FROM range(10000) USING SAMPLE 5
----
5

# test sample with multiple columns
# we insert the same data in the entire column, so every sampled row is
# identical and the expected output does not depend on which rows are picked
statement ok
CREATE TABLE test2 AS SELECT i a, i::VARCHAR b, CONCAT(i, ' - ', i) c FROM repeat(1, 1000) tbl(i)

# expected rows are tab-separated: column c itself contains spaces ("1 - 1"),
# so spaces cannot be used to delimit the three columns
query III
SELECT a, b, c FROM test2 USING SAMPLE 3;
----
1	1	1 - 1
1	1	1 - 1
1	1	1 - 1

# reservoir sample from a larger dataset
# 90% of 200000 rows is expected to yield exactly 180000 rows
query I
select count(*) from range(200000) tablesample reservoir(90%);
----
180000

# run the labelled query once per loop iteration (i = 0, 1, 2)
loop i 0 3

# sample_size sampling with a large reservoir
# no expected value is listed after "----"; the shared label "reservoirlarge"
# lets the runner check that every iteration produces the same result
query I nosort reservoirlarge
select count(*) from (select * from range(200000) tbl(i) where i % 997 != 0) tbl(i) using sample 80% (reservoir);
----

endloop
|