1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
|
# name: test/sql/function/list/aggregates/approx_count_distinct.test
# description: Test the list_approx_count_distinct aggregate function
# group: [aggregates]
statement ok
CREATE TABLE list_ints (l INTEGER[]);
statement ok
INSERT INTO list_ints SELECT LIST(i) FROM range(100) tbl(i);
query II
select list_approx_count_distinct([10]), list_approx_count_distinct(['hello']) from list_ints;
----
1 1
query II
select list_approx_count_distinct(l), list_approx_count_distinct(['hello']) from list_ints;
----
98 1
query I
select list_approx_count_distinct([]) from list_ints;
----
0
statement ok
INSERT INTO list_ints VALUES ([]), (NULL), ([NULL]);
query I
select list_approx_count_distinct(l) from list_ints;
----
98
0
NULL
0
# dates
statement ok
CREATE TABLE IF NOT EXISTS dates (t date[]);
statement ok
INSERT INTO dates VALUES (['2008-01-01', NULL, '2007-01-01', '2008-02-01', '2008-01-02', '2008-01-01', '2008-01-01', '2008-01-01'])
query II
SELECT list_count(t), list_approx_count_distinct(t) from dates
----
7 4
# timestamps
statement ok
CREATE TABLE IF NOT EXISTS timestamp (t TIMESTAMP[]);
statement ok
INSERT INTO timestamp VALUES (['2008-01-01 00:00:01', NULL, '2007-01-01 00:00:01', '2008-02-01 00:00:01', '2008-01-02 00:00:01', '2008-01-01 10:00:00', '2008-01-01 00:10:00', '2008-01-01 00:00:10'])
query II
SELECT list_count(t), list_approx_count_distinct(t) from timestamp
----
7 6
# strings
statement ok
CREATE TABLE IF NOT EXISTS names (t string[]);
statement ok
INSERT INTO names VALUES (['Pedro', NULL, 'Pedro', 'Pedro', 'Mark', 'Mark', 'Mark', 'Hannes-Muehleisen', 'Hannes-Muehleisen'])
query II
SELECT list_count(t), list_approx_count_distinct(t) from names
----
8 3
# test approximate count on different (long) lists
statement ok
CREATE TABLE list_ints_2 (a INTEGER[], b INTEGER[]);
statement ok
INSERT INTO list_ints_2 SELECT LIST(a), LIST(mod(a, 10)) FROM range(2000) tbl(a);
query III
SELECT list_count(a), list_approx_count_distinct(a), list_approx_count_distinct(b) from list_ints_2
----
2000 2322 11
statement ok
DELETE FROM list_ints_2
statement ok
INSERT INTO list_ints_2 SELECT LIST(a), NULL FROM range(2000) tbl(a, b) WHERE a % 2 = 0;
statement ok
INSERT INTO list_ints_2 SELECT LIST(a), NULL FROM range(2000) tbl(a, b) WHERE a % 2 = 1;
query I
SELECT list_approx_count_distinct(a) from list_ints_2;
----
1006
1230
|