File: test_analyze.test

package info (click to toggle)
duckdb 1.5.1-2
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 299,196 kB
  • sloc: cpp: 865,414; ansic: 57,292; python: 18,871; sql: 12,663; lisp: 11,751; yacc: 7,412; lex: 1,682; sh: 747; makefile: 558
file content (100 lines) | stat: -rw-r--r-- 2,107 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# name: test/sql/vacuum/test_analyze.test
# description: Test the ANALYZE statement.
# group: [vacuum]

# The distinct statistics sampling relies on the vector size.
require vector_size 1024

require skip_reload

# Distinct statistics sampling is different for different vector sizes.
require no_vector_verification

statement ok
ANALYZE;

statement ok
VACUUM;

statement error
VACUUM test;
----
Table with name test does not exist

statement error
ANALYZE test;
----
Table with name test does not exist

statement ok
CREATE TABLE test (i INT, j INT);

statement ok
ANALYZE test;

statement ok
CREATE VIEW testview AS SELECT * FROM test;

statement error
ANALYZE testview;
----
Can only vacuum or analyze base tables

statement ok
INSERT INTO test SELECT range % 5000, range % 5000 FROM range(10000);

# The approximate unique count is inaccurate due to sampling.
query T
SELECT stats(i) FROM test LIMIT 1;
----
[Min: 0, Max: 4999][Has Null: false, Has No Null: true][Approx Unique: 10000]

query T
SELECT stats(j) FROM test LIMIT 1;
----
[Min: 0, Max: 4999][Has Null: false, Has No Null: true][Approx Unique: 10000]

statement ok
PRAGMA verify_parallelism;

statement ok
ANALYZE test(i);

statement ok
VACUUM test(i);

statement ok
PRAGMA disable_verify_parallelism;

# The approximate unique count for i is more accurate now.
query T
SELECT stats(i) FROM test LIMIT 1;
----
[Min: 0, Max: 4999][Has Null: false, Has No Null: true][Approx Unique: 5661]

# The approximate unique count for j is not yet accurate.
query T
SELECT stats(j) FROM test LIMIT 1;
----
[Min: 0, Max: 4999][Has Null: false, Has No Null: true][Approx Unique: 10000]

# Now we analyze the entire table.
statement ok
PRAGMA verify_parallelism;

statement ok
ANALYZE test;

statement ok
PRAGMA disable_verify_parallelism;

# The approximate unique count for i and j is more accurate now.
query T
SELECT stats(i) FROM test LIMIT 1;
----
[Min: 0, Max: 4999][Has Null: false, Has No Null: true][Approx Unique: 5661]

query T
SELECT stats(j) FROM test LIMIT 1;
----
[Min: 0, Max: 4999][Has Null: false, Has No Null: true][Approx Unique: 5661]