File: group_by_hypergraph.test

package info (click to toggle)
mysql-8.0 8.0.43-3
links: PTS, VCS
area: main
in suites: sid
size: 1,273,924 kB
sloc: cpp: 4,684,605; ansic: 412,450; pascal: 108,398; java: 83,641; perl: 30,221; cs: 27,067; sql: 26,594; sh: 24,181; python: 21,816; yacc: 17,169; php: 11,522; xml: 7,388; javascript: 7,076; makefile: 2,194; lex: 1,075; awk: 670; asm: 520; objc: 183; ruby: 97; lisp: 86
file content (318 lines) | stat: -rw-r--r-- 9,672 bytes
--source include/have_hypergraph.inc
--source include/elide_costs.inc

--echo #
--echo # Bug#32980875: WL#14422: ASSERTION `FALSE' FAILED|SQL/BASIC_ROW_ITERATORS.H
--echo #

# This should have been a unit test in hypergraph_optimizer-t.cc, but
# the unit test framework does not currently allow creating temporary
# tables.
#
# We want to test that the hypergraph optimizer doesn't try to do
# sort-ahead on an aggregate.
#
# We expect the hypergraph optimizer to set up a plan like this:
#
# -> Remove duplicates from input grouped on t.x, `MIN(t2.x)`
#   -> Stream results
#       -> Group aggregate: min(t2.x)
#           -> Sort: t2.x
#               -> Inner hash join (t1.x = t2.x)
#                   -> Table scan on t1
#                   -> Hash
#                       -> Table scan on t2
#
# Before, it would set up this plan, which tried to sort on MIN(t2.x)
# before the aggregation had happened:
#
# -> Remove duplicates from input grouped on t1.x, min(t2.x)
#   -> Stream results
#       -> Group aggregate: min(t2.x)
#           -> Sort: t1.x, min(t2.x)
#               -> Inner hash join (t1.x = t2.x)
#                   -> Table scan on t1
#                   -> Hash
#                       -> Table scan on t2
#
# Note that because MIN(t2.x) is functionally dependent on the GROUP BY
# expression (which happens to be t2.x as well here), we avoid a sort
# in the final DISTINCT pass. We further shuffle the SELECT expressions
# around a bit (putting the MIN() first) to demonstrate that our sorting of
# expressions in an interesting grouping is robust.
#
# Of course, we should have been able to remove the entire DISTINCT operation,
# and if we wrote t2.x instead of t1.x, we would be able to do that. However,
# the interesting order framework does not track uniqueness (so can not do it),
# and the hard-coded DISTINCT removal, which runs before the join optimizer,
# does not take functional dependencies into account, so it does not know that
# t1.x (in the SELECT list) = t2.x (in the GROUP BY list).

CREATE TABLE t (x INTEGER);
INSERT INTO t VALUES (1), (2), (3);
ANALYZE TABLE t;
--replace_regex / *\(cost=.*//
EXPLAIN FORMAT=TREE
 SELECT DISTINCT MIN(t2.x), t1.x
 FROM t t1 JOIN t t2 USING (x)
 GROUP BY t2.x;
DROP TABLE t;


--echo #
--echo # Bug #34670701 Too many ROLLUP rows with hypergraph
--echo #

CREATE TABLE t1(
  a INT,
  b INT,
  c INT,
  d INT,
  e INT,
    PRIMARY KEY(a,b),
  KEY ix1 (c,d)
);

INSERT INTO t1 VALUES (0,0,0,0,1), (1,0,1,0,1), (0,1,2,0,1), (2,0,2,0,1), (4,0,0,0,1);
ANALYZE TABLE t1;

# When we use ROLLUP, we can scan ix1 if the group-by terms form a (correctly ordered)
# prefix of [c,d,a,b] and the primary key for a prefix of [a,b]. Otherwise, we must sort.

# All of these need sort.

--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT a,c,d,sum(e) FROM t1 GROUP BY a,c,d WITH ROLLUP;

--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT a,d,c,sum(e) FROM t1 GROUP BY a,d,c WITH ROLLUP;

--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT c,a,d,sum(e) FROM t1 GROUP BY c,a,d WITH ROLLUP;

--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT d,a,c,sum(e) FROM t1 GROUP BY d,a,c WITH ROLLUP;

--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT c,d,b,sum(e) FROM t1 GROUP BY c,d,b WITH ROLLUP;

--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT d,c,a,sum(e) FROM t1 GROUP BY d,c,a WITH ROLLUP;

--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT b,a,sum(e) FROM t1 GROUP BY b,a WITH ROLLUP;

--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT b,sum(e) FROM t1 GROUP BY b WITH ROLLUP;

# For these queries, the group-by key form a prefix of  [c,d,a,b]
# (in that order). Therefore they can scan ix1.

--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT c,sum(e) FROM t1 GROUP BY c WITH ROLLUP;

SELECT c,sum(e) FROM t1 GROUP BY c WITH ROLLUP;

--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT c,d,sum(e) FROM t1 GROUP BY c,d WITH ROLLUP;

SELECT c,d,sum(e) FROM t1 GROUP BY c,d WITH ROLLUP;

--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT c,d,a,sum(e) FROM t1 GROUP BY c,d,a WITH ROLLUP;

SELECT c,d,a,sum(e) FROM t1 GROUP BY c,d,a WITH ROLLUP;

--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT c,d,a,b,sum(e) FROM t1 GROUP BY c,d,a,b WITH ROLLUP;

SELECT c,d,a,b,sum(e) FROM t1 GROUP BY c,d,a,b WITH ROLLUP;

# For these queries, the group-by terms for a prefix of the primary index.
# So we can scan that.

--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT a,sum(e) FROM t1 GROUP BY a WITH ROLLUP;

SELECT a,sum(e) FROM t1 GROUP BY a WITH ROLLUP;

--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT a,b,sum(e) FROM t1 GROUP BY a,b WITH ROLLUP;

SELECT a,b,sum(e) FROM t1 GROUP BY a,b WITH ROLLUP;

#No ROLLUP. We scan ix1 since it covers the group-by fields.
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT d,a,c,sum(e) FROM t1 GROUP BY d,a,c;

SELECT d,a,c,sum(e) FROM t1 GROUP BY d,a,c;

#No ROLLUP. We scan ix1 since it covers the group-by fields.
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT a,d,c,sum(e) FROM t1 GROUP BY a,d,c;

SELECT a,d,c,sum(e) FROM t1 GROUP BY a,d,c;

#No ROLLUP. We scan the primary index since it covers the group-by fields.
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT b,a,sum(e) FROM t1 GROUP BY b,a;

SELECT b,a,sum(e) FROM t1 GROUP BY b,a;

#No ROLLUP. No index covers the group-by fields, therefore we must sort.
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT a,c,e,sum(d) FROM t1 GROUP BY a,c,e;

DROP TABLE t1;

--echo #
--echo # Bug #33968442: Hypergraph gives too high row estimates for GROUP BY
--echo #

CREATE TABLE num10 (n INT);
INSERT INTO num10 VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);

CREATE VIEW num1000 AS
SELECT d1.n+d2.n*10+d3.n*100 n FROM num10 d1, num10 d2, num10 d3;

CREATE TABLE t1(
  a INT,
  b INT,
  c INT,
  d INT,
  e INT,
  f INT,
  g INT,
  h INT,
  i INT,
  j INT,
  k INT,
  l INT,
  PRIMARY KEY(a,b),
  KEY ix1 (c,d),
  KEY ix2 (d,a,c),
  KEY ix3 (g,h,i,j),
  KEY ix4 (k,j,l),
  KEY ix5 (k,l)
);

INSERT INTO t1
  SELECT n/100,n%100,n%5,n%7,n%11,n%13,n%10,n%10,n%10,n%10,n%10,n%10
  FROM num1000;

ANALYZE TABLE t1;

# Estimate result size using primary index.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY a;

# Estimate result size using ix1.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY c;

# Estimate result size using ix1.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY d,c;

# Estimate result size using ix2.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY d,a;

# Estimate result size using ix1 or ix2.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY c,d,a;

# Estimate result size using:
# - ix1 or ix2 for d,c,a.
# - ix1 or ix2 for d,c
# - ix2 d
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY d,a,c WITH ROLLUP;

# Estimate result size using:
# - ix1 or ix2 for c,d,a.
# - ix1 for c,d
# - ix1 for c
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY c,d,a WITH ROLLUP;

# Estimate result size using:
# - ix1 or ix2 for c,a,d.
# - ix1 and PRIMARY for c,a
# - ix1 for c
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY c,a,d WITH ROLLUP;

# Estimate result size using ix1 and PRIMARY.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY c,a;

# Estimate result size using ix1 (for c) and t1 row count.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY c,b;

# Estimate result size using t1 row count.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY e,f;

# Estimate result size using ix3 and ix5 (not one-field prefix of ix4).
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY g,h,i,j,k,l;

ANALYZE TABLE t1 UPDATE HISTOGRAM ON a,b,c,d,e,f,g,h,i;
ANALYZE TABLE t1;

# Estimate result size using histogram.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY e;

# Estimate result size using histograms.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY e,f;

# Estimate result size using histograms, and then cap to input set size.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 WHERE b>95 GROUP BY e,f;

# Estimate result size using input row count.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY c+0,e+0;

# Estimate result size using input row count.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 WHERE b>95 GROUP BY c+0,e+0;

# Estimate result size using primary index (for a), histogram (for e)
# and t1 row count for 'c+0'.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY a,e,c+0;

CREATE TABLE t2 (
  c1 INT,
  c2 INT,
  c3 INT,
  PRIMARY KEY(c1,c2)
);

INSERT INTO t2 SELECT n%5,n/5,n%3 FROM num10;
ANALYZE TABLE t2 UPDATE HISTOGRAM ON c3;
ANALYZE TABLE t2;

# Estimate result size using primary index.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1,t2 GROUP BY c1;

# Estimate result size using t2 row count.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1,t2 GROUP BY c2;

# Estimate result size using histogram.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1,t2 GROUP BY c3;

# Estimate result size using primary index (a) and histogram (c3).
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1,t2 GROUP BY a,c3;

DROP VIEW num1000;
DROP TABLE num10, t1, t2;