1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318
|
--source include/have_hypergraph.inc
--source include/elide_costs.inc
--echo #
--echo # Bug#32980875: WL#14422: ASSERTION `FALSE' FAILED|SQL/BASIC_ROW_ITERATORS.H
--echo #
# This should have been a unit test in hypergraph_optimizer-t.cc, but
# the unit test framework does not currently allow creating temporary
# tables.
#
# We want to test that the hypergraph optimizer doesn't try to do
# sort-ahead on an aggregate.
#
# We expect the hypergraph optimizer to set up a plan like this:
#
# -> Remove duplicates from input grouped on t.x, `MIN(t2.x)`
#     -> Stream results
#         -> Group aggregate: min(t2.x)
#             -> Sort: t2.x
#                 -> Inner hash join (t1.x = t2.x)
#                     -> Table scan on t1
#                     -> Hash
#                         -> Table scan on t2
#
# Before, it would set up this plan, which tried to sort on MIN(t2.x)
# before the aggregation had happened:
#
# -> Remove duplicates from input grouped on t1.x, min(t2.x)
#     -> Stream results
#         -> Group aggregate: min(t2.x)
#             -> Sort: t1.x, min(t2.x)
#                 -> Inner hash join (t1.x = t2.x)
#                     -> Table scan on t1
#                     -> Hash
#                         -> Table scan on t2
#
# Note that because MIN(t2.x) is functionally dependent on the GROUP BY
# expression (which happens to be t2.x as well here), we avoid a sort
# in the final DISTINCT pass. We further shuffle the SELECT expressions
# around a bit (putting the MIN() first) to demonstrate that our sorting of
# expressions in an interesting grouping is robust.
#
# Of course, we should have been able to remove the entire DISTINCT operation,
# and if we wrote t2.x instead of t1.x, we would be able to do that. However,
# the interesting order framework does not track uniqueness (so cannot do it),
# and the hard-coded DISTINCT removal, which runs before the join optimizer,
# does not take functional dependencies into account, so it does not know that
# t1.x (in the SELECT list) = t2.x (in the GROUP BY list).
#
# Setup: a single-column table with three rows, joined with itself below
# under the aliases t1 and t2.
CREATE TABLE t (x INTEGER);
INSERT INTO t VALUES (1), (2), (3);
ANALYZE TABLE t;
# The regex below deletes everything from "(cost=" (and any spaces in front
# of it) to the end of each output line, so that cost figures do not appear
# in the EXPLAIN output.
# NOTE(review): the later tests in this file use
# "--replace_regex $elide_costs" instead of an inline pattern; confirm
# whether this statement could use it as well.
--replace_regex / *\(cost=.*//
EXPLAIN FORMAT=TREE
SELECT DISTINCT MIN(t2.x), t1.x
FROM t t1 JOIN t t2 USING (x)
GROUP BY t2.x;
DROP TABLE t;
--echo #
--echo # Bug #34670701 Too many ROLLUP rows with hypergraph
--echo #
# Setup for the ROLLUP tests: t1 has a two-column primary key (a,b) and a
# secondary index ix1 on (c,d). Column e is 1 in every inserted row. Five
# rows are inserted, and ANALYZE TABLE is run before any plan is printed.
CREATE TABLE t1(
a INT,
b INT,
c INT,
d INT,
e INT,
PRIMARY KEY(a,b),
KEY ix1 (c,d)
);
INSERT INTO t1 VALUES (0,0,0,0,1), (1,0,1,0,1), (0,1,2,0,1), (2,0,2,0,1), (4,0,0,0,1);
ANALYZE TABLE t1;
# When we use ROLLUP, we can scan ix1 if the group-by terms form a (correctly
# ordered) prefix of [c,d,a,b], and we can scan the primary key if they form
# a (correctly ordered) prefix of [a,b]. Otherwise, we must sort.
#
# All of the queries in this group need a sort: none of their group-by lists
# is a prefix of [c,d,a,b] or of [a,b]. Only the plan is printed (with costs
# elided); the queries themselves are not executed.
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT a,c,d,sum(e) FROM t1 GROUP BY a,c,d WITH ROLLUP;
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT a,d,c,sum(e) FROM t1 GROUP BY a,d,c WITH ROLLUP;
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT c,a,d,sum(e) FROM t1 GROUP BY c,a,d WITH ROLLUP;
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT d,a,c,sum(e) FROM t1 GROUP BY d,a,c WITH ROLLUP;
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT c,d,b,sum(e) FROM t1 GROUP BY c,d,b WITH ROLLUP;
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT d,c,a,sum(e) FROM t1 GROUP BY d,c,a WITH ROLLUP;
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT b,a,sum(e) FROM t1 GROUP BY b,a WITH ROLLUP;
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT b,sum(e) FROM t1 GROUP BY b WITH ROLLUP;
# For these queries, the group-by keys form a prefix of [c,d,a,b]
# (in that order). Therefore they can scan ix1.
# Each plan is followed by the query itself, so that the ROLLUP result rows
# are output as well as the plan.
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT c,sum(e) FROM t1 GROUP BY c WITH ROLLUP;
SELECT c,sum(e) FROM t1 GROUP BY c WITH ROLLUP;
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT c,d,sum(e) FROM t1 GROUP BY c,d WITH ROLLUP;
SELECT c,d,sum(e) FROM t1 GROUP BY c,d WITH ROLLUP;
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT c,d,a,sum(e) FROM t1 GROUP BY c,d,a WITH ROLLUP;
SELECT c,d,a,sum(e) FROM t1 GROUP BY c,d,a WITH ROLLUP;
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT c,d,a,b,sum(e) FROM t1 GROUP BY c,d,a,b WITH ROLLUP;
SELECT c,d,a,b,sum(e) FROM t1 GROUP BY c,d,a,b WITH ROLLUP;
# For these queries, the group-by terms form a prefix of the primary index
# [a,b], so we can scan that. Again, each plan is followed by the query
# itself.
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT a,sum(e) FROM t1 GROUP BY a WITH ROLLUP;
SELECT a,sum(e) FROM t1 GROUP BY a WITH ROLLUP;
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT a,b,sum(e) FROM t1 GROUP BY a,b WITH ROLLUP;
SELECT a,b,sum(e) FROM t1 GROUP BY a,b WITH ROLLUP;
# The remaining queries use plain GROUP BY, for comparison with the ROLLUP
# cases: without ROLLUP, the order of the group-by terms does not have to
# match the order of the index columns.
#
# No ROLLUP. We scan ix1 since it covers the group-by fields.
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT d,a,c,sum(e) FROM t1 GROUP BY d,a,c;
SELECT d,a,c,sum(e) FROM t1 GROUP BY d,a,c;
# No ROLLUP. We scan ix1 since it covers the group-by fields.
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT a,d,c,sum(e) FROM t1 GROUP BY a,d,c;
SELECT a,d,c,sum(e) FROM t1 GROUP BY a,d,c;
# No ROLLUP. We scan the primary index since it covers the group-by fields.
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT b,a,sum(e) FROM t1 GROUP BY b,a;
SELECT b,a,sum(e) FROM t1 GROUP BY b,a;
# No ROLLUP. No index covers the group-by fields, therefore we must sort.
# Only the plan is printed for this query; it is not executed.
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT a,c,e,sum(d) FROM t1 GROUP BY a,c,e;
# Clean up; the next test creates its own table named t1.
DROP TABLE t1;
--echo #
--echo # Bug #33968442: Hypergraph gives too high row estimates for GROUP BY
--echo #
# Helper data: num10 holds the ten digits 0..9, and the view num1000
# combines three copies of it (ones, tens, hundreds) into the numbers
# n = 0..999.
CREATE TABLE num10 (n INT);
INSERT INTO num10 VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
CREATE VIEW num1000 AS
SELECT d1.n+d2.n*10+d3.n*100 n FROM num10 d1, num10 d2, num10 d3;
# Test table with twelve INT columns, a primary key on (a,b) and five
# secondary indexes (ix1..ix5) that the estimation tests below refer to
# by name.
CREATE TABLE t1(
a INT,
b INT,
c INT,
d INT,
e INT,
f INT,
g INT,
h INT,
i INT,
j INT,
k INT,
l INT,
PRIMARY KEY(a,b),
KEY ix1 (c,d),
KEY ix2 (d,a,c),
KEY ix3 (g,h,i,j),
KEY ix4 (k,j,l),
KEY ix5 (k,l)
);
# Populate t1 with one row per n in num1000. Columns a..f are derived from n
# by a division (a) or by modulo with different divisors (b..f); columns
# g..l are all n%10, so they are identical to each other within a row.
INSERT INTO t1
SELECT n/100,n%100,n%5,n%7,n%11,n%13,n%10,n%10,n%10,n%10,n%10,n%10
FROM num1000;
ANALYZE TABLE t1;
# Row estimates without histograms. Each EXPLAIN ANALYZE below groups on a
# different set of columns; the comment in front of it names the statistics
# that the estimate is expected to be based on. The output of every
# statement is filtered through $elide_costs_and_time.
#
# Estimate result size using primary index.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY a;
# Estimate result size using ix1.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY c;
# Estimate result size using ix1.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY d,c;
# Estimate result size using ix2.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY d,a;
# Estimate result size using ix1 or ix2.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY c,d,a;
# Estimate result size using:
# - ix1 or ix2 for d,c,a.
# - ix1 or ix2 for d,c
# - ix2 for d
# NOTE(review): the ROLLUP levels of GROUP BY d,a,c are (d,a,c), (d,a) and
# (d), so the second item above possibly meant "ix2 for d,a" - confirm.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY d,a,c WITH ROLLUP;
# Estimate result size using:
# - ix1 or ix2 for c,d,a.
# - ix1 for c,d
# - ix1 for c
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY c,d,a WITH ROLLUP;
# Estimate result size using:
# - ix1 or ix2 for c,a,d.
# - ix1 and PRIMARY for c,a
# - ix1 for c
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY c,a,d WITH ROLLUP;
# Estimate result size using ix1 and PRIMARY.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY c,a;
# Estimate result size using ix1 (for c) and t1 row count.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY c,b;
# Estimate result size using t1 row count (neither e nor f is indexed).
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY e,f;
# Estimate result size using ix3 and ix5 (not one-field prefix of ix4).
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY g,h,i,j,k,l;
# Row estimates with histograms. Histograms are built for columns a..i
# (j, k and l get none), and then the table is analyzed again before the
# plans below are printed.
ANALYZE TABLE t1 UPDATE HISTOGRAM ON a,b,c,d,e,f,g,h,i;
ANALYZE TABLE t1;
# Estimate result size using histogram.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY e;
# Estimate result size using histograms. (The same GROUP BY e,f was also
# explained before the histograms were created.)
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY e,f;
# Estimate result size using histograms, and then cap to input set size.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 WHERE b>95 GROUP BY e,f;
# Estimate result size using input row count (the group-by terms are
# expressions, not plain columns).
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY c+0,e+0;
# Estimate result size using input row count (here, the rows left after the
# WHERE filter).
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 WHERE b>95 GROUP BY c+0,e+0;
# Estimate result size using primary index (for a), histogram (for e)
# and t1 row count for 'c+0'.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY a,e,c+0;
# Row estimates for GROUP BY on top of a join. t2 is a second table with a
# primary key on (c1,c2) and an unindexed column c3; it gets one row per row
# of num10 and a histogram on c3 only.
CREATE TABLE t2 (
c1 INT,
c2 INT,
c3 INT,
PRIMARY KEY(c1,c2)
);
INSERT INTO t2 SELECT n%5,n/5,n%3 FROM num10;
ANALYZE TABLE t2 UPDATE HISTOGRAM ON c3;
ANALYZE TABLE t2;
# The queries below join t1 and t2 without any join condition and group on
# columns of t2 (and, in the last query, of both tables).
#
# Estimate result size using primary index.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1,t2 GROUP BY c1;
# Estimate result size using t2 row count.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1,t2 GROUP BY c2;
# Estimate result size using histogram.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1,t2 GROUP BY c3;
# Estimate result size using primary index (a) and histogram (c3).
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1,t2 GROUP BY a,c3;
# Clean up everything created for this test.
DROP VIEW num1000;
DROP TABLE num10, t1, t2;
|