/*
Copyright (c) 2015 MariaDB Corporation Ab
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
/*
== ANALYZE-stmt classes ==

This file contains classes that support the "ANALYZE statement" feature.
They are a set of data structures that store data about how the statement
was executed.

There are two kinds of data collection:

1. Various counters. We assume that incrementing counters has very low
overhead, so the execution code increments them unconditionally (even when
not running an "ANALYZE $statement" command: regular SELECT/UPDATE/DELETE/etc
statements update the counters, too).

As a free bonus, this lets us print detailed information into the slow query
log, should the query be slow.

2. Timing data. Measuring the time it takes to run parts of a query has
noticeable overhead, so we measure the time only when running "ANALYZE
$stmt".
*/
/*
A class for tracking the time it takes to perform a certain action
*/
class Exec_time_tracker
{
protected:
ulonglong count;
ulonglong cycles;
ulonglong last_start;
void cycles_stop_tracking()
{
ulonglong end= my_timer_cycles();
cycles += end - last_start;
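/* The cycle counter may wrap around; compensate if it did */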
if (unlikely(end < last_start))
cycles += ULONGLONG_MAX;
}
public:
Exec_time_tracker() : count(0), cycles(0) {}
// interface for collecting time
void start_tracking()
{
last_start= my_timer_cycles();
}
void stop_tracking()
{
count++;
cycles_stop_tracking();
}
// interface for getting the time
ulonglong get_loops() const { return count; }
double get_time_ms() const
{
// convert 'cycles' to milliseconds.
return 1000 * ((double)cycles) / sys_timer_info.cycles.frequency;
}
};
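/*
A minimal usage sketch (hypothetical caller, not actual server code),
showing the intended start/stop pairing and how the result is read back:

    Exec_time_tracker tracker;
    for (uint i= 0; i < n_calls; i++)          // n_calls is hypothetical
    {
      tracker.start_tracking();
      do_the_tracked_work();                   // hypothetical tracked action
      tracker.stop_tracking();
    }
    // tracker.get_loops()   == n_calls
    // tracker.get_time_ms() == total time spent in do_the_tracked_work(), ms
*/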
/*
A class for counting certain actions (in all queries), and optionally
collecting the timings (in ANALYZE queries).
*/
class Time_and_counter_tracker: public Exec_time_tracker
{
public:
const bool timed;
Time_and_counter_tracker(bool timed_arg) : timed(timed_arg)
{}
/* Loops are counted in both ANALYZE and regular queries, as this is cheap */
void incr_loops() { count++; }
/*
Unlike Exec_time_tracker::stop_tracking, we don't increment the loop count
here; that is done separately, via incr_loops().
*/
void stop_tracking()
{
cycles_stop_tracking();
}
};
#define ANALYZE_START_TRACKING(tracker) \
{ \
(tracker)->incr_loops(); \
if (unlikely((tracker)->timed)) \
{ (tracker)->start_tracking(); } \
}
#define ANALYZE_STOP_TRACKING(tracker) \
if (unlikely((tracker)->timed)) \
{ (tracker)->stop_tracking(); }
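/*
Intended usage of the macros (a sketch, not actual server code). The caller
always counts loops, and pays the my_timer_cycles() cost only when the
tracker was created for an ANALYZE statement:

    Time_and_counter_tracker *tracker= ...;    // constructed with timed=true
                                               // only for ANALYZE statements
    ANALYZE_START_TRACKING(tracker);
    read_next_row();                           // hypothetical tracked action
    ANALYZE_STOP_TRACKING(tracker);
*/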
/*
A class for collecting read statistics.
The idea is that we run several scans. Each scan reads rows and then filters
some of them out. We count scans, rows, and rows left after filtering.
(Note: at the moment, the class is not actually tied to a physical table.
It can also be used to track reading from files, buffers, etc.)
A usage sketch follows the class definition below.
*/
class Table_access_tracker
{
public:
Table_access_tracker() :
r_scans(0), r_rows(0), /*r_rows_after_table_cond(0),*/
r_rows_after_where(0)
{}
ha_rows r_scans; /* How many scans were run on this join_tab */
ha_rows r_rows; /* How many rows we got from those scans */
ha_rows r_rows_after_where; /* Rows after applying attached part of WHERE */
bool has_scans() { return (r_scans != 0); }
ha_rows get_loops() { return r_scans; }
double get_avg_rows()
{
return r_scans ? ((double)r_rows / r_scans): 0;
}
double get_filtered_after_where()
{
double r_filtered;
if (r_rows > 0)
r_filtered= (double)r_rows_after_where / r_rows;
else
r_filtered= 1.0;
return r_filtered;
}
inline void on_scan_init() { r_scans++; }
inline void on_record_read() { r_rows++; }
inline void on_record_after_where() { r_rows_after_where++; }
};
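/*
Usage sketch for Table_access_tracker (a hypothetical reader loop, not
actual server code). These counters are what ANALYZE reports as r_rows and
r_filtered:

    Table_access_tracker tracker;
    tracker.on_scan_init();                   // once per scan
    while (read_one_row())                    // hypothetical read loop
    {
      tracker.on_record_read();               // every row fetched
      if (row_passes_attached_where_cond())   // hypothetical condition check
        tracker.on_record_after_where();      // rows surviving the filter
    }
    // tracker.get_avg_rows()             -> average rows read per scan
    // tracker.get_filtered_after_where() -> fraction in [0.0, 1.0]
*/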
class Json_writer;
/*
This stores data about how filesort executed.
A few things here (e.g. r_used_pq, r_limit) conceptually belong to the query
plan; however, they only become known during execution, so we can't easily
put them into the query plan.
The class is designed to handle multiple invocations of filesort().
*/
class Filesort_tracker : public Sql_alloc
{
public:
Filesort_tracker(bool do_timing) :
time_tracker(do_timing), r_limit(0), r_used_pq(0),
r_examined_rows(0), r_sorted_rows(0), r_output_rows(0),
sort_passes(0),
sort_buffer_size(0)
{}
/* Functions that filesort uses to report various things about its execution */
inline void report_use(ha_rows r_limit_arg)
{
if (!time_tracker.get_loops())
r_limit= r_limit_arg;
else
r_limit= (r_limit != r_limit_arg)? 0: r_limit_arg;
ANALYZE_START_TRACKING(&time_tracker);
}
inline void incr_pq_used() { r_used_pq++; }
inline void report_row_numbers(ha_rows examined_rows,
ha_rows sorted_rows,
ha_rows returned_rows)
{
r_examined_rows += examined_rows;
r_sorted_rows += sorted_rows;
r_output_rows += returned_rows;
}
inline void report_merge_passes_at_start(ulong passes)
{
sort_passes -= passes;
}
inline void report_merge_passes_at_end(ulong passes)
{
ANALYZE_STOP_TRACKING(&time_tracker);
sort_passes += passes;
}
inline void report_sort_buffer_size(size_t bufsize)
{
if (sort_buffer_size)
sort_buffer_size= ulonglong(-1); // multiple buffers of different sizes
else
sort_buffer_size= bufsize;
}
/* Functions to get the statistics */
void print_json_members(Json_writer *writer);
ulonglong get_r_loops() const { return time_tracker.get_loops(); }
double get_avg_examined_rows()
{
return ((double)r_examined_rows) / get_r_loops();
}
double get_avg_returned_rows()
{
return ((double)r_output_rows) / get_r_loops();
}
double get_r_filtered()
{
if (r_examined_rows > 0)
return ((double)r_sorted_rows / r_examined_rows);
else
return 1.0;
}
private:
Time_and_counter_tracker time_tracker;
//ulonglong r_loops; /* How many times filesort was invoked */
/*
LIMIT is typically a constant. There is never "LIMIT 0".
  HA_POS_ERROR - we never had a LIMIT
  0            - different values of LIMIT were used in different
                 filesort invocations
  other value  - the same LIMIT value was used in every invocation
*/
ulonglong r_limit;
ulonglong r_used_pq; /* How many times PQ was used */
/* How many rows were examined (before checking the select->cond) */
ulonglong r_examined_rows;
/*
How many rows were put into sorting (this is examined_rows minus rows that
didn't pass the WHERE condition)
*/
ulonglong r_sorted_rows;
/*
How many rows were returned. This is equal to r_sorted_rows, unless there
was a LIMIT N clause in which case filesort would not have returned more
than N rows.
*/
ulonglong r_output_rows;
/* Total merge passes across all filesort invocations
   (divide by get_r_loops() to get the average) */
ulonglong sort_passes;
/*
  0             - not used (or not known)
  (ulonglong)-1 - multiple buffers of different sizes were used
  other         - the sort buffer size that was used in every invocation
*/
ulonglong sort_buffer_size;
};
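/*
Reporting sequence sketch: roughly how one filesort() invocation would feed
the tracker (hypothetical call sites and variable names, not the actual
filesort code):

    Filesort_tracker *fs= ...;         // e.g. obtained from report_sorting()
    fs->report_use(limit_rows);
    fs->report_sort_buffer_size(buf_size);
    fs->report_merge_passes_at_start(merge_pass_counter);
    // ... sorting and merging happen, merge_pass_counter grows ...
    fs->report_merge_passes_at_end(merge_pass_counter);
    fs->report_row_numbers(examined_rows, sorted_rows, returned_rows);

Note that report_merge_passes_at_start()/_at_end() take a running counter
and accumulate only the difference, so the counter does not have to be
reset between invocations.
*/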
typedef enum
{
EXPL_NO_TMP_TABLE=0,
EXPL_TMP_TABLE_BUFFER,
EXPL_TMP_TABLE_GROUP,
EXPL_TMP_TABLE_DISTINCT
} enum_tmp_table_use;
typedef enum
{
EXPL_ACTION_EOF, /* not-an-action */
EXPL_ACTION_FILESORT,
EXPL_ACTION_TEMPTABLE,
EXPL_ACTION_REMOVE_DUPS,
} enum_qep_action;
/*
This tracks how a JOIN object has resolved ORDER BY / GROUP BY / DISTINCT.

We are not tied to the query plan at all, because the query plan does not
have sufficient information. *A lot* of decisions about ordering/grouping
are made at very late stages (in JOIN::exec, JOIN::init_execution, in
create_sort_index and even in create_tmp_table).

The idea is that the operations that happen during select execution report
themselves. We have these operations:
- Sorting with filesort()
- Duplicate row removal (the one done by remove_duplicates())
- Use of a temporary table to buffer the result

There is also the "Selection" operation, done by do_select(). It reads rows;
there are several distinct cases:
1. doing the join operation on the base tables
2. reading the temporary table
3. reading the filesort output

It would be nice to build an execution graph, e.g.

  Select(JOIN op) -> temp.table -> filesort -> Select(filesort result)

but the problem is that there is no way to tell what a do_select() call
will do.

Our solution is not to have explicit selection operations. We make these
assumptions about the query plan:
- Select(JOIN op) is the first operation in the query plan
- Unless the first recorded operation is filesort(). filesort() is unable to
  read the result of a select, so when we find it first, the query plan is:

    filesort(first join table) -> Select(JOIN op) -> ...

The other popular query plan is:

    Select(JOIN op) -> temp.table -> filesort() -> ...

///TODO: handle repeated execution with subselects!
*/
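/*
As an illustration (a sketch of the reporting call sequence, not actual
server code), the plan "Select(JOIN op) -> temp.table -> filesort() -> ..."
would be recorded roughly like this:

    tracker->report_join_start();              // execution (re)starts
    tracker->report_tmp_table(tmp_table);      // result is buffered
    Filesort_tracker *fs= tracker->report_sorting(thd);
    // fs is then handed to filesort() so it can record its own statistics
*/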
class Sort_and_group_tracker : public Sql_alloc
{
enum { MAX_QEP_ACTIONS = 5 };
/* Query actions in the order they were made. */
enum_qep_action qep_actions[MAX_QEP_ACTIONS];
/* Index of the next action to be recorded */
int cur_action;
/*
Non-zero means there was already an execution which had
#total_actions actions
*/
int total_actions;
int get_n_actions()
{
return total_actions? total_actions: cur_action;
}
/*
TRUE<=>there were executions which took different sort/buffer/de-duplicate
routes. The counter values are not meaningful.
*/
bool varied_executions;
/* Details about query actions */
union
{
Filesort_tracker *filesort_tracker;
enum_tmp_table_use tmp_table;
}
qep_actions_data[MAX_QEP_ACTIONS];
Filesort_tracker *dummy_fsort_tracker;
bool is_analyze;
public:
Sort_and_group_tracker(bool is_analyze_arg) :
cur_action(0), total_actions(0), varied_executions(false),
dummy_fsort_tracker(NULL),
is_analyze(is_analyze_arg)
{}
/*************** Reporting interface ***************/
/* Report that join execution is started */
void report_join_start()
{
if (!total_actions && cur_action != 0)
{
/* This is a second execution */
total_actions= cur_action;
}
cur_action= 0;
}
/*
Report that a temporary table is created. The next step is writing to this
temporary table.
*/
void report_tmp_table(TABLE *tbl);
/*
Report that we are doing a filesort.
@return
Tracker object to be used with filesort
*/
Filesort_tracker *report_sorting(THD *thd);
/*
Report that remove_duplicates() is invoked [on a temp. table].
We don't collect any statistics on this operation, yet.
*/
void report_duplicate_removal();
friend class Iterator;
/*************** Statistics retrieval interface ***************/
bool had_varied_executions() { return varied_executions; }
class Iterator
{
Sort_and_group_tracker *owner;
int idx;
public:
Iterator(Sort_and_group_tracker *owner_arg) :
owner(owner_arg), idx(owner_arg->get_n_actions() - 1)
{}
enum_qep_action get_next(Filesort_tracker **tracker/*,
enum_tmp_table_use *tmp_table_use*/)
{
/* Walk back through the array... */
if (idx < 0)
return EXPL_ACTION_EOF;
switch (owner->qep_actions[idx])
{
case EXPL_ACTION_FILESORT:
*tracker= owner->qep_actions_data[idx].filesort_tracker;
break;
case EXPL_ACTION_TEMPTABLE:
//*tmp_table_use= tmp_table_kind[tmp_table_idx++];
break;
default:
break;
}
return owner->qep_actions[idx--];
}
bool is_last_element() { return idx == -1; }
};
};
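/*
Retrieval-side sketch (hypothetical ANALYZE output code, not the actual
printer): the Iterator returns the recorded actions starting from the last
one:

    Sort_and_group_tracker::Iterator it(tracker);
    Filesort_tracker *fs_tracker= NULL;
    enum_qep_action action;
    while ((action= it.get_next(&fs_tracker)) != EXPL_ACTION_EOF)
    {
      switch (action) {
      case EXPL_ACTION_FILESORT:    break;  // print fs_tracker's statistics
      case EXPL_ACTION_TEMPTABLE:   break;  // print "using temporary table"
      case EXPL_ACTION_REMOVE_DUPS: break;  // print "duplicate removal"
      default: break;
      }
    }
*/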