/* Copyright (c) 2010, 2025, Oracle and/or its affiliates.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2.0,
as published by the Free Software Foundation.
This program is designed to work with certain software (including
but not limited to OpenSSL) that is licensed under separate terms,
as designated in a particular file or component or in included license
documentation. The authors of MySQL hereby grant you an additional
permission to link the program and your derivative works with the
separately licensed software that they have either included with
the program or referenced in the documentation.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License, version 2.0, for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
#include "sql/filesort_utils.h"
#include <string.h>
#include <algorithm>
#include <cmath>
#include "add_with_saturate.h"
#include "my_dbug.h"
#include "my_io.h"
#include "my_pointer_arithmetic.h"
#include "sql/cmp_varlen_keys.h"
#include "sql/opt_costmodel.h"
#include "sql/sort_param.h"
#include "sql/sql_sort.h"
#include "sql/thr_malloc.h"
PSI_memory_key key_memory_Filesort_buffer_sort_keys;
using std::max;
using std::min;
using std::nth_element;
using std::sort;
using std::stable_sort;
using std::unique;
using std::vector;
namespace {
/*
An inline function which does a byte-wise "less than" comparison,
equivalent to checking memcmp() < 0.
This one turns out to be pretty fast on all platforms, except SPARC.
See the accompanying unit tests, which measure various implementations.
*/
inline bool my_mem_compare(const uchar *s1, const uchar *s2, size_t len) {
assert(s1 != nullptr);
assert(s2 != nullptr);
for (size_t i = 0; i < len; ++i) {
if (s1[i] != s2[i]) return s1[i] < s2[i];
}
return false;
}
#define COMPARE(N) \
if (s1[N] != s2[N]) return s1[N] < s2[N]
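// For longer keys: compare the first four bytes unrolled, then let
// memcmp() handle the rest. Requires len >= 4.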
inline bool my_mem_compare_longkey(const uchar *s1, const uchar *s2,
size_t len) {
COMPARE(0);
COMPARE(1);
COMPARE(2);
COMPARE(3);
return memcmp(s1 + 4, s2 + 4, len - 4) < 0;
}
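// Comparator functors over fixed-length keys, for use with std::sort,
// std::stable_sort and std::nth_element below.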
class Mem_compare {
public:
explicit Mem_compare(size_t n) : m_size(n) {}
bool operator()(const uchar *s1, const uchar *s2) const {
return my_mem_compare(s1, s2, m_size);
}
private:
size_t m_size;
};
class Mem_compare_longkey {
public:
explicit Mem_compare_longkey(size_t n) : m_size(n) {}
bool operator()(const uchar *s1, const uchar *s2) const {
return my_mem_compare_longkey(s1, s2, m_size);
}
private:
size_t m_size;
};
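// Comparator for variable-length sort keys; delegates to cmp_varlen_keys()
// with the given sort field descriptions.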
class Mem_compare_varlen_key {
public:
Mem_compare_varlen_key(const Bounds_checked_array<st_sort_field> sfa,
bool use_hash_arg)
: sort_field_array(sfa.array(), sfa.size()), use_hash(use_hash_arg) {}
bool operator()(const uchar *s1, const uchar *s2) const {
return cmp_varlen_keys(sort_field_array, use_hash, s1, s2);
}
private:
Bounds_checked_array<st_sort_field> sort_field_array;
bool use_hash;
};
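// Derives an equality predicate from a "less than" comparator
// (a equals b iff neither a < b nor b < a), for use with std::unique()
// when removing duplicates.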
template <class Comp>
class Equality_from_less {
public:
explicit Equality_from_less(const Comp &comp) : m_comp(comp) {}
template <class A, class B>
bool operator()(const A &a, const B &b) const {
return !(m_comp(a, b) || m_comp(b, a));
}
private:
const Comp &m_comp;
};
} // namespace
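// Sort the first num_input_rows record pointers in the buffer, using the
// comparison implied by param (fixed-length, long fixed-length or
// variable-length keys), optionally removing duplicates, and return how
// many rows should be output (at most max_output_rows).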
size_t Filesort_buffer::sort_buffer(Sort_param *param, size_t num_input_rows,
size_t max_output_rows) {
param->m_sort_algorithm = Sort_param::FILESORT_ALG_NONE;
if (max_output_rows == 0) return max_output_rows;
if (num_input_rows <= 1) return num_input_rows;
if (param->max_compare_length() == 0) return num_input_rows;
const auto it_begin = begin(m_record_pointers);
auto it_end = begin(m_record_pointers) + num_input_rows;
// Due to LIMIT, we don't need to sort all of the elements, so find out
// which ones we need and sort only those. This reduces the total
// running time from O(n log n) to O(n + k log k), which is a significant
// win for small k. nth_element() isn't guaranteed to be a stable sort,
// though, so we can only use it if an unstable sort is okay.
const bool prefilter_nth_element =
max_output_rows < num_input_rows / 2 && !param->m_remove_duplicates;
if (param->using_varlen_keys()) {
const Mem_compare_varlen_key comp(param->local_sortorder, param->use_hash);
if (prefilter_nth_element) {
nth_element(it_begin, it_begin + max_output_rows - 1, it_end, comp);
it_end = it_begin + max_output_rows;
}
// TODO: Make more elaborate heuristics than just always picking
// std::sort.
param->m_sort_algorithm = Sort_param::FILESORT_ALG_STD_SORT;
sort(it_begin, it_end, comp);
if (param->m_remove_duplicates) {
num_input_rows =
unique(it_begin, it_end,
Equality_from_less<Mem_compare_varlen_key>(comp)) -
it_begin;
}
return std::min(num_input_rows, max_output_rows);
}
// If we don't use addon fields, we'll have the record position appended to
// the end of each record. This disturbs our equality comparisons, so we'll
// have to remove it. (Removing it also makes the comparisons ever so slightly
// cheaper.)
size_t key_len = param->max_compare_length();
if (!param->using_addon_fields()) {
key_len -= param->sum_ref_length;
}
/*
std::stable_sort has some extra overhead in allocating the temp buffer,
which takes some time. The cutover point where it starts to get faster
than quicksort seems to be somewhere around 10 to 40 records.
So we're a bit conservative, and stay with quicksort up to 100 records.
*/
if (num_input_rows <= 100) {
if (key_len < 10) {
param->m_sort_algorithm = Sort_param::FILESORT_ALG_STD_SORT;
if (prefilter_nth_element) {
nth_element(it_begin, it_begin + max_output_rows - 1, it_end,
Mem_compare(key_len));
it_end = it_begin + max_output_rows;
}
sort(it_begin, it_end, Mem_compare(key_len));
if (param->m_remove_duplicates) {
num_input_rows =
unique(it_begin, it_end,
Equality_from_less<Mem_compare>(Mem_compare(key_len))) -
it_begin;
}
return std::min(num_input_rows, max_output_rows);
}
param->m_sort_algorithm = Sort_param::FILESORT_ALG_STD_SORT;
if (prefilter_nth_element) {
nth_element(it_begin, it_begin + max_output_rows - 1, it_end,
Mem_compare_longkey(key_len));
it_end = it_begin + max_output_rows;
}
sort(it_begin, it_end, Mem_compare_longkey(key_len));
if (param->m_remove_duplicates) {
auto new_end = unique(it_begin, it_end,
Equality_from_less<Mem_compare_longkey>(
Mem_compare_longkey(key_len)));
num_input_rows = new_end - it_begin;
}
return std::min(num_input_rows, max_output_rows);
}
param->m_sort_algorithm = Sort_param::FILESORT_ALG_STD_STABLE;
// Heuristic here: avoid function call overhead for short keys.
if (key_len < 10) {
if (prefilter_nth_element) {
nth_element(it_begin, it_begin + max_output_rows - 1, it_end,
Mem_compare(key_len));
it_end = it_begin + max_output_rows;
}
stable_sort(it_begin, it_end, Mem_compare(key_len));
if (param->m_remove_duplicates) {
num_input_rows =
unique(it_begin, it_end,
Equality_from_less<Mem_compare>(Mem_compare(key_len))) -
it_begin;
}
} else {
if (prefilter_nth_element) {
nth_element(it_begin, it_begin + max_output_rows - 1, it_end,
Mem_compare_longkey(key_len));
it_end = it_begin + max_output_rows;
}
stable_sort(it_begin, it_end, Mem_compare_longkey(key_len));
if (param->m_remove_duplicates) {
num_input_rows = unique(it_begin, it_end,
Equality_from_less<Mem_compare_longkey>(
Mem_compare_longkey(key_len))) -
it_begin;
}
}
return std::min(num_input_rows, max_output_rows);
}
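// Prepare the buffer for reuse: drop all record pointers and free every
// block except the last (largest) one, so that it can be reused without a
// new allocation.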
void Filesort_buffer::reset() {
update_peak_memory_used();
m_record_pointers.clear();
if (m_blocks.size() >= 2) {
// Free every block but the last.
m_blocks.erase(m_blocks.begin(), m_blocks.end() - 1);
}
/*
m_max_record_length may have changed since last time; if the remaining
(largest) block is not large enough for a single row of the new size,
then free that block, too.
*/
if (m_max_record_length > m_current_block_size) {
free_sort_buffer();
}
if (m_blocks.empty()) {
assert(m_next_rec_ptr == nullptr);
assert(m_current_block_end == nullptr);
assert(m_current_block_size == 0);
} else {
m_next_rec_ptr = m_blocks[0].get();
assert(m_current_block_end == m_next_rec_ptr + m_current_block_size);
}
m_space_used_other_blocks = 0;
}
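// Try to ensure room for num_records maximum-length records in a single
// block, pre-populating m_record_pointers accordingly. Returns true if we
// cannot guarantee the space (unknown record length, over the buffer limit,
// or allocation failure).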
bool Filesort_buffer::preallocate_records(size_t num_records) {
if (m_max_record_length == 0xFFFFFFFFu) {
// The rest of the code uses this value for “infinite” and saturates to it,
// so even if we have a large sort buffer (> 4 GB), we can't know for
// sure there's going to be room.
return true;
}
reset();
const size_t bytes_needed = num_records * m_max_record_length;
if (bytes_needed + num_records * sizeof(m_record_pointers[0]) >
m_max_size_in_bytes) {
return true;
}
/*
If the remaining block can't hold what we need, then it's of no
use to us (it doesn't save us any allocations), so get rid of it
and allocate one that's exactly the right size.
*/
if (m_next_rec_ptr == nullptr ||
m_next_rec_ptr + bytes_needed > m_current_block_end) {
free_sort_buffer();
if (allocate_sized_block(bytes_needed)) {
return true;
}
m_record_pointers.reserve(num_records);
}
while (m_record_pointers.size() < num_records) {
Bounds_checked_array<uchar> ptr =
get_next_record_pointer(m_max_record_length);
(void)ptr;
assert(ptr.array() != nullptr);
commit_used_memory(m_max_record_length);
}
return false;
}
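// Allocate a new block of at least num_bytes, growing the block size
// geometrically (by 50%) while staying within the total sort buffer limit.
// Returns true if no suitable block could be allocated.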
bool Filesort_buffer::allocate_block(size_t num_bytes) {
DBUG_EXECUTE_IF("alloc_sort_buffer_fail",
DBUG_SET("+d,simulate_out_of_memory"););
size_t next_block_size;
if (m_current_block_size == 0) {
// First block.
next_block_size = MIN_SORT_MEMORY;
} else {
next_block_size = m_current_block_size + m_current_block_size / 2;
}
/*
If our last block isn't used at all, we can safely free it
before we try to allocate a larger one. Note that we do this
after the calculation above, which uses m_current_block_size.
*/
if (!m_blocks.empty() && m_blocks.back().get() == m_next_rec_ptr) {
m_current_block_size = 0;
m_next_rec_ptr = nullptr;
m_current_block_end = nullptr;
m_blocks.pop_back();
}
// Figure out how much space we've used, to see how much is left (if
// anything).
size_t space_used = m_current_block_size + m_space_used_other_blocks;
space_used += m_record_pointers.capacity() * sizeof(m_record_pointers[0]);
size_t space_left;
if (space_used > m_max_size_in_bytes)
space_left = 0;
else
space_left = m_max_size_in_bytes - space_used;
/*
Adjust space_left to take into account that filling this new buffer
with records would necessarily also add pointers to m_record_pointers.
Note that we know how much space record_pointers currently is using,
but not how much it could potentially be using in the future as we add
records; we take a best-case estimate based on maximum-size records.
It's also impossible to say how capacity() will change since this
is an implementation detail, so we don't take that into account.
This means that, for smaller records, we could go above the maximum
permitted total memory usage.
*/
size_t min_num_rows_capacity =
m_record_pointers.size() +
space_left /
AddWithSaturate(m_max_record_length, sizeof(m_record_pointers[0]));
if (min_num_rows_capacity > m_record_pointers.capacity()) {
space_left -= (min_num_rows_capacity - m_record_pointers.capacity()) *
sizeof(m_record_pointers[0]);
}
next_block_size = min(max(next_block_size, num_bytes), space_left);
if (next_block_size < num_bytes) {
/*
If we're really out of space, but have at least 32 kB unused in
m_record_pointers, try to reclaim some space and try again. This should
only be needed in some very rare cases where we first sort a lot of very
short rows (yielding a huge amount of record pointers) and then need to
sort huge rows that wouldn't fit in the buffer otherwise -- in other
words, nearly never.
*/
size_t excess_bytes =
(m_record_pointers.capacity() - m_record_pointers.size()) *
sizeof(m_record_pointers[0]);
if (excess_bytes >= 32768) {
size_t old_capacity = m_record_pointers.capacity();
m_record_pointers.shrink_to_fit();
if (m_record_pointers.capacity() < old_capacity) {
return allocate_block(num_bytes);
}
}
// We're full.
return true;
}
return allocate_sized_block(next_block_size);
}
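// Allocate a block of exactly block_size bytes and make it the current
// block. Returns true on out-of-memory.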
bool Filesort_buffer::allocate_sized_block(size_t block_size) {
unique_ptr_my_free<uchar[]> new_block((uchar *)my_malloc(
key_memory_Filesort_buffer_sort_keys, block_size, MYF(0)));
if (new_block == nullptr) {
return true;
}
m_space_used_other_blocks += m_current_block_size;
m_current_block_size = block_size;
m_next_rec_ptr = new_block.get();
m_current_block_end = new_block.get() + m_current_block_size;
m_blocks.push_back(std::move(new_block));
return false;
}
void Filesort_buffer::free_sort_buffer() {
update_peak_memory_used();
// std::vector::clear() does not necessarily free all the memory,
// but moving or swapping in an empty vector typically does (and we
// rely on this, even though we really shouldn't). This shouldn't have
// been a problem since they will be cleared in the destructor, but
// there are _many_ places scattered around the code that construct TABLE
// objects (which indirectly contain Filesort_buffer objects) and never
// destroy them properly. (You can find lots of them easily by adding an
// std::unique_ptr<int> to Filesort_buffer and giving it a value in the
// constructor; it will leak all over the place.) We should fix that,
// but for the time being, we have this workaround instead.
m_record_pointers = vector<uchar *>();
m_blocks = vector<unique_ptr_my_free<uchar[]>>();
m_space_used_other_blocks = 0;
m_next_rec_ptr = nullptr;
m_current_block_end = nullptr;
m_current_block_size = 0;
}
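// Return a single contiguous buffer spanning the entire allowed sort buffer
// size, reallocating if the current block is smaller. Returns an empty array
// on out-of-memory.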
Bounds_checked_array<uchar> Filesort_buffer::get_contiguous_buffer() {
if (m_current_block_size != m_max_size_in_bytes) {
free_sort_buffer();
if (allocate_sized_block(m_max_size_in_bytes)) {
return Bounds_checked_array<uchar>(nullptr, 0);
}
}
return Bounds_checked_array<uchar>(m_blocks.back().get(),
m_max_size_in_bytes);
}
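// Record the high-water mark of memory used by the blocks and the record
// pointer vector.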
void Filesort_buffer::update_peak_memory_used() const {
m_peak_memory_used =
max(m_peak_memory_used,
m_record_pointers.capacity() * sizeof(m_record_pointers[0]) +
m_current_block_size + m_space_used_other_blocks);
}