File: histogram.h

package info (click to toggle)
mysql-8.0 8.0.43-3
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 1,273,924 kB
  • sloc: cpp: 4,684,605; ansic: 412,450; pascal: 108,398; java: 83,641; perl: 30,221; cs: 27,067; sql: 26,594; sh: 24,181; python: 21,816; yacc: 17,169; php: 11,522; xml: 7,388; javascript: 7,076; makefile: 2,194; lex: 1,075; awk: 670; asm: 520; objc: 183; ruby: 97; lisp: 86
file content (789 lines) | stat: -rw-r--r-- 28,175 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
#ifndef HISTOGRAMS_HISTOGRAM_INCLUDED
#define HISTOGRAMS_HISTOGRAM_INCLUDED

/* Copyright (c) 2016, 2025, Oracle and/or its affiliates.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License, version 2.0,
   as published by the Free Software Foundation.

   This program is designed to work with certain software (including
   but not limited to OpenSSL) that is licensed under separate terms,
   as designated in a particular file or component or in included license
   documentation.  The authors of MySQL hereby grant you an additional
   permission to link the program and your derivative works with the
   separately licensed software that they have either included with
   the program or referenced in the documentation.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License, version 2.0, for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */

/**
  @file sql/histograms/histogram.h
  Histogram base class.

  This file defines the base class for all histogram types. We keep the base
  class itself non-templatized in order to more easily send a histogram as an
  argument, collect multiple histograms in a single collection etc.

  A histogram is stored as a JSON object. This gives the flexibility of storing
  a virtually unlimited number of buckets, storing data values in their full
  length, and easily adding new histogram types in the future. Histograms are
  stored persistently in the system table mysql.column_stats.

  We keep all histogram code in the namespace "histograms" in order to avoid
  name conflicts etc.
*/

#include <cstddef>  // size_t
#include <functional>
#include <map>      // std::map
#include <set>      // std::set
#include <string>   // std::string
#include <utility>  // std::pair

#include "lex_string.h"  // LEX_CSTRING
#include "my_base.h"     // ha_rows
#include "sql/field.h"   // Field
#include "sql/histograms/value_map_type.h"
#include "sql/mem_root_allocator.h"   // Mem_root_allocator
#include "sql/stateless_allocator.h"  // Stateless_allocator

class Item;
class Json_dom;
class Json_object;
class THD;
struct TYPELIB;
class Field;

namespace dd {
class Table;
}  // namespace dd
namespace histograms {
struct Histogram_comparator;
template <class T>
class Value_map;
}  // namespace histograms
struct CHARSET_INFO;
struct MEM_ROOT;
class Table_ref;
class Json_dom;

namespace histograms {

/// The default (and invalid) value for "m_null_values_fraction".
/// Declared constexpr so that the value can also be used in constant
/// expressions (e.g. in a static_assert).
static constexpr double INVALID_NULL_VALUES_FRACTION = -1.0;

/**
  Messages that can be reported for a histogram operation.

  A results_map associates a std::string key with one of these values. The
  JSON_* values are the JSON validation errors that are reported through
  Error_context.
*/
enum class Message {
  FIELD_NOT_FOUND,
  UNSUPPORTED_DATA_TYPE,
  TEMPORARY_TABLE,
  ENCRYPTED_TABLE,
  VIEW,
  HISTOGRAM_CREATED,
  MULTIPLE_TABLES_SPECIFIED,
  COVERED_BY_SINGLE_PART_UNIQUE_INDEX,
  NO_HISTOGRAM_FOUND,
  HISTOGRAM_DELETED,
  SERVER_READ_ONLY,
  MULTIPLE_COLUMNS_SPECIFIED,

  // JSON validation errors. See Error_context.
  JSON_FORMAT_ERROR,
  JSON_NOT_AN_OBJECT,
  JSON_MISSING_ATTRIBUTE,
  JSON_WRONG_ATTRIBUTE_TYPE,
  JSON_WRONG_BUCKET_TYPE_2,
  JSON_WRONG_BUCKET_TYPE_4,
  JSON_WRONG_DATA_TYPE,
  JSON_UNSUPPORTED_DATA_TYPE,
  JSON_UNSUPPORTED_HISTOGRAM_TYPE,
  JSON_UNSUPPORTED_CHARSET,
  JSON_INVALID_SAMPLING_RATE,
  JSON_INVALID_NUM_BUCKETS_SPECIFIED,
  JSON_INVALID_FREQUENCY,
  JSON_INVALID_NUM_DISTINCT,
  JSON_VALUE_FORMAT_ERROR,
  JSON_VALUE_OUT_OF_RANGE,
  JSON_VALUE_NOT_ASCENDING_1,
  JSON_VALUE_NOT_ASCENDING_2,
  JSON_VALUE_DESCENDING_IN_BUCKET,
  JSON_CUMULATIVE_FREQUENCY_NOT_ASCENDING,
  JSON_INVALID_NULL_VALUES_FRACTION,
  JSON_INVALID_TOTAL_FREQUENCY,
  JSON_NUM_BUCKETS_MORE_THAN_SPECIFIED,
  JSON_IMPOSSIBLE_EMPTY_EQUI_HEIGHT,
};

/**
  Allocation functor that is plugged into Stateless_allocator through the
  Histogram_key_allocator alias.

  Only the call operator is declared here; its definition is not part of this
  header.
  NOTE(review): judging by the name, the allocation is presumably accounted
  under a histogram-specific PSI memory key -- confirm against the definition.
*/
struct Histogram_psi_key_alloc {
  /// @param s  requested size (presumably in bytes -- confirm)
  /// @return pointer to the allocated memory (failure behavior is not visible
  ///         from this header)
  void *operator()(size_t s) const;
};

/// Stateless_allocator parameterized with Histogram_psi_key_alloc as its
/// allocation functor.
template <class T>
using Histogram_key_allocator = Stateless_allocator<T, Histogram_psi_key_alloc>;

/// Mem_root_allocator for the std::pair<const T, ha_rows> elements held by a
/// value_map_type<T>.
template <class T>
using value_map_allocator = Mem_root_allocator<std::pair<const T, ha_rows>>;

/// Map from a value of type T to an ha_rows (presumably the frequency of the
/// value -- confirm), ordered by Histogram_comparator and allocated through
/// value_map_allocator.
template <typename T>
using value_map_type =
    std::map<T, ha_rows, Histogram_comparator, value_map_allocator<T>>;

/// Set of strings allocated through Histogram_key_allocator; used for the
/// user-specified columns passed to update_histogram() and drop_histograms().
using columns_set = std::set<std::string, std::less<std::string>,
                             Histogram_key_allocator<std::string>>;

// Maps a std::string key to the Message reported for it.
// Used as an array, so duplicate values are not checked.
// TODO(tlchrist): Convert this std::map to an array.
using results_map =
    std::map<std::string, Message, std::less<std::string>,
             Histogram_key_allocator<std::pair<const std::string, Message>>>;

/**
  The different operators for which we can ask histogram statistics for a
  selectivity estimate.

  A value of this type is passed as the predicate operator to
  Histogram::get_selectivity().
*/
enum class enum_operator {
  EQUALS_TO,
  GREATER_THAN,
  LESS_THAN,
  IS_NULL,
  IS_NOT_NULL,
  LESS_THAN_OR_EQUAL,
  GREATER_THAN_OR_EQUAL,
  NOT_EQUALS_TO,
  BETWEEN,
  NOT_BETWEEN,
  IN_LIST,
  NOT_IN_LIST
};

/**
  Error context to validate given JSON object which represents a histogram.

  A validation error consists of two pieces of information:

    1) error code  - what kind of error it is
    2) JSON path   - where the error occurs

  Errors are classified into a few conceptual categories, namely

    1) absence of required attributes
    2) unexpected JSON type of attributes
    3) value encoding corruption
    4) value out of domain
    5) breaking bucket sequence semantics
    6) breaking certain constraint between pieces of information

  @see histograms::Message for the list of JSON validation errors.

  Use of the Error_context class
  ------------------------------

  An Error_context object is passed along with other parameters to the
  json_to_histogram() function that is used to create a histogram object (e.g.
  Equi_height<longlong>) from a JSON string.

  The json_to_histogram() function has two different use cases, with different
  requirements for validation:

  1) Deserializing a histogram that was retrieved from the dictionary. In this
     case the histogram has already been validated, and the user is not
     expecting validation feedback, so we pass along a default-constructed
     "empty shell" Error_context object with no-op operations.

  2) When validating the user-supplied JSON string to the UPDATE HISTOGRAM ...
     USING DATA commmand. In this case we pass along an active Error_context
     object that uses a Field object to validate bucket values, and stores
     results in a results_map.

  The binary() method is used to distinguish between these two contexts/cases.
*/
class Error_context {
 public:
  /// Default constructor. Used when deserializing binary JSON that has already
  /// been validated, e.g. when retrieving a histogram from the dictionary, and
  /// the Error_context object is not actively used for validation.
  Error_context()
      : m_thd{nullptr}, m_field{nullptr}, m_results{nullptr}, m_binary{true} {}

  /**
    Constructor. Used in the context of deserializing the user-supplied JSON
    string to the UPDATE HISTOGRAM ... USING DATA command.

    @param thd      Thread context
    @param field    The field for values on which the histogram is built
    @param results  Where reported errors are stored
    */
  Error_context(THD *thd, Field *field, results_map *results)
      : m_thd(thd), m_field(field), m_results(results), m_binary(false) {}

  /**
    Report a global error to this context.

    @param err_code  The global error code
  */
  void report_global(Message err_code);

  /**
    Report to this context that a required attribute is missing.

    @param name  Name of the missing attribute
   */
  void report_missing_attribute(const std::string &name);

  /**
    Report to this context that an error occurs on the given dom node.

    @param dom       The given dom node
    @param err_code  The error code
   */
  void report_node(const Json_dom *dom, Message err_code);

  /**
    Check if the value is in the field definition domain.

    @param v Pointer to the value.

    @return true on error, false otherwise

    @note Uses Field::store() on the field for which the user-defined histogram
    is to be constructed in order to check the validity of the supplied value.
    This will have the side effect of writing to the record buffer so this
    should only be used with an active Error_context (with a non-nullptr field)
    when we do not otherwise expect to use the record buffer. Currently the only
    use case is to validate the JSON input to the command UPDATE HISTOGRAM ...
    USING DATA where it should be OK to use the field for this purpose.
   */
  template <typename T>
  bool check_value(T *v);

  /**
    Tell whether the input json is an internal persisted copy or
    a user-defined input. If the input is an internal copy, there
    should never be type/format errors. If it is a user-defined input,
    errors may occur and should be handled, and some type casting may
    be needed.

    @return true for JSON, false otherwise
   */
  bool binary() const { return m_binary; }

  /**
    Return data-type of field in context if present. Used to enforce
    that histogram datatype matches column datatype for user-defined
    histograms.

    @return datatype string if present, nullptr if not
   */
  Field *field() const { return m_field; }

 private:
  /// Thread context for error handlers
  THD *m_thd;
  /// The field for checking endpoint values
  Field *m_field;
  /// Where reported errors are stored
  results_map *m_results;
  /// Whether or not the JSON object to process is in binary format
  bool m_binary;
};

/**
  Histogram base class.

  This is an abstract class containing the interface and shared code for
  concrete histogram subclasses.

  Histogram subclasses (Singleton, Equi_height) are constructed through factory
  methods in order to catch memory allocation errors during construction.

  The histogram subclasses have no public copy or move constructors. In order to
  copy a histogram onto a given MEM_ROOT, use the public clone method. The clone
  method ensures that members of the histogram, such as String type buckets,
  are also allocated on the given MEM_ROOT. Modifications to these methods need
  to be careful that histogram buckets are cloned/copied correctly.
*/
class Histogram {
 public:
  /// All supported histogram types in MySQL.
  enum class enum_histogram_type { EQUI_HEIGHT, SINGLETON };

  /// String representation of the JSON field "histogram-type".
  static constexpr const char *histogram_type_str() { return "histogram-type"; }

  /// String representation of the JSON field "data-type".
  static constexpr const char *data_type_str() { return "data-type"; }

  /// String representation of the JSON field "collation-id".
  static constexpr const char *collation_id_str() { return "collation-id"; }

  /// String representation of the histogram type SINGLETON.
  static constexpr const char *singleton_str() { return "singleton"; }

  /// String representation of the histogram type EQUI-HEIGHT.
  static constexpr const char *equi_height_str() { return "equi-height"; }

 protected:
  /// The sampling rate used to generate this histogram.
  double m_sampling_rate;

  /// The fraction of NULL values in the histogram (between 0.0 and 1.0).
  double m_null_values_fraction;

  /// The character set for the data stored
  const CHARSET_INFO *m_charset;

  /// The number of buckets originally specified
  size_t m_num_buckets_specified;

  /// String representation of the JSON field "buckets".
  static constexpr const char *buckets_str() { return "buckets"; }

  /// String representation of the JSON field "last-updated".
  static constexpr const char *last_updated_str() { return "last-updated"; }

  /// String representation of the JSON field "null-values".
  static constexpr const char *null_values_str() { return "null-values"; }

  /// String representation of the JSON field "sampling-rate".
  static constexpr const char *sampling_rate_str() { return "sampling-rate"; }

  /// String representation of the JSON field "number-of-buckets-specified".
  /// (The function name is spelled "numer"; the returned JSON field name is
  /// spelled "number".)
  static constexpr const char *numer_of_buckets_specified_str() {
    return "number-of-buckets-specified";
  }

  /**
    Constructor.

    @param mem_root  the mem_root where the histogram contents will be allocated
    @param db_name   name of the database this histogram represents
    @param tbl_name  name of the table this histogram represents
    @param col_name  name of the column this histogram represents
    @param type      the histogram type (equi-height, singleton)
    @param data_type the type of data that this histogram contains
    @param[out] error is set to true if an error occurs
  */
  Histogram(MEM_ROOT *mem_root, const std::string &db_name,
            const std::string &tbl_name, const std::string &col_name,
            enum_histogram_type type, Value_map_type data_type, bool *error);

  /**
    Copy constructor

    This will make a copy of the provided histogram onto the provided MEM_ROOT.

    @param mem_root  the mem_root where the histogram contents will be allocated
    @param other     the histogram to copy
    @param[out] error is set to true if an error occurs
  */
  Histogram(MEM_ROOT *mem_root, const Histogram &other, bool *error);

  /**
    Write the data type of this histogram into a JSON object.

    @param json_object the JSON object where we will write the histogram
                       data type

    @return true on error, false otherwise
  */
  bool histogram_data_type_to_json(Json_object *json_object) const;

  /**
    Return the value that is contained in the JSON DOM object.

    For most types, this function simply returns the contained value. For String
    values, the value is allocated on this histogram's MEM_ROOT before it is
    returned. This allows the String value to survive the entire lifetime of the
    histogram object.

    @param json_dom the JSON DOM object to extract the value from
    @param out      the value from the JSON DOM object
    @param context  error context for validation

    @return true on error, false otherwise
  */
  template <class T>
  bool extract_json_dom_value(const Json_dom *json_dom, T *out,
                              Error_context *context);

  /**
    Populate the histogram with data from the provided JSON object. The base
    class also provides an implementation that subclasses must call in order
    to populate fields that are shared among all histogram types (character set,
    null values fraction).

    @param json_object  the JSON object to read the histogram data from
    @param context      error context for validation

    @return true on error, false otherwise
  */
  virtual bool json_to_histogram(const Json_object &json_object,
                                 Error_context *context) = 0;

 private:
  /// The MEM_ROOT where the histogram contents will be allocated.
  MEM_ROOT *m_mem_root;

  /// The type of this histogram.
  const enum_histogram_type m_hist_type;

  /// The type of the data this histogram contains.
  const Value_map_type m_data_type;

  /// Name of the database this histogram represents.
  LEX_CSTRING m_database_name;

  /// Name of the table this histogram represents.
  LEX_CSTRING m_table_name;

  /// Name of the column this histogram represents.
  LEX_CSTRING m_column_name;

  /**
    An internal function for getting a selectivity estimate prior to
    adjustment.
    @see get_selectivity() for details.
   */
  bool get_raw_selectivity(Item **items, size_t item_count, enum_operator op,
                           double *selectivity) const;

  /**
    An internal function for getting the selectivity estimation.

    This function will read/evaluate the value from the given Item, and pass
    this value on to the correct selectivity estimation function based on the
    data type of the histogram. For instance, if the data type of the histogram
    is INT, we will call "val_int" on the Item to evaluate the value as an
    integer and pass this value on to the next function.

    @param item The Item to read/evaluate the value from.
    @param op The operator we are estimating the selectivity for.
    @param typelib In the case of ENUM or SET data type, this parameter holds
                   the type information. This is needed in order to map a
                   string representation of an ENUM/SET value into its correct
                   integer representation (ENUM/SET values are stored as
                   integer values in the histogram).
    @param[out] selectivity The estimated selectivity, between 0.0 and 1.0
                inclusive.

    @return true on error (i.e. the provided item was NULL), false on success.
  */
  bool get_selectivity_dispatcher(Item *item, const enum_operator op,
                                  const TYPELIB *typelib,
                                  double *selectivity) const;

  /**
    An internal function for getting the selectivity estimation.

    This function will cast the histogram to the correct class (using down_cast)
    and pass the given value on to the correct selectivity estimation function
    for that class.

    @param value The value to estimate the selectivity for.

    @return The estimated selectivity, between 0.0 and 1.0 inclusive.
  */
  template <class T>
  double get_less_than_selectivity_dispatcher(const T &value) const;

  /// @see get_less_than_selectivity_dispatcher
  template <class T>
  double get_greater_than_selectivity_dispatcher(const T &value) const;

  /// @see get_less_than_selectivity_dispatcher
  template <class T>
  double get_equal_to_selectivity_dispatcher(const T &value) const;

  /**
    An internal function for applying the correct function for the given
    operator.

    @param op    The operator to apply
    @param value The value to find the selectivity for.

    @return The estimated selectivity, between 0.0 and 1.0 inclusive.
  */
  template <class T>
  double apply_operator(const enum_operator op, const T &value) const;

 public:
  /// Not default constructible; a histogram is created through the protected
  /// constructors above.
  Histogram() = delete;
  /// Not copy constructible; use clone() to copy a histogram onto a MEM_ROOT.
  Histogram(const Histogram &other) = delete;

  /// Destructor.
  virtual ~Histogram() = default;

  /// @return the MEM_ROOT that this histogram uses for allocations
  MEM_ROOT *get_mem_root() const { return m_mem_root; }

  /**
    @return name of the database this histogram represents
  */
  const LEX_CSTRING get_database_name() const { return m_database_name; }

  /**
    @return name of the table this histogram represents
  */
  const LEX_CSTRING get_table_name() const { return m_table_name; }

  /**
    @return name of the column this histogram represents
  */
  const LEX_CSTRING get_column_name() const { return m_column_name; }

  /**
    @return type of this histogram
  */
  enum_histogram_type get_histogram_type() const { return m_hist_type; }

  /**
    @return the fraction of NULL values, in the range [0.0, 1.0]
  */
  double get_null_values_fraction() const;

  /// @return the character set for the data this histogram contains
  const CHARSET_INFO *get_character_set() const { return m_charset; }

  /// @return the sampling rate used to generate this histogram
  double get_sampling_rate() const { return m_sampling_rate; }

  /**
    Returns the histogram type as a readable string.

    @return a readable string representation of the histogram type
  */
  virtual std::string histogram_type_to_str() const = 0;

  /**
    @return number of buckets in this histogram
  */
  virtual size_t get_num_buckets() const = 0;

  /**
    Get the estimated number of distinct non-NULL values.
    @return number of distinct non-NULL values
  */
  virtual size_t get_num_distinct_values() const = 0;

  /**
    @return the data type that this histogram contains
  */
  Value_map_type get_data_type() const { return m_data_type; }

  /**
    @return number of buckets originally specified by the user. This may be
            higher than the actual number of buckets in the histogram.
  */
  size_t get_num_buckets_specified() const { return m_num_buckets_specified; }

  /**
    Converts the histogram to a JSON object.

    @param[in,out] json_object output where the histogram is to be stored. The
                   caller is responsible for allocating/deallocating the JSON
                   object

    @return     true on error, false otherwise
  */
  virtual bool histogram_to_json(Json_object *json_object) const = 0;

  /**
    Converts JSON object to a histogram.

    @param  mem_root    MEM_ROOT where the histogram will be allocated
    @param  schema_name the schema name
    @param  table_name  the table name
    @param  column_name the column name
    @param  json_object the JSON object to convert into a histogram
    @param  context     error context for validation

    @return nullptr on error. Otherwise a histogram allocated on the provided
            MEM_ROOT.
  */
  static Histogram *json_to_histogram(MEM_ROOT *mem_root,
                                      const std::string &schema_name,
                                      const std::string &table_name,
                                      const std::string &column_name,
                                      const Json_object &json_object,
                                      Error_context *context);

  /**
    Make a clone of the current histogram

    @param mem_root the MEM_ROOT on which the new histogram will be allocated.

    @return a histogram allocated on the provided MEM_ROOT. Returns nullptr
            on error.
  */
  virtual Histogram *clone(MEM_ROOT *mem_root) const = 0;

  /**
    Store this histogram to persistent storage (data dictionary).

    @param thd Thread handler.

    @return false on success, true on error.
  */
  bool store_histogram(THD *thd) const;

  /**
    Get selectivity estimation.

    This function will try and get the selectivity estimation for a predicate
    on the form "COLUMN OPERATOR CONSTANT", for instance "SELECT * FROM t1
    WHERE col1 > 23;".

    This function will take care of several things, for instance checking
    that the value we are estimating the selectivity for is a constant value.

    The order of the Items provided does not matter. For instance, if the
    operator argument given is "EQUALS_TO", it does not matter if the constant
    value is provided as the first or the second argument; this function will
    take care of this.

    @param items            an array of items that contains both the field we
                            are estimating the selectivity for, as well as the
                            user-provided constant values.
    @param item_count       the number of Items in the Item array.
    @param op               the predicate operator
    @param[out] selectivity the calculated selectivity if a usable histogram was
                            found

    @retval true if an error occurred (the Item provided was not a constant
    value or similar).
    @retval false on success
  */
  bool get_selectivity(Item **items, size_t item_count, enum_operator op,
                       double *selectivity) const;

  /**
    @return the fraction of non-null values in the histogram, computed as
            1.0 minus the fraction of NULL values.
  */
  double get_non_null_values_fraction() const {
    return 1.0 - get_null_values_fraction();
  }
};

/**
  Tell whether 'histogram' was built on an empty table, i.e. it reports
  neither distinct non-NULL values nor a non-zero fraction of NULL values.
*/
inline bool empty(const Histogram &histogram) {
  // Any distinct non-NULL value means the table had rows; the NULL fraction
  // is only consulted when there are none.
  if (histogram.get_num_distinct_values() != 0) return false;
  return histogram.get_null_values_fraction() == 0.0;
}

/**
  Create a histogram from a value map.

  This function will build a histogram from a value map. The histogram type
  depends on both the size of the input data and the number of buckets
  specified. If the number of distinct values is less than or equal to the
  number of buckets, a Singleton histogram will be created. Otherwise, an
  equi-height histogram will be created.

  The histogram will be allocated on the supplied mem_root, and it is the
  caller's responsibility to properly clean up when the histogram isn't needed
  anymore.

  @param   mem_root        the MEM_ROOT where the histogram contents will be
                           allocated
  @param   value_map       a value map containing [value, frequency]
  @param   num_buckets     the maximum number of buckets to create
  @param   db_name         name of the database this histogram represents
  @param   tbl_name        name of the table this histogram represents
  @param   col_name        name of the column this histogram represents

  @return  a histogram, using at most "num_buckets" buckets. The histogram
           type depends on the size of the input data, and the number of
           buckets
*/
template <class T>
Histogram *build_histogram(MEM_ROOT *mem_root, const Value_map<T> &value_map,
                           size_t num_buckets, const std::string &db_name,
                           const std::string &tbl_name,
                           const std::string &col_name);

/**
  Create or update histograms for a set of columns of a given table.

  This function will try to create histogram statistics for all the columns
  specified. If processing one of the columns fails, it will continue with the
  next one.

  @param thd Thread handler.
  @param table The table where we should look for the columns/data.
  @param columns Columns specified by the user.
  @param num_buckets The maximum number of buckets to create in each
         histogram.
  @param data The histogram JSON literal for the update.
  @param results A map where the result of each operation is stored.

  @return false on success, true on error.
*/
bool update_histogram(THD *thd, Table_ref *table, const columns_set &columns,
                      int num_buckets, LEX_STRING data, results_map &results);

/**
  Drop histograms for all columns in a given table.

  @param thd Thread handler.
  @param table The table where we should look for the columns.
  @param original_table_def Original table definition.
  @param results A map where the result of each operation is stored.

  @note Assumes that the caller owns an exclusive metadata lock on the table,
        so there is no need to lock individual statistics.

  @return false on success, true on error.
*/
bool drop_all_histograms(THD *thd, Table_ref &table,
                         const dd::Table &original_table_def,
                         results_map &results);

/**
  Drop histograms for a set of columns in a given table.

  This function will try to drop the histogram statistics for all specified
  columns. If dropping the histogram for one of the columns fails, it will
  continue with the next one.

  @param thd Thread handler.
  @param table The table where we should look for the columns.
  @param columns Columns specified by the user.
  @param needs_lock Whether we need to acquire metadata locks on
                    the table and column statistics to be dropped.
  @param results A map where the result of each operation is stored.

  @note When the needs_lock parameter is false, this function assumes that
        the caller owns an exclusive metadata lock on the table, so there is
        no need to lock individual statistics.

  @return false on success, true on error.
*/
bool drop_histograms(THD *thd, Table_ref &table, const columns_set &columns,
                     bool needs_lock, results_map &results);

/**
  Rename histograms for all columns in a given table.

  @param thd             Thread handler.
  @param old_schema_name The old schema name.
  @param old_table_name  The old table name.
  @param new_schema_name The new schema name.
  @param new_table_name  The new table name.
  @param results         A map where the result of each operation is stored.

  @return false on success, true on error.
*/
bool rename_histograms(THD *thd, const char *old_schema_name,
                       const char *old_table_name, const char *new_schema_name,
                       const char *new_table_name, results_map &results);

/**
  Find the histogram for a given column.

  NOTE(review): this declaration carries no documentation in the original
  header; the description below is derived from the signature only and must be
  confirmed against the definition.

  @param thd            Thread handler.
  @param schema_name    The schema name.
  @param table_name     The table name.
  @param column_name    The column name.
  @param[out] histogram Receives a pointer to the histogram (presumably left
                        as/set to nullptr when no histogram exists -- confirm).

  @return presumably false on success and true on error, like the other
          functions declared in this header -- confirm.
*/
bool find_histogram(THD *thd, const std::string &schema_name,
                    const std::string &table_name,
                    const std::string &column_name,
                    const Histogram **histogram);
}  // namespace histograms

#endif