File: fts0types.ic

package info (click to toggle)
mysql-8.0 8.0.44-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 1,272,892 kB
  • sloc: cpp: 4,685,345; ansic: 412,712; pascal: 108,395; java: 83,641; perl: 30,221; cs: 27,067; sql: 26,594; python: 21,816; sh: 17,285; yacc: 17,169; php: 11,522; xml: 7,388; javascript: 7,083; makefile: 1,793; lex: 1,075; awk: 670; asm: 520; objc: 183; ruby: 97; lisp: 86
file content (206 lines) | stat: -rw-r--r-- 7,209 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
/*****************************************************************************

Copyright (c) 2007, 2025, Oracle and/or its affiliates.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License, version 2.0, as published by the
Free Software Foundation.

This program is designed to work with certain software (including
but not limited to OpenSSL) that is licensed under separate terms,
as designated in a particular file or component or in included license
documentation.  The authors of MySQL hereby grant you an additional
permission to link the program and your derivative works with the
separately licensed software that they have either included with
the program or referenced in the documentation.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA

*****************************************************************************/

/** @file include/fts0types.ic
 Full text search types.

 Created 2007-03-27 Sunny Bains
 *******************************************************/

#ifndef INNOBASE_FTS0TYPES_IC
#define INNOBASE_FTS0TYPES_IC

#include "ha_prototypes.h"
#include "rem0cmp.h"

/** Duplicate an FTS string into memory taken from a heap.
 Allocates src->f_len + 1 bytes from the heap, copies the source bytes,
 appends a terminating zero byte and copies the length and character count.
 @param[out] dst   string that receives the copy
 @param[in]  src   string to copy from
 @param[in]  heap  heap the new buffer is allocated from */
inline void fts_string_dup(fts_string_t *dst, const fts_string_t *src,
                           mem_heap_t *heap) {
  const auto n_bytes = src->f_len;

  /* One extra byte for the terminating zero. */
  dst->f_str = static_cast<byte *>(mem_heap_alloc(heap, n_bytes + 1));

  memcpy(dst->f_str, src->f_str, n_bytes);
  dst->f_str[n_bytes] = 0;

  dst->f_len = n_bytes;
  dst->f_n_char = src->f_n_char;
}

/** Three-way comparison of two doc_ids.
 @param[in] id1  left-hand doc_id
 @param[in] id2  right-hand doc_id
 @return -1 if id1 < id2, 0 if id1 == id2, 1 if id1 > id2 */
inline int fts_doc_id_cmp(doc_id_t id1, doc_id_t id2) {
  return id1 < id2 ? -1 : (id1 > id2 ? 1 : 0);
}

/** Compare two objects of type T by their doc_id member.
 The untyped pointer signature lets this be passed where a generic
 (const void *, const void *) comparator is expected.
 @tparam T type with a doc_id member
 @param[in] p1 Pointer to the first instance of T
 @param[in] p2 Pointer to the second instance of T
 @return result of fts_doc_id_cmp() on the two doc_id members */
template <typename T>
int fts_doc_id_field_cmp(const void *p1, const void *p2) {
  return fts_doc_id_cmp(static_cast<const T *>(p1)->doc_id,
                        static_cast<const T *>(p2)->doc_id);
}

/** Get the first character's code position for FTS index partition.
Defined outside this file; only the declaration is visible here.
@param[in]      cs        Character set
@param[in]      p2        string
@param[in]      len2      string length
@return value that fts_select_index_by_range() compares against the
fts_index_selector[].value boundaries */
extern ulint innobase_strnxfrm(const CHARSET_INFO *cs, const uchar *p2,
                               const ulint len2);

/** Tell whether a collation is one of the CJK collations known to FTS.
The decision is made purely by exact match of the collation name against
a fixed list.
@param[in]      cs      charset
@retval true    if the collation name is in the CJK list
@retval false   otherwise */
inline bool fts_is_charset_cjk(const CHARSET_INFO *cs) {
  static const char *const cjk_collations[] = {
      "gb2312_chinese_ci",  "gbk_chinese_ci",    "big5_chinese_ci",
      "gb18030_chinese_ci", "ujis_japanese_ci",  "sjis_japanese_ci",
      "cp932_japanese_ci",  "eucjpms_japanese_ci", "euckr_korean_ci"};

  for (const char *name : cjk_collations) {
    if (strcmp(cs->m_coll_name, name) == 0) {
      return true;
    }
  }

  return false;
}

/** Pick the FTS auxiliary index for a string by range lookup.
The value returned by innobase_strnxfrm() is compared against the
boundary values in fts_index_selector, which is scanned until an entry
with value 0 is reached.
@param[in]      cs      charset
@param[in]      str     string
@param[in]      len     string length
@retval the index to use for the string */
inline ulint fts_select_index_by_range(const CHARSET_INFO *cs, const byte *str,
                                       ulint len) {
  const ulint weight = innobase_strnxfrm(cs, str, len);
  ulint idx = 0;

  for (; fts_index_selector[idx].value != 0; ++idx) {
    const auto bound = fts_index_selector[idx].value;

    if (bound == weight) {
      /* Exact hit on a boundary: that entry is the one to use. */
      return idx;
    }

    if (bound > weight) {
      /* Passed the weight: use the previous entry, clamped at the first. */
      return idx == 0 ? 0 : idx - 1;
    }
  }

  /* Weight is above every boundary: use the last real entry. */
  ut_ad(idx > 1);

  return idx - 1;
}

/** Pick the FTS auxiliary index for a string by hashing its first character.
@param[in]      cs      charset
@param[in]      str     string
@param[in]      len     string length
@retval the index to use for the string; 0 when str is null or len is 0 */
inline ulint fts_select_index_by_hash(const CHARSET_INFO *cs, const byte *str,
                                      ulint len) {
  ut_ad(!(str == nullptr && len > 0));

  if (str == nullptr || len == 0) {
    return 0;
  }

  /* Length in bytes of the first character of the string. */
  const char *first = reinterpret_cast<const char *>(str);
  int char_len = my_mbcharlen_ptr(cs, first, first + len);
  ut_ad(static_cast<ulint>(char_len) <= len);

  /*
    Collation hash of that first character. The result is deliberately
    narrowed to ulong before the modulo for legacy reasons: this gives
    different results on Windows and Linux, but it has to match on-disk
    data.
   */
  uint64_t nr1 = 1;
  uint64_t nr2 = 4;
  cs->coll->hash_sort(cs, str, char_len, &nr1, &nr2);

  const ulong truncated = static_cast<ulong>(nr1);

  return truncated % FTS_NUM_AUX_INDEX;
}

/** Choose the FTS auxiliary table for a word.
@param[in]      cs      charset
@param[in]      str     string
@param[in]      len     string length in bytes
@retval the auxiliary table number to use for the string, zero-based */
inline ulint fts_select_index(const CHARSET_INFO *cs, const byte *str,
                              ulint len) {
  /* Invariant: words that compare equal under the character set's
  collation (same sort order) MUST land in the same auxiliary table.
  An equality lookup on a word selects every word that is equal in the
  table's/field's collation order, and other parts of FTS (e.g. index
  optimization) rely on this as well.

  The invariant holds because both paths use collation-provided
  transforms, which give binary-equal results for collation-equal input:
  the weight string (strnxfrm) for alphabetic scripts, and the collation
  hash for non-alphabetic ones (CJK = Chinese, Korean and Japanese). */
  return fts_is_charset_cjk(cs) ? fts_select_index_by_hash(cs, str, len)
                                : fts_select_index_by_range(cs, str, len);
}

/** Look up the suffix of an FTS auxiliary index.
The argument is used directly as a position in fts_index_selector; no range
check is done here.
@param[in]      selected        selected index
@return the suffix stored for that index */
inline const char *fts_get_suffix(ulint selected) {
  const auto &entry = fts_index_selector[selected];

  return entry.suffix;
}

/** Look up the suffix of an FTS auxiliary index in the 5.7 compatible format.
The argument is used directly as a position in fts_index_selector_5_7; no
range check is done here.
@param[in]      selected        selected index
@return the suffix name */
inline const char *fts_get_suffix_5_7(ulint selected) {
  const auto &entry = fts_index_selector_5_7[selected];

  return entry.suffix;
}

#endif /* INNOBASE_FTS0TYPES_IC */