File: strinfo.c

package info (click to toggle)
glib2.0 2.58.3-2%2Bdeb10u2
  • links: PTS, VCS
  • area: main
  • in suites: buster, buster-backports
  • size: 48,956 kB
  • sloc: ansic: 452,656; xml: 16,781; python: 6,149; makefile: 3,776; sh: 1,499; perl: 1,140; cpp: 9
file content (348 lines) | stat: -rw-r--r-- 10,630 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
/*
 * Copyright © 2010 Codethink Limited
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 *
 * Author: Ryan Lortie <desrt@desrt.ca>
 */

#include <string.h>
#include <glib.h>

/*
 * The string info map is an efficient data structure designed to be
 * used with a small set of items.  It is used by GSettings schemas for
 * three purposes:
 *
 *  1) Implement <choices> with a list of valid strings
 *
 *  2) Implement <alias> by mapping one string to another
 *
 *  3) Implement enumerated types by mapping strings to integer values
 *     (and back).
 *
 * The map is made out of an array of uint32s.  Each entry in the array
 * is an integer value, followed by a specially formatted string value:
 *
 *   The string starts with the byte 0xff or 0xfe, followed by the
 *   content of the string, followed by a nul byte, followed by
 *   additional nul bytes for padding, followed by a 0xff byte.
 *
 *   Padding is added so that the entire formatted string takes up a
 *   multiple of 4 bytes, and not less than 8 bytes.  The requirement
 *   for a string to take up 8 bytes is so that the scanner doesn't lose
 *   synch and mistake a string for an integer value.
 *
 * The first byte of the formatted string depends on if the integer is
 * an enum value (0xff) or an alias (0xfe).  If it is an alias then the
 * number refers to the word offset within the info map at which the
 * integer corresponding to the "target" value is stored.
 *
 * For example, consider the case of the string info map representing an
 * enumerated type of 'foo' (value 1) and 'bar' (value 2) and 'baz'
 * (alias for 'bar').  Note that string info maps are always little
 * endian.
 *
 * x01 x00 x00 x00   xff 'f' 'o' 'o'   x00 x00 x00 xff   x02 x00 x00 x00
 * xff 'b' 'a' 'r'   x00 x00 x00 xff   x03 x00 x00 x00   xfe 'b' 'a' 'z'
 * x00 x00 x00 xff
 *
 *
 * The operations that someone may want to perform with the map:
 *
 *   - lookup if a string is valid (and not an alias)
 *   - lookup the integer value for a enum 'nick'
 *   - lookup the integer value for the target of an alias
 *   - lookup an alias and convert it to its target string
 *   - lookup the enum nick for a given value
 *
 * In order to lookup if a string is valid, it is padded on either side
 * (as described) and scanned for in the array.  For example, you might
 * look for "foo":
 *
 *                   xff 'f' 'o' 'o'   x00 x00 x00 xff
 *
 * In order to lookup the integer value for a nick, the string is padded
 * on either side and scanned for in the array, as above.  Instead of
 * merely succeeding, we look at the integer value to the left of the
 * match.  This is the enum value.
 *
 * In order to lookup an alias and convert it to its target enum value,
 * the string is padded on either side (as described, with 0xfe) and
 * scanned for.  For example, you might look for "baz":
 *
 *                   xfe 'b' 'a' 'z'  x00 x00 x00 xff
 *
 * The integer immediately preceding the match then contains the offset
 * of the integer value of the target.  In our example, that's '3'.
 * This index is dereferenced to find the enum value of '2'.
 *
 * To convert the alias to its target string, 5 bytes just need to be
 * added past the start of the integer value to find the start of the
 * string.
 *
 * To lookup the enum nick for a given value, the value is searched for
 * in the array.  To ensure that the value isn't matching the inside of a
 * string, we must check that it is either the first item in the array or
 * immediately preceded by the byte 0xff.  It must also be immediately
 * followed by the byte 0xff.
 *
 * Because strings always take up a minimum of 2 words, because 0xff or
 * 0xfe never appear inside of a utf-8 string and because no two integer
 * values ever appear in sequence, the only way we can have the
 * sequence:
 *
 *     xff __ __ __ __ xff (or 0xfe)
 *
 * is in the event of an integer nested between two strings.
 *
 * For implementation simplicity/efficiency, strings may not be more
 * than 65 characters in length (ie: 17 32bit words after padding).
 *
 * In the event that we are doing <choices> (ie: not an enum type) then
 * the value of each choice is set to zero and ignored.
 */

#define STRINFO_MAX_WORDS   17
G_GNUC_UNUSED static guint
strinfo_string_to_words (const gchar *string,
                         guint32     *words,
                         gboolean     alias)
{
  guint n_words;
  gsize size;

  size = strlen (string);

  n_words = MAX (2, (size + 6) >> 2);

  if (n_words > STRINFO_MAX_WORDS)
    return FALSE;

  words[0] = GUINT32_TO_LE (alias ? 0xfe : 0xff);
  words[n_words - 1] = GUINT32_TO_BE (0xff);
  memcpy (((gchar *) words) + 1, string, size + 1);

  return n_words;
}

G_GNUC_UNUSED static gint
strinfo_scan (const guint32 *strinfo,
              guint          length,
              const guint32 *words,
              guint          n_words)
{
  guint i = 0;

  if (length < n_words)
    return -1;

  while (i <= length - n_words)
    {
      guint j = 0;

      for (j = 0; j < n_words; j++)
        if (strinfo[i + j] != words[j])
          break;

      if (j == n_words)
        return i;   /* match */

      /* skip at least one word, continue */
      i += j ? j : 1;
    }

  return -1;
}

G_GNUC_UNUSED static gint
strinfo_find_string (const guint32 *strinfo,
                     guint          length,
                     const gchar   *string,
                     gboolean       alias)
{
  guint32 words[STRINFO_MAX_WORDS];
  guint n_words;

  if (length == 0)
    return -1;

  n_words = strinfo_string_to_words (string, words, alias);

  return strinfo_scan (strinfo + 1, length - 1, words, n_words);
}

G_GNUC_UNUSED static gint
strinfo_find_integer (const guint32 *strinfo,
                      guint          length,
                      guint32        value)
{
  guint i;

  for (i = 0; i < length; i++)
    if (strinfo[i] == GUINT32_TO_LE (value))
      {
        const guchar *charinfo = (const guchar *) &strinfo[i];

        /* make sure it has 0xff on either side */
        if ((i == 0 || charinfo[-1] == 0xff) && charinfo[4] == 0xff)
          return i;
      }

  return -1;
}

G_GNUC_UNUSED static gboolean
strinfo_is_string_valid (const guint32 *strinfo,
                         guint          length,
                         const gchar   *string)
{
  return strinfo_find_string (strinfo, length, string, FALSE) != -1;
}

G_GNUC_UNUSED static gboolean
strinfo_enum_from_string (const guint32 *strinfo,
                          guint          length,
                          const gchar   *string,
                          guint         *result)
{
  gint index;

  index = strinfo_find_string (strinfo, length, string, FALSE);

  if (index < 0)
    return FALSE;

  *result = GUINT32_FROM_LE (strinfo[index]);
  return TRUE;
}

G_GNUC_UNUSED static const gchar *
strinfo_string_from_enum (const guint32 *strinfo,
                          guint          length,
                          guint          value)
{
  gint index;

  index = strinfo_find_integer (strinfo, length, value);

  if (index < 0)
    return NULL;

  return 1 + (const gchar *) &strinfo[index + 1];
}

G_GNUC_UNUSED static const gchar *
strinfo_string_from_alias (const guint32 *strinfo,
                           guint          length,
                           const gchar   *alias)
{
  gint index;

  index = strinfo_find_string (strinfo, length, alias, TRUE);

  if (index < 0)
    return NULL;

  return 1 + (const gchar *) &strinfo[GUINT32_TO_LE (strinfo[index]) + 1];
}

G_GNUC_UNUSED static GVariant *
strinfo_enumerate (const guint32 *strinfo,
                   guint          length)
{
  GVariantBuilder builder;
  const gchar *ptr, *end;

  ptr = (gpointer) strinfo;
  end = ptr + 4 * length;

  ptr += 4;

  g_variant_builder_init (&builder, G_VARIANT_TYPE_STRING_ARRAY);

  while (ptr < end)
    {
      /* don't include aliases */
      if (*ptr == '\xff')
        g_variant_builder_add (&builder, "s", ptr + 1);

      /* find the end of this string */
      ptr = memchr (ptr, '\xff', end - ptr);
      g_assert (ptr != NULL);

      /* skip over the int to the next string */
      ptr += 5;
    }

  return g_variant_builder_end (&builder);
}

G_GNUC_UNUSED static void
strinfo_builder_append_item (GString     *builder,
                             const gchar *string,
                             guint        value)
{
  guint32 words[STRINFO_MAX_WORDS];
  guint n_words;

  value = GUINT32_TO_LE (value);

  n_words = strinfo_string_to_words (string, words, FALSE);
  g_string_append_len (builder, (void *) &value, sizeof value);
  g_string_append_len (builder, (void *) words, 4 * n_words);
}

G_GNUC_UNUSED static gboolean
strinfo_builder_append_alias (GString     *builder,
                              const gchar *alias,
                              const gchar *target)
{
  guint32 words[STRINFO_MAX_WORDS];
  guint n_words;
  guint value;
  gint index;

  index = strinfo_find_string ((const guint32 *) builder->str,
                               builder->len / 4, target, FALSE);

  if (index == -1)
    return FALSE;

  value = GUINT32_TO_LE (index);

  n_words = strinfo_string_to_words (alias, words, TRUE);
  g_string_append_len (builder, (void *) &value, sizeof value);
  g_string_append_len (builder, (void *) words, 4 * n_words);

  return TRUE;
}

G_GNUC_UNUSED static gboolean
strinfo_builder_contains (GString     *builder,
                          const gchar *string)
{
  return strinfo_find_string ((const guint32 *) builder->str,
                              builder->len / 4, string, FALSE) != -1 ||
         strinfo_find_string ((const guint32 *) builder->str,
                              builder->len / 4, string, TRUE) != -1;
}

G_GNUC_UNUSED static gboolean
strinfo_builder_contains_value (GString *builder,
                                guint    value)
{
  return strinfo_string_from_enum ((const guint32 *) builder->str,
                                   builder->len / 4, value) != NULL;
}