File: list.c

package info (click to toggle)
charliecloud 0.43-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 3,116 kB
  • sloc: python: 6,021; sh: 4,284; ansic: 3,863; makefile: 598
file content (266 lines) | stat: -rw-r--r-- 9,110 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
/* Copyright © Charliecloud contributors. */

#define _GNU_SOURCE
#include "config.h"

#include <string.h>

#include "all.h"


/** Functions **/

/** Append a copy of an element to a list.

    @param ar[in,out]  Address of the array to append to. This array must be
                       on the heap, and it may be copied to a new, larger heap
                       array if more space is needed, in which case @c *ar is
                       set to point to the new array. Can be `NULL` to create
                       a new array containing only @p new.

    @param new[in]     Address of the element to append.

    @param size[in]    Size of one list element in bytes.

    Implementation note: We could round up the new size to the next power of
    two for allocation purposes, which would reduce the number of realloc()
    that actually change the size. However, many allocators do this type of
    thing internally already, and that seems a better place for it.

    > ![NOTE]
    >
    > @p ar must be type-cast because C cannot automatically convert to/from
    > double pointer to void. Using `void **` as we do relies on all pointers
    > having the same representation, which is true on most modern machines
    > but not guaranteed by the C standard [1].
    >
    > We could return a pointer to the (possibly new) array instead of using
    > an out parameter, which would avoid the double pointer and associated
    > non-portability but make it easy for callers to create dangling
    > pointers, i.e., after `a = list_append(b, ...)`, `b` will be invalid.
    > The concern here is not memory leaks but rather that `b` points to an
    > invalid buffer that likely *looks* valid.

    > ![WARNING]
    >
    > If list elements are themselves pointers, it is easy to mis-count
    > (de)reference operations and thus create a hard-to-debug junk list. For
    > example, consider this (correct) case:
    >
    >   char s[] = "skibidi";
    >   char **list = NULL;
    >   list_append((void **)&list, &s, sizeof(char *));
    >
    > This code maintains a array of strings, which in C means an array of
    > pointers to `char`. That is, on a 64-bit machine, `list` is an array of
    > 8-byte items, each of which is a pointer to `char`. Here, because we
    > want to append the pointer `s` *itself* to `list`, we must pass the
    > *address of* that pointer, i.e. `&s`. On ARM64, a hex memory view of
    > `list` might then look something like:
    >
    >   78 56 34 12 ff 7f 00 00  00 00 00 00 00 00 00 00
    >
    > Here, the first 8 bytes are a stack address (0x00007fff12345678 in
    > little-endian) and the second is the terminating zero.
    >
    > Consider, however, this subtly incorrect variation:
    >
    >   list_append((void **)&list, s, sizeof(char *));
    >
    > Here, instead of appending the address of `s` like we wanted, this
    > appends the *contents* of the string instead. Unfortunately, because
    > `s` is itself a pointer, the line builds fine and runs without error,
    > but the results are incorrect (as is common with memory handling bugs).
    > An ARM64 hex memory view might look like:
    >
    >   73 6b 69 62 69 64 69 00  00 00 00 00 00 00 00 00
    >
    > Note that the first 8 bytes have the odor of ASCII characters rather
    > than a stack (0x7fff ...) or heap (0x5555 ...) pointer.

    [1]: https://c-faq.com/ptrs/genericpp.html */
void list_append(void **ar, void *new, size_t size)
{
   size_t ct;
   T__ (new != NULL);

   ct = list_count(*ar, size);
   *ar = realloc_ch(*ar, (ct + 2) * size, true);
   memcpy((char *)*ar + ct * size, new, size);
   memset((char *)*ar + (ct + 1) * size, 0, size);
}

/** Copy the contents of list @p src onto the end of @p dst.

    If @p src is NULL, this is a no-op; specifically, if @p dst is NULL, it
    remains NULL rather than becoming an allocated list with zero items. */
void list_cat(void **dst, void *src, size_t size)
{
   size_t ct_dst, ct_src;

   if (src == NULL)
      return;

   ct_dst = list_count(*dst, size);
   ct_src = list_count(src, size);
   *dst = realloc_ch(*dst, (ct_dst+ct_src+1)*size, true);
   memcpy(*dst + ct_dst*size, src, ct_src*size);  // append src (no overlap)
   memset(*dst + (ct_dst+ct_src)*size, 0, size);  // set new terminator
}

/** Copy a list.

      @param src   List to copy.

      @param size  Size of items in the lists.

    @returns a shallow copy of @p src, i.e., if the contents of @p is
    pointers, the buffers they point to are shared with the original list. */
void *list_copy(void *src, size_t size)
{
   void *dst = NULL;
   list_cat(&dst, src, size);
   return dst;
}

/* Return the number of elements of size size in list *ar, not including the
   terminating zero element. */
size_t list_count(void *ar, size_t size)
{
   size_t ct;

   if (ar == NULL)
      return 0;

   for (ct = 0; !buf_zero_p((char *)ar + ct*size, size); ct++)
      ;
   return ct;
}

/** Remove duplicates from the list of strings (i.e., @c "char *") @p ar.

    Strings are considered equal if they have the same content, i.e. we’re not
    checking pointer values.

    The list may be re-ordered arbitrarily.

    Removal is done in-place and the array is not resized, so it may be
    (harmlessly) too large after this function. */
void list_dedup_strings(char **ar)
{
   int i, imax;

   // Ensure we have at least one element in the array, i.e. both ar[0] and
   // ar[1] must be valid, though the latter might be NULL.
   if (ar == NULL || ar[0] == NULL)
      return;

   // Algorithm: Iterate upward through the array. For each string, compare it
   // to all other strings later in the array. If there is a duplicate,
   // replace that duplicate with an arbitrary later element and shorten the
   // array by one.
   i = 0;
   imax = list_count(ar, sizeof(ar[0])) - 1;
   while (i < imax) {
      bool dup_found = false;
      for (int j = i+1; j < imax; j++)
         if (streq(ar[i], ar[j])) {
            dup_found = true;
            ar[j] = ar[imax];   // replace ar[j] with last element
            ar[imax--] = NULL;  // shorten array
            break;
         }
      if (!dup_found)
         i++;                   // ar[i] unique, go to next
   }
}

/* Return a pointer to a new, empty zero-terminated array containing elements
   of size size, with room for ct elements without re-allocation. The latter
   allows to pre-allocate an arbitrary number of slots in the list, which can
   then be filled directly without testing the list’s length for each one.
   (The list is completely filled with zeros, so every position has a
   terminator after it.) */
void *list_new(size_t ct, size_t size)
{
   T__ (size > 0);
   return malloc_zeroed((ct+1) * size, true);
}

/* Split str into tokens delimited by delim (multiple adjacent delimiters are
   treated as one). Copy each token into a newly-allocated string buffer, and
   return these strings as a new list.

   The function accepts a single delimiter, not multiple like strtok(3). */
void *list_new_strings(char delim, const char *str)
{
   char **list;
   char *str_, *tok_state;
   char delims[] = { delim, '\0' };
   size_t delim_ct = 0;

   // Count delimiters so we can allocate the right size list initially,
   // avoiding one realloc() per delimiter. Note this does not account for
   // adjacent delimiters and thus may overcount tokens, possibly wasting a
   // small amount of memory.
   for (int i = 0; str[i] != '\0'; i++)
      delim_ct += (str[i] == delim ? 1 : 0);

   list = list_new(delim_ct + 1, sizeof(char *));

   // Note: strtok_r(3)’s interface is rather awkward; see its man page.
   str_ = strdup_ch(str);
   tok_state = NULL;
   for (int i = 0; true; i++) {
      char *tok;
      tok = strtok_r(str_, delims, &tok_state);
      if (tok == NULL)
         break;
      T__ (i < delim_ct + 1);  // bounds check
      list[i] = tok;
      str_ = NULL;             // only pass actual string on first call
   }

   return list;
}

/** Join list of strings with delimiter.

    @param list   Null-terminated array of strings, i.e. pointers to `char`.

    @param delim  Delimiter to use when joining.

    @returns newly-allocated string containing all strings in @p list
    concatenated with @p delim between them. If @p list is empty, return the
    empty string. */
char *list_stringify(void *list, char delim)
{
   char **strs = list;    // typeify
   size_t delim_ct = -1;  // number of delimiters needed
   size_t byte_ct = 0;    // bytes needed by strings (not incl. delimiters)
   char *next, *ret;

   // Return early if list is empty.
   if (strs[0] == NULL)
      return strdup_ch("");

   // Count how much memory we need.
   for (int i = 0; strs[i] != NULL; i++) {
      delim_ct++;
      byte_ct += strlen(strs[i]);
   }

   // Build the output string.
   ret = malloc_ch(delim_ct + byte_ct + 1, false);
   next = ret;
   for (int i = 0; strs[i] != NULL; i++) {
      if (i > 0) {
         *next++ = delim;
      }
      next = stpcpy(next, strs[i]);
   }
   T__ (ret[delim_ct + byte_ct] == '\0');

   // Done.
   return ret;
}