1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266
|
/* Copyright © Charliecloud contributors. */
#define _GNU_SOURCE
#include "config.h"
#include <string.h>
#include "all.h"
/** Functions **/
/** Append a copy of an element to a list.
@param ar[in,out] Address of the array to append to. This array must be
on the heap, and it may be copied to a new, larger heap
array if more space is needed, in which case @c *ar is
set to point to the new array. Can be `NULL` to create
a new array containing only @p new.
@param new[in] Address of the element to append.
@param size[in] Size of one list element in bytes.
Implementation note: We could round up the new size to the next power of
two for allocation purposes, which would reduce the number of realloc()
that actually change the size. However, many allocators do this type of
thing internally already, and that seems a better place for it.
> ![NOTE]
>
> @p ar must be type-cast because C cannot automatically convert to/from
> double pointer to void. Using `void **` as we do relies on all pointers
> having the same representation, which is true on most modern machines
> but not guaranteed by the C standard [1].
>
> We could return a pointer to the (possibly new) array instead of using
> an out parameter, which would avoid the double pointer and associated
> non-portability but make it easy for callers to create dangling
> pointers, i.e., after `a = list_append(b, ...)`, `b` will be invalid.
> The concern here is not memory leaks but rather that `b` points to an
> invalid buffer that likely *looks* valid.
> ![WARNING]
>
> If list elements are themselves pointers, it is easy to mis-count
> (de)reference operations and thus create a hard-to-debug junk list. For
> example, consider this (correct) case:
>
> char s[] = "skibidi";
> char **list = NULL;
> list_append((void **)&list, &s, sizeof(char *));
>
> This code maintains a array of strings, which in C means an array of
> pointers to `char`. That is, on a 64-bit machine, `list` is an array of
> 8-byte items, each of which is a pointer to `char`. Here, because we
> want to append the pointer `s` *itself* to `list`, we must pass the
> *address of* that pointer, i.e. `&s`. On ARM64, a hex memory view of
> `list` might then look something like:
>
> 78 56 34 12 ff 7f 00 00 00 00 00 00 00 00 00 00
>
> Here, the first 8 bytes are a stack address (0x00007fff12345678 in
> little-endian) and the second is the terminating zero.
>
> Consider, however, this subtly incorrect variation:
>
> list_append((void **)&list, s, sizeof(char *));
>
> Here, instead of appending the address of `s` like we wanted, this
> appends the *contents* of the string instead. Unfortunately, because
> `s` is itself a pointer, the line builds fine and runs without error,
> but the results are incorrect (as is common with memory handling bugs).
> An ARM64 hex memory view might look like:
>
> 73 6b 69 62 69 64 69 00 00 00 00 00 00 00 00 00
>
> Note that the first 8 bytes have the odor of ASCII characters rather
> than a stack (0x7fff ...) or heap (0x5555 ...) pointer.
[1]: https://c-faq.com/ptrs/genericpp.html */
void list_append(void **ar, void *new, size_t size)
{
size_t ct;
T__ (new != NULL);
ct = list_count(*ar, size);
*ar = realloc_ch(*ar, (ct + 2) * size, true);
memcpy((char *)*ar + ct * size, new, size);
memset((char *)*ar + (ct + 1) * size, 0, size);
}
/** Copy the contents of list @p src onto the end of @p dst.
If @p src is NULL, this is a no-op; specifically, if @p dst is NULL, it
remains NULL rather than becoming an allocated list with zero items. */
void list_cat(void **dst, void *src, size_t size)
{
size_t ct_dst, ct_src;
if (src == NULL)
return;
ct_dst = list_count(*dst, size);
ct_src = list_count(src, size);
*dst = realloc_ch(*dst, (ct_dst+ct_src+1)*size, true);
memcpy(*dst + ct_dst*size, src, ct_src*size); // append src (no overlap)
memset(*dst + (ct_dst+ct_src)*size, 0, size); // set new terminator
}
/** Copy a list.
@param src List to copy.
@param size Size of items in the lists.
@returns a shallow copy of @p src, i.e., if the contents of @p is
pointers, the buffers they point to are shared with the original list. */
void *list_copy(void *src, size_t size)
{
void *dst = NULL;
list_cat(&dst, src, size);
return dst;
}
/* Return the number of elements of size size in list *ar, not including the
terminating zero element. */
size_t list_count(void *ar, size_t size)
{
size_t ct;
if (ar == NULL)
return 0;
for (ct = 0; !buf_zero_p((char *)ar + ct*size, size); ct++)
;
return ct;
}
/** Remove duplicates from the list of strings (i.e., @c "char *") @p ar.
Strings are considered equal if they have the same content, i.e. we’re not
checking pointer values.
The list may be re-ordered arbitrarily.
Removal is done in-place and the array is not resized, so it may be
(harmlessly) too large after this function. */
void list_dedup_strings(char **ar)
{
int i, imax;
// Ensure we have at least one element in the array, i.e. both ar[0] and
// ar[1] must be valid, though the latter might be NULL.
if (ar == NULL || ar[0] == NULL)
return;
// Algorithm: Iterate upward through the array. For each string, compare it
// to all other strings later in the array. If there is a duplicate,
// replace that duplicate with an arbitrary later element and shorten the
// array by one.
i = 0;
imax = list_count(ar, sizeof(ar[0])) - 1;
while (i < imax) {
bool dup_found = false;
for (int j = i+1; j < imax; j++)
if (streq(ar[i], ar[j])) {
dup_found = true;
ar[j] = ar[imax]; // replace ar[j] with last element
ar[imax--] = NULL; // shorten array
break;
}
if (!dup_found)
i++; // ar[i] unique, go to next
}
}
/* Return a pointer to a new, empty zero-terminated array containing elements
of size size, with room for ct elements without re-allocation. The latter
allows to pre-allocate an arbitrary number of slots in the list, which can
then be filled directly without testing the list’s length for each one.
(The list is completely filled with zeros, so every position has a
terminator after it.) */
void *list_new(size_t ct, size_t size)
{
T__ (size > 0);
return malloc_zeroed((ct+1) * size, true);
}
/* Split str into tokens delimited by delim (multiple adjacent delimiters are
treated as one). Copy each token into a newly-allocated string buffer, and
return these strings as a new list.
The function accepts a single delimiter, not multiple like strtok(3). */
void *list_new_strings(char delim, const char *str)
{
char **list;
char *str_, *tok_state;
char delims[] = { delim, '\0' };
size_t delim_ct = 0;
// Count delimiters so we can allocate the right size list initially,
// avoiding one realloc() per delimiter. Note this does not account for
// adjacent delimiters and thus may overcount tokens, possibly wasting a
// small amount of memory.
for (int i = 0; str[i] != '\0'; i++)
delim_ct += (str[i] == delim ? 1 : 0);
list = list_new(delim_ct + 1, sizeof(char *));
// Note: strtok_r(3)’s interface is rather awkward; see its man page.
str_ = strdup_ch(str);
tok_state = NULL;
for (int i = 0; true; i++) {
char *tok;
tok = strtok_r(str_, delims, &tok_state);
if (tok == NULL)
break;
T__ (i < delim_ct + 1); // bounds check
list[i] = tok;
str_ = NULL; // only pass actual string on first call
}
return list;
}
/** Join list of strings with delimiter.
@param list Null-terminated array of strings, i.e. pointers to `char`.
@param delim Delimiter to use when joining.
@returns newly-allocated string containing all strings in @p list
concatenated with @p delim between them. If @p list is empty, return the
empty string. */
char *list_stringify(void *list, char delim)
{
char **strs = list; // typeify
size_t delim_ct = -1; // number of delimiters needed
size_t byte_ct = 0; // bytes needed by strings (not incl. delimiters)
char *next, *ret;
// Return early if list is empty.
if (strs[0] == NULL)
return strdup_ch("");
// Count how much memory we need.
for (int i = 0; strs[i] != NULL; i++) {
delim_ct++;
byte_ct += strlen(strs[i]);
}
// Build the output string.
ret = malloc_ch(delim_ct + byte_ct + 1, false);
next = ret;
for (int i = 0; strs[i] != NULL; i++) {
if (i > 0) {
*next++ = delim;
}
next = stpcpy(next, strs[i]);
}
T__ (ret[delim_ct + byte_ct] == '\0');
// Done.
return ret;
}
|