1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
|
From: Andrew Bower <andrew@bower.uk>
Date: Wed, 27 Nov 2024 23:33:27 +0000
Bug: https://github.com/t-brown/mcds/issues/38
Forwarded: https://github.com/t-brown/mcds/pull/39
Last-Update: 2025-01-18
Subject: Unfold vCard before using it.
Follow the RFC by unfolding folded vCard lines (CRLF WSP) before using
the vCard. This is done in place: we will be accessing all the data
immediately anyway as we pass the automata over it, so it is likely to
stay in cache.
This pipelined approach seems easier than special handling of
continuation lines and follows the spirit of the specification.
Factors out regcomp() usage into a wrapper function that handles errors,
so that we can add additional regular expression usage without exploding
the code.
---
src/vcard.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++++------------
src/vcard.h | 5 +--
src/xml.c | 2 +-
3 files changed, 88 insertions(+), 24 deletions(-)
diff --git a/src/vcard.c b/src/vcard.c
index 533e587..c1534fa 100644
--- a/src/vcard.c
+++ b/src/vcard.c
@@ -40,6 +40,82 @@
#include "mem.h"
#include "vcard.h"
+/**
+ * Compile a regex with REG_EXTENDED always set, warning on failure.
+ *
+ * \parm[out] preg The compiled regex.
+ * \parm[in] regex The pattern to match.
+ * \parm[in] cflags Extra compilation flags according to regex(3).
+ *
+ * \retval 0 If there were no errors.
+ * \retval 1 If an error was encountered (a warning is emitted).
+ **/
+static int
+xregcomp(regex_t *preg, const char *regex, int cflags) {
+ int rerr = 0; /* Regex error code */
+ size_t rlen = 0; /* Regex error string length */
+ char *rstr = NULL; /* Regex error string */
+
+ rerr = regcomp(preg, regex, REG_EXTENDED | cflags);
+ if (rerr != 0) {
+ rlen = regerror(rerr, preg, NULL, 0);
+ rstr = xmalloc((rlen+1)*sizeof(char));
+ regerror(rerr, preg, rstr, rlen);
+ warnx(_("Unable to compile regex '%s': %s\n"), regex, rstr);
+ if (rstr) {
+ free(rstr);
+ rstr = NULL;
+ }
+ return 1;
+ }
+ return 0;
+}
+
+/**
+ * Unfold a vCard per RFC6350 section 3.2.
+ *
+ * Each fold (optional CR, LF, then one space or tab) is removed in place.
+ *
+ * \parm[in,out] vcard The vcard; must be a modifiable, NUL-terminated string.
+ *
+ * \retval 0 If there were no errors.
+ * \retval 1 If an error was encountered.
+ **/
+static int
+unfold(char *vcard)
+{
+ static const char r[] = "\r?\n[ \t]"; /* Continuation fold */
+ regmatch_t matches[1];
+ regex_t re;
+ size_t length = strlen(vcard);
+ size_t in_ptr = 0; /* AKA cut_to */
+ size_t out_ptr = 0; /* AKA cut_from */
+
+ if (xregcomp(&re, r, 0) != 0) {
+ return 1;
+ }
+
+ /* Hunt for folds and move the chunks in between them back by
+ * the accumulated number of folding characters. */
+ while (regexec(&re, vcard + in_ptr, 1, matches, 0) == 0) {
+ if (matches[0].rm_so == -1 || matches[0].rm_eo == -1) {
+ errx(EXIT_FAILURE, _("inconsistent regex result"));
+ }
+ memmove(vcard + out_ptr, /* memmove: regions may overlap */
+ vcard + in_ptr,
+ matches[0].rm_so); /* Bytes preceding this fold */
+ in_ptr = in_ptr + matches[0].rm_eo;
+ out_ptr = out_ptr + matches[0].rm_so;
+ }
+ if (options.verbose) {
+ fprintf(stderr, "Unfolding cut %zu bytes\n", in_ptr - out_ptr);
+ }
+ memmove(vcard + out_ptr, vcard + in_ptr, length - in_ptr + 1); /* +1: NUL */
+
+ regfree(&re);
+ return 0;
+}
+
/**
* Search a query's result. This will run regexs over the result
* to filter the data.
@@ -54,7 +130,7 @@
* \retval 1 If an error was encounted.
**/
int
-search(const char *card)
+search(char *card)
{
/* Regex patterns */
static const char r[] = "%s(.*):(.*)"; /* Whole result */
@@ -63,8 +139,6 @@ search(const char *card)
int plen = 0; /* Length of snprintf()'s */
int rerr = 0; /* Regex error code */
- size_t rlen = 0; /* Regex error string length */
- char *rstr = NULL; /* Regex error string */
size_t qlen = 0; /* Length of the query string */
char *q = NULL; /* Regex pattern for query */
@@ -78,6 +152,11 @@ search(const char *card)
regmatch_t match[3] = {0}; /* Regex matches */
+ if (unfold(card)) {
+ warnx(_("Error unfolding vCard."));
+ return(EXIT_FAILURE);
+ }
+
/* Generate a quoted query term */
if (quote(options.term, &qt)) {
warnx(_("Unable to build quoted term."));
@@ -96,15 +175,7 @@ search(const char *card)
return(EXIT_FAILURE);
}
- if ((rerr = regcomp(&rq, q, REG_EXTENDED|REG_NEWLINE|REG_ICASE)) != 0) {
- rlen = regerror(rerr, &rq, NULL, 0);
- rstr = xmalloc((rlen+1)*sizeof(char));
- regerror(rerr, &rq, rstr, rlen);
- warnx(_("Unable to compile regex '%s': %s\n"), q, rstr);
- if (rstr) {
- free(rstr);
- rstr = NULL;
- }
+ if (xregcomp(&rq, q, REG_NEWLINE|REG_ICASE) != 0) {
return(EXIT_FAILURE);
}
@@ -119,15 +190,7 @@ search(const char *card)
return(EXIT_FAILURE);
}
- if ((rerr = regcomp(&rs, s, REG_EXTENDED|REG_NEWLINE)) != 0) {
- rlen = regerror(rerr, &rs, NULL, 0);
- rstr = xmalloc((rlen+1)*sizeof(char));
- regerror(rerr, &rs, rstr, rlen);
- warnx(_("Unable to compile regex '%s': %s\n"), s, rstr);
- if (rstr) {
- free(rstr);
- rstr = NULL;
- }
+ if (xregcomp(&rs, s, REG_NEWLINE) != 0) {
return(EXIT_FAILURE);
}
diff --git a/src/vcard.h b/src/vcard.h
index 55f448c..80ed5dc 100644
--- a/src/vcard.h
+++ b/src/vcard.h
@@ -32,8 +32,9 @@ extern "C"
{
#endif
-/** Search the vcard */
-int search(const char *);
+/** Search the vcard.
+ * The supplied card string will be unfolded in place so must be modifiable. */
+int search(char *);
/** Quote a string for regex's */
int quote(const char *, char **);
diff --git a/src/xml.c b/src/xml.c
index 5f2e1b3..802f2f5 100644
--- a/src/xml.c
+++ b/src/xml.c
@@ -107,7 +107,7 @@ walk_tree(xmlDocPtr doc, xmlNode *node)
_("Data:\n%s\n"),
data);
}
- search((const char *)data);
+ search((char *)data);
xmlFree(data);
}
}
|