From: Andrew Bower <andrew@bower.uk>
Date: Wed, 27 Nov 2024 23:33:27 +0000
Bug: https://github.com/t-brown/mcds/issues/38
Forwarded: https://github.com/t-brown/mcds/pull/39
Last-Update: 2025-01-18
Subject: Unfold vCard before using it.

Follow RFC 6350 by unfolding folded vCard lines (a line break followed by
a single space or tab) before using the vCard. This is done in place: all
of the data is accessed immediately afterwards anyway, when the regex
automata are passed over it, so it is likely to stay in cache.

This pipelined approach seems easier than special handling of
continuation lines and follows the spirit of the specification.

Factors out regcomp() usage into a wrapper function that handles errors,
so that we can add additional regular expression usage without exploding
the code.
---
 src/vcard.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++++------------
 src/vcard.h |   5 +--
 src/xml.c   |   2 +-
 3 files changed, 88 insertions(+), 24 deletions(-)

diff --git a/src/vcard.c b/src/vcard.c
index 533e587..c1534fa 100644
--- a/src/vcard.c
+++ b/src/vcard.c
@@ -40,6 +40,82 @@
 #include "mem.h"
 #include "vcard.h"
 
+/**
+ * Compile regex, checking and handling errors. REG_EXTENDED is always
+ * added to the supplied flags; failures are reported via warnx().
+ *
+ * \param[out] preg The compiled regex.
+ * \param[in] regex The pattern to match.
+ * \param[in] cflags Additional compilation flags according to regex(3).
+ *
+ * \retval 0 If there were no errors.
+ * \retval 1 If an error was encountered.
+ **/
+static int
+xregcomp(regex_t *preg, const char *regex, int cflags)
+{
+	int rerr = 0;			/* Regex error code */
+	size_t rlen = 0;		/* Regex error string length */
+	char *rstr = NULL;		/* Regex error string */
+
+	rerr = regcomp(preg, regex, REG_EXTENDED | cflags);
+	if (rerr != 0) {
+		/* A zero-sized query yields the buffer size needed, NUL included. */
+		rlen = regerror(rerr, preg, NULL, 0);
+		rstr = xmalloc((rlen+1)*sizeof(char));
+		regerror(rerr, preg, rstr, rlen);
+		warnx(_("Unable to compile regex '%s': %s\n"), regex, rstr);
+		free(rstr);
+		return 1;
+	}
+	return 0;
+}
+
+/**
+ * Unfold a vCard per RFC6350 section 3.2.
+ *
+ * It will remove the gaps between folded lines in-place.
+ *
+ * \param[in,out] vcard The NUL-terminated vCard text; shortened in place.
+ *
+ * \retval 0 If there were no errors.
+ * \retval 1 If an error was encountered.
+ **/
+static int
+unfold(char *vcard)
+{
+	static const char r[] = "\r?\n[ \t]";     /* Continuation fold */
+	regmatch_t matches[1];
+	regex_t re;
+	size_t length = strlen(vcard);
+	size_t in_ptr = 0; /* Read offset: start of the text not yet scanned */
+	size_t out_ptr = 0; /* Write offset: end of the unfolded text so far */
+
+	if (xregcomp(&re, r, 0) != 0) {
+		return 1;
+	}
+
+	/* Hunt for folds and move the chunks in between them back by
+	 * the accumulated number of folding characters. */
+	while (regexec(&re, vcard + in_ptr, 1, matches, 0) == 0) {
+		if (matches[0].rm_so == -1 || matches[0].rm_eo == -1) {
+			errx(EXIT_FAILURE, _("inconsistent regex result"));
+		}
+		memmove(vcard + out_ptr,
+			vcard + in_ptr,
+			(size_t)matches[0].rm_so);
+		in_ptr   = in_ptr + (size_t)matches[0].rm_eo;
+		out_ptr  = out_ptr + (size_t)matches[0].rm_so;
+	}
+	if (options.verbose) {
+		fprintf(stderr, "Unfolding cut %zu bytes\n", in_ptr - out_ptr);
+	}
+	memmove(vcard + out_ptr, vcard + in_ptr, length - in_ptr + 1); /* +1: NUL */
+
+	regfree(&re);
+	return 0;
+}
+
 /**
  * Search a query's result. This will run regexs over the result
  * to filter the data.
@@ -54,7 +130,7 @@
  * \retval 1 If an error was encounted.
  **/
 int
-search(const char *card)
+search(char *card)
 {
 	/* Regex patterns */
 	static const char r[] = "%s(.*):(.*)";     /* Whole result */
@@ -63,8 +139,6 @@ search(const char *card)
 	int plen = 0;			/* Length of snprintf()'s */
 
 	int rerr = 0;			/* Regex error code */
-	size_t rlen = 0;		/* Regex error string length */
-	char *rstr = NULL;		/* Regex error string */
 
 	size_t qlen = 0;		/* Length of the query string */
 	char *q = NULL;			/* Regex pattern for query */
@@ -78,6 +152,11 @@ search(const char *card)
 
 	regmatch_t match[3] = {0};	/* Regex matches */
 
+	if (unfold(card)) {
+		warnx(_("Error unfolding vCard."));
+		return(EXIT_FAILURE);
+	}
+
 	/* Generate a quoted query term */
 	if (quote(options.term, &qt)) {
 		warnx(_("Unable to build quoted term."));
@@ -96,15 +175,7 @@ search(const char *card)
 		return(EXIT_FAILURE);
 	}
 
-	if ((rerr = regcomp(&rq, q, REG_EXTENDED|REG_NEWLINE|REG_ICASE)) != 0) {
-		rlen = regerror(rerr, &rq, NULL, 0);
-		rstr = xmalloc((rlen+1)*sizeof(char));
-		regerror(rerr, &rq, rstr, rlen);
-		warnx(_("Unable to compile regex '%s': %s\n"), q, rstr);
-		if (rstr) {
-			free(rstr);
-			rstr = NULL;
-		}
+	if (xregcomp(&rq, q, REG_NEWLINE|REG_ICASE) != 0) {
 		return(EXIT_FAILURE);
 	}
 
@@ -119,15 +190,7 @@ search(const char *card)
 		return(EXIT_FAILURE);
 	}
 
-	if ((rerr = regcomp(&rs, s, REG_EXTENDED|REG_NEWLINE)) != 0) {
-		rlen = regerror(rerr, &rs, NULL, 0);
-		rstr = xmalloc((rlen+1)*sizeof(char));
-		regerror(rerr, &rs, rstr, rlen);
-		warnx(_("Unable to compile regex '%s': %s\n"), s, rstr);
-		if (rstr) {
-			free(rstr);
-			rstr = NULL;
-		}
+	if (xregcomp(&rs, s, REG_NEWLINE) != 0) {
 		return(EXIT_FAILURE);
 	}
 
diff --git a/src/vcard.h b/src/vcard.h
index 55f448c..80ed5dc 100644
--- a/src/vcard.h
+++ b/src/vcard.h
@@ -32,8 +32,9 @@ extern "C"
 {
 #endif
 
-/** Search the vcard */
-int search(const char *);
+/** Search the vcard.
+ * The supplied card string will be unfolded in place so must be modifiable. */
+int search(char *);
 
 /** Quote a string for regex's */
 int quote(const char *, char **);
diff --git a/src/xml.c b/src/xml.c
index 5f2e1b3..802f2f5 100644
--- a/src/xml.c
+++ b/src/xml.c
@@ -107,7 +107,7 @@ walk_tree(xmlDocPtr doc, xmlNode *node)
 							_("Data:\n%s\n"),
 							data);
 					}
-					search((const char *)data);
+					search((char *)data);
 					xmlFree(data);
 				}
 		}
