/*
 * File: rfc2047e.c
 *
 * package info (click to toggle)
 * fetchmail 6.5.6-2
 * links: PTS
 * area: main
 * in suites: forky, sid
 * size: 7,596 kB
 * sloc: ansic: 19,190; sh: 7,108; python: 2,395; perl: 564; yacc: 447; lex: 286; makefile: 260; awk: 124; lisp: 84; exp: 43; sed: 17
 * file content (224 lines) | stat: -rw-r--r-- 6,032 bytes
 */
/** \file rfc2047e.c - encode a string as per RFC-2047
    \copyright Copyright (C) 2004-2024  Matthias Andree

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include "fetchmail.h"

#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

/** characters that may appear in header line words without requiring
 * RFC-2047 encoding (grouped by class for readability; the pieces are
 * concatenated by the compiler into one string) */
static const char permitted_unencoded_chars[] =
    "!\"#$%&'()*+,-./"
    "0123456789"
    ":;<=>?@"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "[\\]^_`"
    "abcdefghijklmnopqrstuvwxyz"
    "{|}~";
/** characters that may appear literally inside RFC-2047 encoded-words;
 * compared with the list above, '(' ')' '=' '?' and '_' are left out */
static const char permitted_qpencoded_chars[] =
    "!\"#$%&'*+,-./"
    "0123456789"
    ":;<>@"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "[\\]^`"
    "abcdefghijklmnopqrstuvwxyz"
    "{|}~";
/** whitespace characters in RFC-5322 format headers:
 * space, horizontal tab, carriage return, line feed */
static const char whitespace_in_header_chars[] = { ' ', '\t', '\r', '\n', '\0' };

#ifdef TEST
/** No-op definition of report() for builds with TEST defined: takes the
 * same arguments and discards all of them. */
void report(FILE *fp, const char *format, ...)
{
    (void)format;
    (void)fp;
}
#endif

/** needs_enc guesses whether \a string needs any form of RFC-2047 encoding.
 *
 * \return true if \a string contains a byte that is not listed in
 * permitted_unencoded_chars, or if it both starts with "=?" and ends with
 * "?=" (i.e. it has the outer shape of an encoded-word); false otherwise.
 */
static bool needs_enc(const char *string)
{
    const size_t len = strlen(string);

    /* the permitted prefix must cover the whole string */
    if (strspn(string, permitted_unencoded_chars) != len)
	return true;

    /* only permitted characters, but framed like an encoded-word */
    return len >= 2
	&& memcmp(string, "=?", 2) == 0
	&& memcmp(string + len - 2, "?=", 2) == 0;
}

/** Encode \a nwords consecutive strings from \a words as one chain of
 * RFC-2047 "Q" encoded-words in \a charset.
 *
 * Spaces become '_', bytes listed in permitted_qpencoded_chars are copied
 * verbatim, and every other byte is written as "=XX" (upper-case hex).
 * Whenever the current output line has reached 69 characters, the
 * encoded-word is closed and a new one is opened on a continuation line
 * ("?=\r\n =?charset?Q?").
 *
 * \param words   array of NUL-terminated strings to encode
 * \param nwords  number of entries of \a words to encode
 * \param charset character set name to put into each encoded-word
 * \return a NUL-terminated string obtained from xmalloc(); the caller
 *         releases it with free().
 *
 * \bug This RFC-2047 encoder is simplistic and violates the RFC-2047
 * requirement that multibyte characters must not be split across
 * encoded-words.  It makes no attempt to determine the number of bytes
 * in one character.  This would only be noticeable by a mail user agent (MUA)
 * that displays RFC-2047 encoded words piecemeal. A MUA that decodes the
 * entire header's byte stream first and then interprets it as multibyte string
 * will be able to cope.
 */
static char *encode_words(char *const *words, int nwords, const char *charset)
{
    enum { FOLD_COLUMN = 69 };		/* fold once a line is this long */
    static const char hexdigits[] = "0123456789ABCDEF";
    const size_t cslen = strlen(charset);
    const size_t intro = cslen + 5;	/* "=?" charset "?Q?" */
    const size_t fold = intro + 5;	/* "?=\r\n " plus the next intro */
    size_t nbytes = 0, per_line, size;
    char *out, *t, *v;
    int i;

    for (i = 0; i < nwords; i++)
	nbytes += strlen(words[i]);

    /* Size the buffer for the worst case.  Every input byte takes at most
     * three characters.  A continuation line starts at column cslen + 6
     * (leading space plus intro) and is only folded once FOLD_COLUMN is
     * reached, so it carries at least per_line input bytes; the first line
     * starts one column earlier.  A fold is always followed by at least one
     * byte, hence there are at most nbytes / per_line folds.  The final
     * "+ 3" is the closing "?=" and the terminating NUL. */
    per_line = 1;
    if (cslen + 6 < (size_t)FOLD_COLUMN)
	per_line = ((size_t)FOLD_COLUMN - (cslen + 6) + 2) / 3;
    size = intro + 3 * nbytes + (nbytes / per_line) * fold + 3;

    /* v marks the start of the current output line, t the write position */
    out = v = (char *)xmalloc(size);
    t = stpcpy(out, "=?");
    t = stpcpy(t, charset);
    t = stpcpy(t, "?Q?");
    for (i = 0; i < nwords; i++) {
	const char *u;
	for (u = words[i]; *u; u++) {
	    if (t - v >= FOLD_COLUMN) {
		/* close this encoded-word, continue on a new line */
		t = stpcpy(t, "?=\r\n =?");
		v = t - 3;	/* the line starts at the space */
		t = stpcpy(t, charset);
		t = stpcpy(t, "?Q?");
	    }
	    if (*u == ' ') {
		*t++ = '_';
	    } else if (strchr(permitted_qpencoded_chars, *u)) {
		*t++ = *u;
	    } else {
		/* go through unsigned char so bytes >= 0x80 yield two digits */
		const unsigned char c = (unsigned char)*u;
		*t++ = '=';
		*t++ = hexdigits[c >> 4];
		*t++ = hexdigits[c & 0x0F];
	    }
	}
    }
    strcpy(t, "?=");
    return out;
}

/** RFC-2047 encode string with given charset. Only the Q encoding
 * (quoted-printable) supported at this time.
 *
 * The input is split into alternating words and whitespace runs.  Runs of
 * adjacent words that need encoding are replaced, together with the
 * whitespace between them, by one chain of encoded-words.  Finally a CRLF is
 * inserted in front of a whitespace run wherever the line would otherwise
 * grow beyond 74 characters.
 *
 * \param string  NUL-terminated text to encode
 * \param charset character set name for the encoded-words; must be shorter
 *                than 40 characters (checked with assert())
 * \return the encoded text
 *
 * WARNING: this code returns a static buffer!  The result is remembered in
 * a static pointer and freed by the next call, so it is only valid until
 * then and must not be freed by a caller that calls this function again.
 */
char *rfc2047e(const char *string, const char *charset) {
    static char *out;
    char *t, *line;
    const char *r;
    int count, idx, i;
    char **words = NULL;
    size_t l;

    assert(strlen(charset) < 40);
    /* drop the result of the previous call; free(NULL) is a no-op */
    free(out);
    out = NULL;

    /* phase 1: split original into words */
    /* 1a: count, 1b: copy */
    /* even slots receive words (slot 0 is empty if the string starts with
     * whitespace), odd slots receive the whitespace runs between them */
    count = 0;
    r = string;
    while (*r) {
	count++;
	r += strcspn(r, whitespace_in_header_chars);
	if (!*r) break;
	count++;
	r += strspn(r, whitespace_in_header_chars);
    }
    words = (char **)xmalloc(sizeof(char *) * (count + 1));

    idx = 0;
    r = string;
    while (*r) {
	l = strcspn(r, whitespace_in_header_chars);
	words[idx] = (char *)xmalloc(l+1);
	memcpy(words[idx], r, l);
	words[idx][l] = '\0';
	idx++;
	r += l;
	if (!*r) break;
	l = strspn(r, whitespace_in_header_chars);
	words[idx] = (char *)xmalloc(l+1);
	memcpy(words[idx], r, l);
	words[idx][l] = '\0';
	idx++;
	r += l;
    }

    /* phase 2: encode words */
    /* a: find ranges of adjacent words to need encoding */
    /* b: encode ranges */

    idx = 0;
    while (idx < count) {
	int end; char *tmp;

	if (!needs_enc(words[idx])) {
	    idx += 2;
	    continue;
	}
	/* extend the range over following words that need encoding, too */
	for (end = idx + 2; end < count; end += 2) {
	    if (!needs_enc(words[end]))
		break;
	}
	end -= 2;
	/* the encoded range includes the whitespace between its words */
	tmp = encode_words(&words[idx], end - idx + 1, charset);
	free(words[idx]);
	words[idx] = tmp;
	/* the remainder of the range is now part of words[idx] */
	for (i = idx + 1; i <= end; i++)
	    words[i][0] = '\0';
	idx = end + 2;
    }

    l = 0;
    for (idx = 0; idx < count; idx++) {
	l += strlen(words[idx]);
    }

    /* phase 3: limit lengths */
    /* the loop below adds at most one CRLF (two bytes) in front of each of
     * the count / 2 whitespace runs; one more byte for the final NUL */
    out = (char *)xmalloc(l + 2 * (size_t)(count / 2) + 1);

    if (count)
	t = stpcpy(out, words[0]);
    else
	t = out, *out = 0;

    /* an encoded first word may already span several lines, so measure
     * only its last line rather than the whole buffer */
    line = strrchr(out, '\n');
    line = (line != NULL) ? line + 1 : out;
    l = (size_t)(t - line);

    for (i = 1; i < count; i+=2) {
	char *mark = t;		/* everything from here on is new */
	char *nl;
	size_t m;

	/* m: whitespace plus the first line of the following word */
	m = strlen(words[i]);
	if (i + 1 < count)
	    m += strcspn(words[i+1], "\r\n");
	if (l + m > 74)
	    t = stpcpy(t, "\r\n");
	t = stpcpy(t, words[i]);
	if (i + 1 < count) {
	    t = stpcpy(t, words[i+1]);
	}
	/* the current line starts after the last newline; if none was
	 * appended just now, the previous line start is still valid */
	nl = strrchr(mark, '\n');
	if (nl != NULL)
	    line = nl + 1;
	l = (size_t)(t - line);
    }

    /* free memory */
    for (i = 0; i < count; i++) free(words[i]);
    free(words);
    return out;
}

#ifdef TEST
/** Test driver, built only when TEST is defined: encodes argv[1] with
 * rfc2047e(), using argv[2] as charset if given and "utf-8" otherwise,
 * and prints the input and the encoded output.  Without arguments it does
 * nothing.  Always returns EXIT_SUCCESS. */
int main(int argc, char **argv)
{
    const char *charset;
    char *encoded;

    if (argc <= 1)
	return EXIT_SUCCESS;

    charset = (argc > 2) ? argv[2] : "utf-8";
    encoded = rfc2047e(argv[1], charset);
    printf( " input: \"%s\"\n"
	    "output: \"%s\"\n", argv[1], encoded);
    free(encoded);
    return EXIT_SUCCESS;
}
#endif