| 12
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 
 | #include "test-tool.h"
static const char *utf8_replace_character = "�";
/*
 * Encodes (possibly incorrect) UTF-8 on <stdin> to <stdout>, to be embedded
 * in an XML file.
 */
int cmd__xml_encode(int argc UNUSED, const char **argv UNUSED)
{
	unsigned char buf[1024], tmp[4], *tmp2 = NULL;
	ssize_t cur = 0, len = 1, remaining = 0;
	unsigned char ch;
	for (;;) {
		if (++cur == len) {
			len = xread(0, buf, sizeof(buf));
			if (!len)
				return 0;
			if (len < 0)
				die_errno("Could not read <stdin>");
			cur = 0;
		}
		ch = buf[cur];
		if (tmp2) {
			if ((ch & 0xc0) != 0x80) {
				fputs(utf8_replace_character, stdout);
				tmp2 = NULL;
				cur--;
				continue;
			}
			*tmp2 = ch;
			tmp2++;
			if (--remaining == 0) {
				fwrite(tmp, tmp2 - tmp, 1, stdout);
				tmp2 = NULL;
			}
			continue;
		}
		if (!(ch & 0x80)) {
			/* 0xxxxxxx */
			if (ch == '&')
				fputs("&", stdout);
			else if (ch == '\'')
				fputs("'", stdout);
			else if (ch == '"')
				fputs(""", stdout);
			else if (ch == '<')
				fputs("<", stdout);
			else if (ch == '>')
				fputs(">", stdout);
			else if (ch >= 0x20)
				fputc(ch, stdout);
			else if (ch == 0x09 || ch == 0x0a || ch == 0x0d)
				fprintf(stdout, "&#x%02x;", ch);
			else
				fputs(utf8_replace_character, stdout);
		} else if ((ch & 0xe0) == 0xc0) {
			/* 110XXXXx 10xxxxxx */
			tmp[0] = ch;
			remaining = 1;
			tmp2 = tmp + 1;
		} else if ((ch & 0xf0) == 0xe0) {
			/* 1110XXXX 10Xxxxxx 10xxxxxx */
			tmp[0] = ch;
			remaining = 2;
			tmp2 = tmp + 1;
		} else if ((ch & 0xf8) == 0xf0) {
			/* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */
			tmp[0] = ch;
			remaining = 3;
			tmp2 = tmp + 1;
		} else
			fputs(utf8_replace_character, stdout);
	}
	return 0;
}
 |