From 0f0db07c493396b2da1a12022fdf942c5b6c239d Mon Sep 17 00:00:00 2001
From: nicm <nicm>
Date: Mon, 31 Mar 2014 21:43:35 +0000
Subject: [PATCH 5/6] In four byte UTF-8 sequences, only three bits of the
 first byte should be used. Fix from Koga Osamu.

---
 utf8.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utf8.c b/utf8.c
index 63723d7..5babcb3 100644
--- a/utf8.c
+++ b/utf8.c
@@ -313,7 +313,7 @@ utf8_combine(const struct utf8_data *utf8data)
 		value = utf8data->data[3] & 0x3f;
 		value |= (utf8data->data[2] & 0x3f) << 6;
 		value |= (utf8data->data[1] & 0x3f) << 12;
-		value |= (utf8data->data[0] & 0x3f) << 18;
+		value |= (utf8data->data[0] & 0x07) << 18;
 		break;
 	}
 	return (value);
-- 
1.9.2

