1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
|
From 0f0db07c493396b2da1a12022fdf942c5b6c239d Mon Sep 17 00:00:00 2001
From: nicm <nicm>
Date: Mon, 31 Mar 2014 21:43:35 +0000
Subject: [PATCH 5/6] In four byte UTF-8 sequences, only three bits of the
first byte should be used. Fix from Koga Osamu.
---
utf8.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/utf8.c b/utf8.c
index 63723d7..5babcb3 100644
--- a/utf8.c
+++ b/utf8.c
@@ -313,7 +313,7 @@ utf8_combine(const struct utf8_data *utf8data)
value = utf8data->data[3] & 0x3f;
value |= (utf8data->data[2] & 0x3f) << 6;
value |= (utf8data->data[1] & 0x3f) << 12;
- value |= (utf8data->data[0] & 0x3f) << 18;
+ value |= (utf8data->data[0] & 0x07) << 18;
break;
}
return (value);
--
1.9.2
|