1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
|
http://cvs.php.net/viewvc.cgi/php-src/ext/standard/html.c?r1=1.111.2.2.2.14&r2=1.111.2.2.2.15&view=patch
--- old/ext/standard/html.c 2007/05/27 15:57:11 1.111.2.2.2.14
+++ new/ext/standard/html.c 2007/10/03 04:53:05 1.111.2.2.2.15
@@ -484,18 +484,29 @@
} \
mbseq[mbpos++] = (mbchar); }
+#define CHECK_LEN(pos, chars_need) /* bail out of the enclosing function (sets *status = FAILURE, returns 0) when fewer than chars_need bytes remain in str at pos; relies on str_len and status being in scope */ \
+ do { if ((str_len - (pos)) < (chars_need)) { \
+ *status = FAILURE; \
+ return 0; \
+ } } while (0) /* do/while(0): expands to a single statement, safe before else */
+
/* {{{ get_next_char
*/
inline static unsigned short get_next_char(enum entity_charset charset,
unsigned char * str,
+ int str_len,
int * newpos,
unsigned char * mbseq,
- int * mbseqlen)
+ int * mbseqlen,
+ int *status)
{
int pos = *newpos;
int mbpos = 0;
int mbspace = *mbseqlen;
unsigned short this_char = str[pos++];
+ unsigned char next_char;
+
+ *status = SUCCESS;
if (mbspace <= 0) {
*mbseqlen = 0;
@@ -517,6 +528,10 @@
do {
if (this_char < 0x80) {
more = 0;
+ if(stat) {
+ /* an ASCII byte arrived while a multi-byte UTF-8 sequence was still pending */
+ *status = FAILURE;
+ }
break;
} else if (this_char < 0xc0) {
switch (stat) {
@@ -555,6 +570,7 @@
break;
default:
/* invalid */
+ *status = FAILURE;
more = 0;
}
}
@@ -562,21 +578,27 @@
else if (this_char < 0xe0) {
stat = 0x10; /* 2 byte */
utf = (this_char & 0x1f) << 6;
+ CHECK_LEN(pos, 1);
} else if (this_char < 0xf0) {
stat = 0x20; /* 3 byte */
utf = (this_char & 0xf) << 12;
+ CHECK_LEN(pos, 2);
} else if (this_char < 0xf8) {
stat = 0x30; /* 4 byte */
utf = (this_char & 0x7) << 18;
+ CHECK_LEN(pos, 3);
} else if (this_char < 0xfc) {
stat = 0x40; /* 5 byte */
utf = (this_char & 0x3) << 24;
+ CHECK_LEN(pos, 4);
} else if (this_char < 0xfe) {
stat = 0x50; /* 6 byte */
utf = (this_char & 0x1) << 30;
+ CHECK_LEN(pos, 5);
} else {
/* invalid; bail */
more = 0;
+ *status = FAILURE;
break;
}
@@ -594,7 +616,8 @@
/* check if this is the first of a 2-byte sequence */
if (this_char >= 0xa1 && this_char <= 0xfe) {
/* peek at the next char */
- unsigned char next_char = str[pos];
+ CHECK_LEN(pos, 1);
+ next_char = str[pos];
if ((next_char >= 0x40 && next_char <= 0x7e) ||
(next_char >= 0xa1 && next_char <= 0xfe)) {
/* yes, this a wide char */
@@ -614,7 +637,8 @@
(this_char >= 0xe0 && this_char <= 0xef)
) {
/* peek at the next char */
- unsigned char next_char = str[pos];
+ CHECK_LEN(pos, 1);
+ next_char = str[pos];
if ((next_char >= 0x40 && next_char <= 0x7e) ||
(next_char >= 0x80 && next_char <= 0xfc))
{
@@ -633,7 +657,8 @@
/* check if this is the first of a multi-byte sequence */
if (this_char >= 0xa1 && this_char <= 0xfe) {
/* peek at the next char */
- unsigned char next_char = str[pos];
+ CHECK_LEN(pos, 1);
+ next_char = str[pos];
if (next_char >= 0xa1 && next_char <= 0xfe) {
/* yes, this a jis kanji char */
this_char <<= 8;
@@ -644,7 +669,8 @@
} else if (this_char == 0x8e) {
/* peek at the next char */
- unsigned char next_char = str[pos];
+ CHECK_LEN(pos, 1);
+ next_char = str[pos];
if (next_char >= 0xa1 && next_char <= 0xdf) {
/* JIS X 0201 kana */
this_char <<= 8;
@@ -655,8 +681,10 @@
} else if (this_char == 0x8f) {
/* peek at the next two char */
- unsigned char next_char = str[pos];
- unsigned char next2_char = str[pos+1];
+ unsigned char next2_char;
+ CHECK_LEN(pos, 2);
+ next_char = str[pos];
+ next2_char = str[pos+1];
if ((next_char >= 0xa1 && next_char <= 0xfe) &&
(next2_char >= 0xa1 && next2_char <= 0xfe)) {
/* JIS X 0212 hojo-kanji */
@@ -1098,13 +1126,22 @@
maxlen = 128;
replaced = emalloc (maxlen);
len = 0;
-
i = 0;
while (i < oldlen) {
unsigned char mbsequence[16]; /* allow up to 15 characters in a multibyte sequence */
int mbseqlen = sizeof(mbsequence);
- unsigned short this_char = get_next_char(charset, old, &i, mbsequence, &mbseqlen);
+ int status = SUCCESS;
+ unsigned short this_char = get_next_char(charset, old, oldlen, &i, mbsequence, &mbseqlen, &status);
+ if(status == FAILURE) {
+ /* invalid MB sequence */
+ efree(replaced);
+ if(!PG(display_errors)) {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid multibyte sequence in argument");
+ }
+ *newlen = 0;
+ return STR_EMPTY_ALLOC();
+ }
matches_map = 0;
if (len + 16 > maxlen)
|