--TEST--
Test mb_encode_numericentity() function: Convert text to HTML entities
--EXTENSIONS--
mbstring
--FILE--
<?php
// Exercises mb_encode_numericentity() with a range of conversion maps. Every
// result is echoed with a numeric label so it can be compared line by line with
// the expected output. A conversion map is a flat list of 4-integer groups:
// [start codepoint, end codepoint, offset, mask].

// Fixtures.
// $str1 is every code point from U+00A1 through U+00FF, in order.
// $str2 is a run of Greek letters, arrows and mathematical/technical symbols.
// Code points that are invisible, or that Unicode normalization rewrites, are
// written as \u{...} escapes so editors and copy/paste cannot silently drop or
// alter them: U+00AD (soft hyphen) and U+2329/U+232A (angle brackets).
$str1 = '¡¢£¤¥¦§¨©ª«¬' . "\u{AD}" . '®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ';
$str2 = 'ƒΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩαβγδεζηθικλμνξοπρςστυφχψωϑϒϖ•…′″‾⁄℘ℑℜ™ℵ←↑→↓↔↵⇐⇑⇒⇓⇔∀∂∃∅∇∈∉∋∏∑−∗√∝∞∠∧∨∩∪∫∴∼≅≈≠≡≤≥⊂⊃⊄⊆⊇⊕⊗⊥⋅⌈⌉⌊⌋' . "\u{2329}\u{232A}" . '◊♠♣♥♦';

// Map covering U+0000..U+2FFFF with no offset: every character is encoded
$convmap = [0x0, 0x2FFFF, 0, 0xFFFF];
echo "1: " . mb_encode_numericentity($str1, $convmap, "UTF-8") . "\n";
echo "1 (hex): " . mb_encode_numericentity($str1, $convmap, "UTF-8", true) . "\n";
echo "2: " . mb_encode_numericentity($str2, $convmap, "UTF-8") . "\n";

// Map starting at U+00FF: the ASCII letters are below the range and pass through
$convmap = [0xFF, 0x2FFFF, 0, 0xFFFF];
echo "3: " . mb_encode_numericentity('aŒbœcŠdše€fg', $convmap, "UTF-8") . "\n";

// Empty map: nothing is encoded
$convmap = [];
echo "4: " . mb_encode_numericentity('föo', $convmap, "UTF-8") . "\n";

// (There is no case 5 in this script; the labels jump from 4 to 6.)

// HTML-encode a null byte
echo "6: " . mb_encode_numericentity("\x00", [0, 1, 0, 0xFFFF], "UTF-8", false) . "\n";
echo "6 (hex): " . mb_encode_numericentity("\x00", [0, 1, 0, 0xFFFF], "UTF-8", true) . "\n";

// Try doing weird things with convmap
$convmap = [
    0, 0, 0, 0, // Only one codepoint, empty mask
    100, 50, 0, 0xFFFF, // 'End' codepoint is before 'start' codepoint
];
echo "7: " . mb_encode_numericentity('föo', $convmap, "UTF-8") . "\n";
echo "7 (hex): " . mb_encode_numericentity('föo', $convmap, "UTF-8", true) . "\n";

// Try using positive offset
$convmap = [0, 10, 1000, 0xFFFF];
echo "8: " . mb_encode_numericentity("\x01\x02\x03", $convmap, "UTF-8") . "\n";
echo "8 (hex): " . mb_encode_numericentity("\x01\x02\x03", $convmap, "UTF-8", true) . "\n";

// Try using negative offset
$convmap = [0, 10, -100000, 0xFFFFFF];
echo "9: " . mb_encode_numericentity("\x01\x02\x03", $convmap, "UTF-8") . "\n";
echo "9 (hex): " . mb_encode_numericentity("\x01\x02\x03", $convmap, "UTF-8", true) . "\n";

// Try using mask to remove some bits
$convmap = [0, 1000, 0, 0x123];
echo "10: " . mb_encode_numericentity('föo', $convmap, "UTF-8") . "\n";
echo "10 (hex): " . mb_encode_numericentity('föo', $convmap, "UTF-8", true) . "\n";

// Try different text encoding
// The input switches to a two-byte escape sequence for two characters, then
// back to ASCII for "ABC".
$convmap = [0, 0xFFFF, 0, 0xFFFF];
$iso2022jp = "\x1B\$B\x21\x21!r\x1B(BABC";
echo "11: " . mb_encode_numericentity($iso2022jp, $convmap, "ISO-2022-JP") . "\n";
echo "11 (hex): " . mb_encode_numericentity($iso2022jp, $convmap, "ISO-2022-JP", true) . "\n";

// Regression test; the old implementation could only emit decimal entities with about 7 digits
$convmap = [0x2b, 0x2d4, 0x75656500, 0x656d2c53];
echo "12: " . mb_encode_numericentity("m", $convmap, "ASCII") . "\n";
?>
--EXPECT--
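1: &#161;&#162;&#163;&#164;&#165;&#166;&#167;&#168;&#169;&#170;&#171;&#172;&#173;&#174;&#175;&#176;&#177;&#178;&#179;&#180;&#181;&#182;&#183;&#184;&#185;&#186;&#187;&#188;&#189;&#190;&#191;&#192;&#193;&#194;&#195;&#196;&#197;&#198;&#199;&#200;&#201;&#202;&#203;&#204;&#205;&#206;&#207;&#208;&#209;&#210;&#211;&#212;&#213;&#214;&#215;&#216;&#217;&#218;&#219;&#220;&#221;&#222;&#223;&#224;&#225;&#226;&#227;&#228;&#229;&#230;&#231;&#232;&#233;&#234;&#235;&#236;&#237;&#238;&#239;&#240;&#241;&#242;&#243;&#244;&#245;&#246;&#247;&#248;&#249;&#250;&#251;&#252;&#253;&#254;&#255;
1 (hex): &#xA1;&#xA2;&#xA3;&#xA4;&#xA5;&#xA6;&#xA7;&#xA8;&#xA9;&#xAA;&#xAB;&#xAC;&#xAD;&#xAE;&#xAF;&#xB0;&#xB1;&#xB2;&#xB3;&#xB4;&#xB5;&#xB6;&#xB7;&#xB8;&#xB9;&#xBA;&#xBB;&#xBC;&#xBD;&#xBE;&#xBF;&#xC0;&#xC1;&#xC2;&#xC3;&#xC4;&#xC5;&#xC6;&#xC7;&#xC8;&#xC9;&#xCA;&#xCB;&#xCC;&#xCD;&#xCE;&#xCF;&#xD0;&#xD1;&#xD2;&#xD3;&#xD4;&#xD5;&#xD6;&#xD7;&#xD8;&#xD9;&#xDA;&#xDB;&#xDC;&#xDD;&#xDE;&#xDF;&#xE0;&#xE1;&#xE2;&#xE3;&#xE4;&#xE5;&#xE6;&#xE7;&#xE8;&#xE9;&#xEA;&#xEB;&#xEC;&#xED;&#xEE;&#xEF;&#xF0;&#xF1;&#xF2;&#xF3;&#xF4;&#xF5;&#xF6;&#xF7;&#xF8;&#xF9;&#xFA;&#xFB;&#xFC;&#xFD;&#xFE;&#xFF;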
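2: &#402;&#913;&#914;&#915;&#916;&#917;&#918;&#919;&#920;&#921;&#922;&#923;&#924;&#925;&#926;&#927;&#928;&#929;&#931;&#932;&#933;&#934;&#935;&#936;&#937;&#945;&#946;&#947;&#948;&#949;&#950;&#951;&#952;&#953;&#954;&#955;&#956;&#957;&#958;&#959;&#960;&#961;&#962;&#963;&#964;&#965;&#966;&#967;&#968;&#969;&#977;&#978;&#982;&#8226;&#8230;&#8242;&#8243;&#8254;&#8260;&#8472;&#8465;&#8476;&#8482;&#8501;&#8592;&#8593;&#8594;&#8595;&#8596;&#8629;&#8656;&#8657;&#8658;&#8659;&#8660;&#8704;&#8706;&#8707;&#8709;&#8711;&#8712;&#8713;&#8715;&#8719;&#8721;&#8722;&#8727;&#8730;&#8733;&#8734;&#8736;&#8743;&#8744;&#8745;&#8746;&#8747;&#8756;&#8764;&#8773;&#8776;&#8800;&#8801;&#8804;&#8805;&#8834;&#8835;&#8836;&#8838;&#8839;&#8853;&#8855;&#8869;&#8901;&#8968;&#8969;&#8970;&#8971;&#9001;&#9002;&#9674;&#9824;&#9827;&#9829;&#9830;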
3: a&#338;b&#339;c&#352;d&#353;e&#8364;fg
4: föo
6: &#0;
6 (hex): &#x0;
7: föo
7 (hex): föo
8: &#1001;&#1002;&#1003;
8 (hex): &#x3E9;&#x3EA;&#x3EB;
9: &#16677217;&#16677218;&#16677219;
9 (hex): &#xFE7961;&#xFE7962;&#xFE7963;
10: &#34;&#34;&#35;
10 (hex): &#x22;&#x22;&#x23;
11: &#12288;&#163;ABC
11 (hex): &#x3000;&#xA3;ABC
12: &#1701127233;