--TEST--
mb_ereg_search_pos(), mb_ereg_search_getregs() and mb_ereg_search_getpos() under several regex encodings
--SKIPIF--
<?php
// Skip the whole test unless mbstring and its regex search functions exist.
if ( ! extension_loaded('mbstring') ) {
    die('skip mbstring not available');
}
if ( ! function_exists('mb_ereg_search') ) {
    die("skip\n");
}
?>
--INI--
output_handler=
--FILE--
<?php
// Set the default mbregex option string to empty before any search runs.
mb_regex_set_options( '' );

// Regex encodings exercised by the driver loop, in output order.
$encs = array(
    'EUC-JP',
    'Shift_JIS',
    'SJIS',
    'UTF-8',
);
/**
 * Searches $str for every match of $look_for under one regex encoding and
 * prints one line per match.
 *
 * Subject and pattern are converted from $in_enc to $test_enc before the
 * search is initialised. Each printed line has the form
 * "(<test_enc>) (<pos>) <groups>", where <pos> is the value of
 * mb_ereg_search_getpos() after the match and <groups> is the match registers
 * with the first element removed, joined by '-' and converted back to $in_enc.
 *
 * @param string $test_enc Encoding set via mb_regex_encoding() and used as conversion target.
 * @param string $str      Subject string, given in $in_enc.
 * @param string $look_for Regex pattern, given in $in_enc.
 * @param string $opt      Option string handed to mb_ereg_search_init().
 * @param string $in_enc   Encoding of $str and $look_for as passed in; defaults to 'EUC-JP'.
 * @return void
 */
function test_search( $test_enc, $str, $look_for, $opt, $in_enc = 'EUC-JP' ) {
    mb_regex_encoding( $test_enc );
    $str      = mb_convert_encoding( $str, $test_enc, $in_enc );
    $look_for = mb_convert_encoding( $look_for, $test_enc, $in_enc );
    mb_ereg_search_init( $str, $look_for, $opt );
    while ( mb_ereg_search_pos() ) {
        $regs   = mb_ereg_search_getregs();
        $groups = '';
        // Check the type BEFORE array_shift(): the shift itself rejects a
        // non-array, so a guard placed after it could never take effect.
        if ( is_array( $regs ) ) {
            // Drop the first register so only the remaining ones are printed.
            array_shift( $regs );
            $groups = implode( '-', $regs );
        }
        printf( "(%s) (%d) %s\n", $test_enc, mb_ereg_search_getpos(), mb_convert_encoding( $groups, $in_enc, $test_enc ) );
    }
}
/**
 * Runs the two search scenarios of this test for one encoding / option pair.
 *
 * Both calls rely on test_search()'s default $in_enc ('EUC-JP').
 *
 * @param string $enc Passed to test_search() as its $test_enc argument.
 * @param string $opt Option string passed through to test_search().
 */
function do_tests( $enc, $opt ) {
// NOTE(review): the non-ASCII characters in the subject and pattern below look
// mis-encoded in this copy (presumably EUC-JP bytes, matching the default
// $in_enc) — confirm against the upstream file before editing these literals.
test_search( $enc, "ϡ \n", ' (?ϡ?)[[:space:]]', $opt );
// ASCII-only scenario: captures each run of 'ab' followed by one or more lowercase letters.
test_search( $enc, 'abcde abdeabcf anvfabc odu abcd ', '(ab[a-z]+)', $opt );
}
// For every encoding, run the scenarios first with an empty option string
// and then with the 'x' option.
foreach ( $encs as $enc ) {
    foreach ( array( '', 'x' ) as $opt ) {
        do_tests( $enc, $opt );
    }
}
?>
--EXPECT--
(EUC-JP) (10)
(EUC-JP) (5) abcde
(EUC-JP) (14) abdeabcf
(EUC-JP) (22) abc
(EUC-JP) (31) abcd
(EUC-JP) (5) ϡ
(EUC-JP) (10)
(EUC-JP) (5) abcde
(EUC-JP) (14) abdeabcf
(EUC-JP) (22) abc
(EUC-JP) (31) abcd
(Shift_JIS) (10)
(Shift_JIS) (5) abcde
(Shift_JIS) (14) abdeabcf
(Shift_JIS) (22) abc
(Shift_JIS) (31) abcd
(Shift_JIS) (5) ϡ
(Shift_JIS) (10)
(Shift_JIS) (5) abcde
(Shift_JIS) (14) abdeabcf
(Shift_JIS) (22) abc
(Shift_JIS) (31) abcd
(SJIS) (10)
(SJIS) (5) abcde
(SJIS) (14) abdeabcf
(SJIS) (22) abc
(SJIS) (31) abcd
(SJIS) (5) ϡ
(SJIS) (10)
(SJIS) (5) abcde
(SJIS) (14) abdeabcf
(SJIS) (22) abc
(SJIS) (31) abcd
(UTF-8) (14)
(UTF-8) (5) abcde
(UTF-8) (14) abdeabcf
(UTF-8) (22) abc
(UTF-8) (31) abcd
(UTF-8) (7) ϡ
(UTF-8) (14)
(UTF-8) (5) abcde
(UTF-8) (14) abdeabcf
(UTF-8) (22) abc
(UTF-8) (31) abcd