<?php
namespace TextExtracts\Test;
use TextExtracts\TextTruncator;
/**
* @covers \TextExtracts\TextTruncator
* @group TextExtracts
*
* @license GPL-2.0-or-later
*/
class TextTruncatorTest extends \PHPUnit\Framework\TestCase {

	/**
	 * Checks that getFirstSentences() returns the expected leading sentences
	 * when tidying is disabled.
	 *
	 * @dataProvider provideGetFirstSentences
	 * @param string $text Input text handed to the truncator
	 * @param int $sentences Number of sentences requested
	 * @param string $expected Exact string the truncator must return
	 */
	public function testGetFirstSentences( $text, $sentences, $expected ) {
		$truncator = new TextTruncator( false );
		$this->assertSame( $expected, $truncator->getFirstSentences( $text, $sentences ) );
	}

	/**
	 * Datasets for testGetFirstSentences().
	 *
	 * Static so that the provider keeps working on PHPUnit versions that no
	 * longer accept non-static data providers.
	 *
	 * @return array[] Each entry is [ string $text, int $sentences, string $expected ]
	 */
	public static function provideGetFirstSentences() {
		// One very long line without any sentence terminator.
		$longLine = str_repeat( 'word ', 1000000 );

		return [
			[
				'Foo is a bar. Such a smart boy. But completely useless.',
				2,
				'Foo is a bar. Such a smart boy.',
			],
			[
				'Foo is a bar. Such a smart boy. But completely useless.',
				1,
				'Foo is a bar.',
			],
			[
				'Foo is a bar. Such a smart boy.',
				2,
				'Foo is a bar. Such a smart boy.',
			],
			[
				'Foo is a bar.',
				1,
				'Foo is a bar.',
			],
			[
				'Foo is a bar.',
				2,
				'Foo is a bar.',
			],
			[
				'',
				1,
				'',
			],
			'0 sentences mean empty result' => [
				'Foo is a bar. Such a smart boy.',
				0,
				'',
			],
			"Don't explode on negative input" => [
				'Foo is a bar. Such a smart boy.',
				-1,
				'',
			],
			'More sentences requested than is available' => [
				'Foo is a bar. Such a smart boy.',
				3,
				'Foo is a bar. Such a smart boy.',
			],
			// Exclamation points too!!!
			[
				'Foo is a bar! Such a smart boy! But completely useless!',
				1,
				'Foo is a bar!',
			],
			// A tricky one: periods inside "3.1.3.2" must not end the sentence
			[
				"Acid phosphatase (EC 3.1.3.2) is a chemical you don't want to mess with. " .
					"Polyvinyl acetate, however, is another story.",
				1,
				"Acid phosphatase (EC 3.1.3.2) is a chemical you don't want to mess with.",
			],
			// No clear sentences
			[
				"foo\nbar\nbaz",
				2,
				'foo',
			],
			// Bug T118621
			[
				'Foo was born in 1977. He enjoys listening to Siouxsie and the Banshees.',
				1,
				'Foo was born in 1977.',
			],
			// Bug T115795 - Test no cropping after initials
			[
				'P.J. Harvey is a singer. She is awesome!',
				1,
				'P.J. Harvey is a singer.',
			],
			// Bug T115817 - Non-breaking space is not a delimiter.
			// The space is written as an explicit &nbsp; entity and decoded here, so
			// that the U+00A0 character is visible in the source and cannot be
			// silently replaced by a regular space.
			[
				html_entity_decode( 'Pigeons (lat.&nbsp;Columbidae) are birds. ' .
					'They primarily feed on seeds.' ),
				1,
				html_entity_decode( 'Pigeons (lat.&nbsp;Columbidae) are birds.' ),
			],
			// Bug T145231 - various problems with regexes.
			// Very long input without a terminator comes back whole, only trimmed.
			[
				$longLine,
				3,
				trim( $longLine ),
			],
			// More than 64K sentences requested from an even longer input.
			[
				str_repeat( 'Sentence. ', 70000 ),
				65536,
				trim( str_repeat( 'Sentence. ', 65536 ) ),
			],
			'Preserve whitespace before end character' => [
				'Aa . Bb',
				1,
				'Aa .',
			],
		];
	}

	/**
	 * Checks that getFirstChars() returns the expected leading part of the text
	 * when tidying is disabled.
	 *
	 * @dataProvider provideGetFirstChars
	 * @param string $text Input text handed to the truncator
	 * @param int $chars Number of characters requested
	 * @param string $expected Exact string the truncator must return
	 */
	public function testGetFirstChars( $text, $chars, $expected ) {
		$truncator = new TextTruncator( false );
		$this->assertSame( $expected, $truncator->getFirstChars( $text, $chars ) );
	}

	/**
	 * Datasets for testGetFirstChars().
	 *
	 * Static so that the provider keeps working on PHPUnit versions that no
	 * longer accept non-static data providers.
	 *
	 * @return array[] Each entry is [ string $text, int $chars, string $expected ]
	 */
	public static function provideGetFirstChars() {
		$text = 'Lullzy lulz are lullzy!';
		$html = 'foo<tag>bar</tag>';
		$longText = str_repeat( 'тест ', 50000 );
		// 65536 characters end one character into the 13108th five-character
		// "тест " group; the expected result runs to the end of that word.
		$longTextExpected = trim( str_repeat( 'тест ', 13108 ) );

		return [
			// Non-positive lengths yield an empty result
			[ $text, -8, '' ],
			[ $text, 0, '' ],
			// Length beyond the text returns the text unchanged
			[ $text, 100, $text ],
			// A cut inside a word is extended to the end of that word
			[ $text, 1, 'Lullzy' ],
			[ $text, 6, 'Lullzy' ],
			// Disabled case, kept for reference. NOTE(review): the expected value
			// here is unverified - confirm before re-enabling.
			// [ $text, 7, 'Lullzy' ],
			[ $text, 8, 'Lullzy lulz' ],
			// HTML processing
			[ $html, 1, 'foo' ],
			// let HTML sanitizer clean it up later
			[ $html, 4, 'foo<tag>' ],
			[ $html, 12, 'foo<tag>bar</tag>' ],
			[ $html, 13, 'foo<tag>bar</tag>' ],
			[ $html, 16, 'foo<tag>bar</tag>' ],
			[ $html, 17, 'foo<tag>bar</tag>' ],
			// T143178 - previously, characters were extracted using regexps which failed when
			// requesting 64K chars or more.
			[ $longText, 65536, $longTextExpected ],
		];
	}

	/**
	 * Checks the output of both truncation methods when the truncator is
	 * constructed with tidying enabled: the truncated markup comes back
	 * balanced and wrapped in a paragraph.
	 */
	public function testTidyIntegration() {
		$truncator = new TextTruncator( true );

		$text = '<b>Aa. Bb.</b>';
		$this->assertSame( '<p><b>Aa.</b></p>', $truncator->getFirstSentences( $text, 1 ) );
		$this->assertSame( '<p><b>Aa</b></p>', $truncator->getFirstChars( $text, 4 ) );
	}

}
|