1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
|
<?php
namespace TextExtracts\Test;
use MediaWikiTestCase;
use TextExtracts\ExtractFormatter;
/**
 * Data-driven checks of the text returned by ExtractFormatter, in both
 * HTML mode and plain text mode.
 *
 * @covers \TextExtracts\ExtractFormatter
 * @group TextExtracts
 *
 * @license GPL-2.0-or-later
 */
class ExtractFormatterTest extends MediaWikiTestCase {

	/**
	 * Builds a formatter for $text, asks it to remove `div` and `.metadata`,
	 * and requires getText() to return exactly $expected.
	 *
	 * @dataProvider provideExtracts
	 * @param string $expected Exact string getText() must return
	 * @param string $text Input HTML handed to the formatter
	 * @param bool $plainText Second constructor argument; the data sets use
	 *  true for plain text mode and false for HTML mode
	 */
	public function testExtracts( $expected, $text, $plainText ) {
		$fmt = new ExtractFormatter( $text, $plainText );
		// .metadata class will be added via $wgExtractsRemoveClasses on WMF
		$fmt->remove( [ 'div', '.metadata' ] );
		$this->assertSame( $expected, $fmt->getText() );
	}

	/**
	 * Data sets for testExtracts().
	 *
	 * Static because it needs no instance state; static providers work on
	 * every PHPUnit version, while non-static ones are deprecated in PHPUnit 10.
	 *
	 * @return array[] Map of data set name to [ expected output, input HTML, plain text flag ]
	 */
	public static function provideExtracts() {
		// Lead sentence markup with an audio link; the "help·info" part sits
		// inside a .metadata element, which testExtracts() asks to be removed.
		// phpcs:ignore Generic.Files.LineLength
		$dutch = '<b>Dutch</b> (<span class="unicode haudio" style="white-space:nowrap;"><span class="fn"><a href="/wiki/File:Nl-Nederlands.ogg" title="About this sound"><img alt="About this sound" src="https://upload.wikimedia.org/wikipedia/commons/thumb/8/8a/Loudspeaker.svg/11px-Loudspeaker.svg.png" width="11" height="11" srcset="https://upload.wikimedia.org/wikipedia/commons/thumb/8/8a/Loudspeaker.svg/17px-Loudspeaker.svg.png 1.5x, https://upload.wikimedia.org/wikipedia/commons/thumb/8/8a/Loudspeaker.svg/22px-Loudspeaker.svg.png 2x" /></a> <a href="https://upload.wikimedia.org/wikipedia/commons/d/db/Nl-Nederlands.ogg" class="internal" title="Nl-Nederlands.ogg"><i>Nederlands</i></a></span> <small class="metadata audiolinkinfo" style="cursor:help;">(<a href="/w/index.php?title=Wikipedia:Media_help&action=edit&redlink=1" class="new" title="Wikipedia:Media help (page does not exist)"><span style="cursor:help;">help</span></a>·<a href="/wiki/File:Nl-Nederlands.ogg" title="File:Nl-Nederlands.ogg"><span style="cursor:help;">info</span></a>)</small></span>) is a <a href="/w/index.php?title=West_Germanic_languages&action=edit&redlink=1" class="new" title="West Germanic languages (page does not exist)">West Germanic language</a> and the native language of most of the population of the <a href="/w/index.php?title=Netherlands&action=edit&redlink=1" class="new" title="Netherlands (page does not exist)">Netherlands</a>';

		// Line breaks are spelled out so the fixture does not depend on the
		// file's line endings or indentation.
		$tocText = 'Lead<div id="toc" class="toc">TOC goes here</div>' . "\n" .
			'<h1>Section</h1>' . "\n" .
			'<p>Section text</p>';

		// Shared input of the two "HTML cleanup" cases: a line feed and a
		// no-break space in front, CRLF behind. Written with escapes so the
		// invisible characters survive editors and reformatting.
		// NOTE(review): the HTML-mode expectation below contains a bare "&";
		// presumably this fixture once used entities (&amp;, &nbsp;) - confirm
		// against the formatter's actual output.
		$untidyHtml = "\n\u{00A0}<a>A</a> & <b>B</b>\r\n";

		return [
			'Lead sentence with audio link and metadata (plain text mode)' => [
				'Dutch ( Nederlands ) is a West Germanic language and the native language of ' .
					'most of the population of the Netherlands',
				$dutch,
				true,
			],
			'HTML cleanup in HTML mode' => [
				"\u{00A0}A & <b>B</b>",
				$untidyHtml,
				false,
			],
			'HTML cleanup in plain text mode' => [
				'A & B',
				$untidyHtml,
				true,
			],
			'class attribute dropped, lang attribute kept (HTML mode)' => [
				'<span><span lang="baz">qux</span></span>',
				'<span class="foo"><span lang="baz">qux</span></span>',
				false,
			],
			'style attribute dropped, lang attribute kept (HTML mode)' => [
				'<span><span lang="baz">qux</span></span>',
				'<span style="foo: bar;"><span lang="baz">qux</span></span>',
				false,
			],
			'class and style dropped on nested spans, lang kept (HTML mode)' => [
				'<span><span lang="qux">quux</span></span>',
				'<span class="foo"><span style="bar: baz;" lang="qux">quux</span></span>',
				false,
			],
			'TOC is removed (HTML mode)' => [
				"Lead\n<h1>Section</h1>\n<p>Section text</p>",
				$tocText,
				false,
			],
			'TOC is removed (plain text mode)' => [
				// The heading is expected as control bytes 0x01 0x02, the
				// level digit, then 0x02 0x01 - presumably the formatter's
				// section markers; verify against ExtractFormatter.
				"Lead\n\n\x01\x02" . '1' . "\x02\x01Section\nSection text",
				$tocText,
				true,
			],
		];
	}
}
|