File: ExtractFormatterTest.php

package info (click to toggle)
mediawiki 1:1.35.13-1+deb11u2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 274,932 kB
  • sloc: php: 677,563; javascript: 572,709; sql: 11,565; python: 4,447; xml: 3,145; sh: 892; perl: 788; ruby: 496; pascal: 365; makefile: 128
file content (83 lines) | stat: -rw-r--r-- 3,409 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
<?php

namespace TextExtracts\Test;

use MediaWikiTestCase;
use TextExtracts\ExtractFormatter;

/**
 * @covers \TextExtracts\ExtractFormatter
 * @group TextExtracts
 *
 * @license GPL-2.0-or-later
 */
class ExtractFormatterTest extends MediaWikiTestCase {

	/**
	 * Runs one input through ExtractFormatter and compares the produced text
	 * with the expected extract.
	 *
	 * The formatter is built from the input and the plain-text flag, told to
	 * remove `div` elements and elements with the `metadata` class, and the
	 * result of getText() must be identical to the expected string.
	 *
	 * @dataProvider provideExtracts
	 *
	 * @param string $expected Exact text getText() is expected to return
	 * @param string $text HTML handed to the formatter
	 * @param bool $plainText Second constructor argument of ExtractFormatter;
	 *  true selects plain text mode, false HTML mode
	 */
	public function testExtracts( $expected, $text, $plainText ) {
		$formatter = new ExtractFormatter( $text, $plainText );
		// .metadata class will be added via $wgExtractsRemoveClasses on WMF
		$formatter->remove( [ 'div', '.metadata' ] );

		$this->assertSame( $expected, $formatter->getText() );
	}

	/**
	 * Data sets for testExtracts().
	 *
	 * Every set is keyed by a short description so that a failing case can be
	 * identified from the PHPUnit output.
	 *
	 * @return array[] Map of data set name to [ expected output, input HTML, plain text flag ]
	 */
	public static function provideExtracts() {
		// phpcs:ignore Generic.Files.LineLength
		$dutch = '<b>Dutch</b> (<span class="unicode haudio" style="white-space:nowrap;"><span class="fn"><a href="/wiki/File:Nl-Nederlands.ogg" title="About this sound"><img alt="About this sound" src="https://upload.wikimedia.org/wikipedia/commons/thumb/8/8a/Loudspeaker.svg/11px-Loudspeaker.svg.png" width="11" height="11" srcset="https://upload.wikimedia.org/wikipedia/commons/thumb/8/8a/Loudspeaker.svg/17px-Loudspeaker.svg.png 1.5x, https://upload.wikimedia.org/wikipedia/commons/thumb/8/8a/Loudspeaker.svg/22px-Loudspeaker.svg.png 2x" /></a>&#160;<a href="https://upload.wikimedia.org/wikipedia/commons/d/db/Nl-Nederlands.ogg" class="internal" title="Nl-Nederlands.ogg"><i>Nederlands</i></a></span>&#160;<small class="metadata audiolinkinfo" style="cursor:help;">(<a href="/w/index.php?title=Wikipedia:Media_help&amp;action=edit&amp;redlink=1" class="new" title="Wikipedia:Media help (page does not exist)"><span style="cursor:help;">help</span></a>·<a href="/wiki/File:Nl-Nederlands.ogg" title="File:Nl-Nederlands.ogg"><span style="cursor:help;">info</span></a>)</small></span>) is a <a href="/w/index.php?title=West_Germanic_languages&amp;action=edit&amp;redlink=1" class="new" title="West Germanic languages (page does not exist)">West Germanic language</a> and the native language of most of the population of the <a href="/w/index.php?title=Netherlands&amp;action=edit&amp;redlink=1" class="new" title="Netherlands (page does not exist)">Netherlands</a>';
		// The line breaks inside this literal are part of the fixture.
		$tocText = 'Lead<div id="toc" class="toc">TOC goes here</div>
<h1>Section</h1>
<p>Section text</p>';

		return [
			'Audio link markup and .metadata are dropped in plain text mode' => [
				'Dutch ( Nederlands ) is a West Germanic language and the native language of ' .
					'most of the population of the Netherlands',
				$dutch,
				true,
			],

			'HTML cleanup in HTML mode' => [
				"\u{00A0}A &amp; <b>B</b>",
				"&#x0A;&nbsp;<a>A</a> &amp; <b>&#x42;</b>\r\n",
				false,
			],
			'HTML cleanup in plain text mode' => [
				'A & B',
				"&#x0A;&nbsp;<a>A</a> &amp; <b>&#x42;</b>\r\n",
				true,
			],

			'class attribute is stripped from the outer span' => [
				'<span><span lang="baz">qux</span></span>',
				'<span class="foo"><span lang="baz">qux</span></span>',
				false,
			],
			'style attribute is stripped from the outer span' => [
				'<span><span lang="baz">qux</span></span>',
				'<span style="foo: bar;"><span lang="baz">qux</span></span>',
				false,
			],
			'class and style are stripped while lang is kept' => [
				'<span><span lang="qux">quux</span></span>',
				'<span class="foo"><span style="bar: baz;" lang="qux">quux</span></span>',
				false,
			],

			'TOC is removed in HTML mode' => [
				"Lead\n<h1>Section</h1>\n<p>Section text</p>",
				$tocText,
				false,
			],
			'TOC is removed in plain text mode' => [
				// The heading comes back as the digit 1 wrapped in control bytes
				// 0x01 0x02 ... 0x02 0x01, presumably the formatter's section
				// markers around the heading level (verify against ExtractFormatter).
				"Lead\n\n" . "\x01\x02" . '1' . "\x02\x01" . "Section\nSection text",
				$tocText,
				true,
			],
		];
	}

}