File: MimeAnalyzerTest.php

package info (click to toggle)
mediawiki 1:1.43.3+dfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 417,464 kB
  • sloc: php: 1,062,949; javascript: 664,290; sql: 9,714; python: 5,458; xml: 3,489; sh: 1,131; makefile: 64
file content (335 lines) | stat: -rw-r--r-- 12,172 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
<?php

namespace Wikimedia\Tests\Mime;

use MediaWikiCoversValidator;
use PHPUnit\Framework\TestCase;
use ReflectionClass;
use Wikimedia\Mime\MimeAnalyzer;
use Wikimedia\TestingAccessWrapper;

/**
 * Tests for MimeAnalyzer: MIME type guessing from file content and extension,
 * ZIP-container detection, and the extension <-> MIME type lookup tables.
 *
 * Every test runs against an analyzer configured with the built-in type and
 * info tables (see createMimeAnalyzer()), so results do not depend on site
 * configuration.
 *
 * @group Media
 * @group Mime
 * @covers \Wikimedia\Mime\MimeAnalyzer
 */
class MimeAnalyzerTest extends TestCase {

	use MediaWikiCoversValidator;

	/** Directory holding the media fixture files, relative to this test file. */
	private const MEDIA_FIXTURE_DIR = __DIR__ . '/../../../../data/media/';

	/** @var MimeAnalyzer Fresh analyzer created for each test by setUp() */
	private $mimeAnalyzer;

	protected function setUp(): void {
		parent::setUp();
		$this->mimeAnalyzer = $this->createMimeAnalyzer();
	}

	/**
	 * Build an analyzer that uses the internal type/info tables and a fixed
	 * set of XML root-element mappings.
	 *
	 * @return MimeAnalyzer
	 */
	private function createMimeAnalyzer(): MimeAnalyzer {
		$xmlTypes = [
			'http://www.w3.org/2000/svg:svg' => 'image/svg+xml',
			'svg' => 'image/svg+xml',
			'http://www.lysator.liu.se/~alla/dia/:diagram' => 'application/x-dia-diagram',
			'http://www.w3.org/1999/xhtml:html' => 'text/html', // application/xhtml+xml?
			'html' => 'text/html', // application/xhtml+xml?
		];

		return new MimeAnalyzer( [
			'infoFile' => MimeAnalyzer::USE_INTERNAL,
			'typeFile' => MimeAnalyzer::USE_INTERNAL,
			'xmlTypes' => $xmlTypes,
		] );
	}

	/**
	 * Invoke the non-public MimeAnalyzer::doGuessMimeType() through reflection.
	 *
	 * @param array $parameters Positional arguments forwarded to the method
	 * @return mixed Whatever doGuessMimeType() returns
	 */
	private function doGuessMimeType( array $parameters = [] ) {
		$method = ( new ReflectionClass( get_class( $this->mimeAnalyzer ) ) )
			->getMethod( 'doGuessMimeType' );
		// Has no effect on PHP 8.1+ (methods are always invokable via reflection),
		// but is still required on older runtimes.
		$method->setAccessible( true );

		return $method->invokeArgs( $this->mimeAnalyzer, $parameters );
	}

	/**
	 * @dataProvider providerImproveTypeFromExtension
	 * @param string $ext File extension (no leading dot)
	 * @param string $oldMime Initially detected MIME
	 * @param string|null $expectedMime MIME type after taking extension into account
	 */
	public function testImproveTypeFromExtension( $ext, $oldMime, $expectedMime ): void {
		// Strict comparison: a loose one would let null, '' and false stand in
		// for each other, hiding exactly the kind of change the null case
		// below is meant to catch.
		$this->assertSame(
			$expectedMime,
			$this->mimeAnalyzer->improveTypeFromExtension( $oldMime, $ext )
		);
	}

	/**
	 * @return array[] Rows of [ extension, detected MIME, expected improved MIME ]
	 */
	public static function providerImproveTypeFromExtension(): array {
		return [
			[ 'gif', 'image/gif', 'image/gif' ],
			[ 'gif', 'unknown/unknown', 'unknown/unknown' ],
			[ 'wrl', 'unknown/unknown', 'model/vrml' ],
			[ 'txt', 'text/plain', 'text/plain' ],
			[ 'csv', 'text/plain', 'text/csv' ],
			[ 'tsv', 'text/plain', 'text/tab-separated-values' ],
			[ 'js', 'text/javascript', 'application/javascript' ],
			[ 'js', 'application/x-javascript', 'application/javascript' ],
			[ 'json', 'text/plain', 'application/json' ],
			[ 'foo', 'application/x-opc+zip', 'application/zip' ],
			[ 'docx', 'application/x-opc+zip',
				'application/vnd.openxmlformats-officedocument.wordprocessingml.document' ],
			[ 'djvu', 'image/x-djvu', 'image/vnd.djvu' ],
			[ 'wav', 'audio/wav', 'audio/wav' ],
			[ 'odt', 'application/vnd.oasis.opendocument',
				'application/vnd.oasis.opendocument.text' ],

			// XXX: It's probably wrong (as in: confusing and error-prone) for
			//   ::improveTypeFromExtension to return null (T253483).
			//   This test case exists to ensure that any changes to the existing
			//   behavior are intentional.
			[ 'no_such_extension', 'unknown/unknown', null ],
		];
	}

	/**
	 * @return iterable Named rows of [ fixture file name, MIME hint or null, expected media type ]
	 */
	public static function provideGetMediaType(): iterable {
		// Make sure encoder=ffmpeg2theora doesn't trigger MEDIATYPE_VIDEO (T65584)
		yield 'Recognize ogg' => [ 'say-test.ogg', 'application/ogg', MEDIATYPE_AUDIO ];

		// Make sure Opus audio files don't trigger MEDIATYPE_MULTIMEDIA (T151352)
		yield 'Recognize Opus' => [ 'say-test.opus', 'application/ogg', MEDIATYPE_AUDIO ];

		// Make sure mp3 files are detected as audio type
		yield 'Recognize mp3' => [ 'say-test-with-id3.mp3', null, MEDIATYPE_AUDIO ];

		yield 'Unknown Extension' => [ 'unknown_extension', null, MEDIATYPE_UNKNOWN ];
	}

	/**
	 * @dataProvider provideGetMediaType
	 * @param string $file Fixture file name inside the media fixture directory
	 * @param string|null $mime MIME type passed to getMediaType(), if any
	 * @param string $expectType Expected MEDIATYPE_* constant
	 */
	public function testGetMediaType( $file, $mime, $expectType ): void {
		$path = self::MEDIA_FIXTURE_DIR . $file;

		$this->assertSame( $expectType, $this->mimeAnalyzer->getMediaType( $path, $mime ) );
	}

	/**
	 * @return iterable Named rows of [ fixture file name, extension, expected MIME type ]
	 */
	public static function provideDoGuessMimeType(): iterable {
		// Make sure MP3 with id3 tag is recognized
		yield 'Recognize mp3 with id3' => [ 'say-test-with-id3.mp3', 'mp3', 'audio/mpeg' ];

		// Make sure MP3 without id3 tag is recognized (MPEG-1 sample rates)
		yield 'Recognize mp3 no id3, MPEG-1' => [ 'say-test-mpeg1.mp3', 'mp3', 'audio/mpeg' ];

		// Make sure MP3 without id3 tag is recognized (MPEG-2 sample rates)
		yield 'Recognize mp3 no id3, MPEG-2' => [ 'say-test-mpeg2.mp3', 'mp3', 'audio/mpeg' ];

		// Make sure MP3 without id3 tag is recognized (MPEG-2.5 sample rates)
		yield 'Recognize mp3 no id3, MPEG-2.5' => [ 'say-test-mpeg2.5.mp3', 'mp3', 'audio/mpeg' ];

		// A ZIP file embedded in the middle of a .doc file is still a Word Document
		yield 'ZIP in DOC' => [ 'zip-in-doc.doc', 'doc', 'application/msword' ];

		yield 'Jpeg2000, lossless' => [ 'jpeg2000-lossless.jp2', 'jp2', 'image/jp2' ];

		yield 'Jpeg2000, part 2' => [ 'jpeg2000-profile.jpf', 'jpf', 'image/jpx' ];
	}

	/**
	 * @dataProvider provideDoGuessMimeType
	 * @param string $file Fixture file name inside the media fixture directory
	 * @param string $ext Extension passed alongside the file
	 * @param string $expectType Expected MIME type
	 */
	public function testDoGuessMimeType( $file, $ext, $expectType ): void {
		$path = self::MEDIA_FIXTURE_DIR . $file;

		$this->assertSame( $expectType, $this->doGuessMimeType( [ $path, $ext ] ) );
	}

	/**
	 * @return array[] Named rows of [ fixture file name, expected MIME type ]
	 */
	public static function provideDetectZipTypeFromFile(): array {
		return [
			'[Content_Type].xml at end (T291750)' => [
				'type-at-end.docx',
				'application/x-opc+zip'
			],
			'Typical ODT gives fake generic type' => [
				'lo6-empty.odt',
				'application/vnd.oasis.opendocument'
			],
			'Ye olde GIFAR vulnerability' => [
				'gifar.gif',
				'application/java',
			],

			// TODO: Add test case for ZIP file with .zip extension
		];
	}

	/**
	 * @dataProvider provideDetectZipTypeFromFile
	 * @param string $fileName
	 * @param string $expected
	 */
	public function testDetectZipTypeFromFile( $fileName, $expected ): void {
		$path = self::MEDIA_FIXTURE_DIR . $fileName;

		// Binary mode: the fixtures are ZIP containers, not text.
		$handle = fopen( $path, 'rb' );
		$this->assertIsResource( $handle, "Unable to open fixture file: $path" );

		try {
			$this->assertSame(
				$expected,
				$this->mimeAnalyzer->detectZipTypeFromFile( $handle )
			);
		} finally {
			// Release the handle even when the assertion fails. The guard avoids
			// a second close in case the handle is no longer open at this point.
			if ( is_resource( $handle ) ) {
				fclose( $handle );
			}
		}
	}

	/**
	 * @return array[] Rows of [ description, fixture file name, expected MIME type ]
	 */
	public static function providePngZipConfusion(): array {
		return [
			[
				'An invalid ZIP file due to the signature being too close to the ' .
					'end to accommodate an EOCDR',
				'zip-sig-near-end.png',
				'image/png',
			],
			[
				'An invalid ZIP file due to the comment length running beyond the ' .
					'end of the file',
				'zip-comment-overflow.png',
				'image/png',
			],
			[
				'A ZIP file similar to the above, but without either of those two ' .
					'problems. Not a valid ZIP file, but it passes MimeAnalyzer\'s ' .
					'definition of a ZIP file. This is mostly a double check of the ' .
					'above two tests.',
				'zip-kind-of-valid.png',
				'application/zip',
			],
			[
				'As above with non-zero comment length',
				'zip-kind-of-valid-2.png',
				'application/zip',
			],
			[
				'Ye olde GIFAR vulnerability',
				'gifar.gif',
				'application/java'
			]
		];
	}

	/**
	 * Every fixture is presented with a "png" extension, regardless of its
	 * actual file name, and the guessed type is compared with the expectation.
	 *
	 * @dataProvider providePngZipConfusion
	 * @param string $description Used as the assertion failure message
	 * @param string $fileName Fixture file name inside the media fixture directory
	 * @param string $expectedType Expected MIME type
	 */
	public function testPngZipConfusion( $description, $fileName, $expectedType ): void {
		$path = self::MEDIA_FIXTURE_DIR . $fileName;

		$this->assertSame(
			$expectedType,
			$this->doGuessMimeType( [ $path, 'png' ] ),
			$description
		);
	}

	/**
	 * The empty string is not a MIME type and should not be mapped to a file extension.
	 */
	public function testNoEmptyStringMimeType(): void {
		$this->assertSame( [], $this->mimeAnalyzer->getExtensionsFromMimeType( '' ) );
	}

	/**
	 * Types and info registered from the init callback must be visible through
	 * the public lookup methods and through findMediaType().
	 */
	public function testAddExtraTypes(): void {
		$mime = new MimeAnalyzer( [
			'infoFile' => MimeAnalyzer::USE_INTERNAL,
			'typeFile' => MimeAnalyzer::USE_INTERNAL,
			'xmlTypes' => [],
			'initCallback' => static function ( $instance ) {
				$instance->addExtraTypes( 'fake/mime fake_extension' );
				$instance->addExtraInfo( 'fake/mime [OFFICE]' );
				$instance->mExtToMime[ 'no_such_extension' ] = 'fake/mime';
			},
		] );
		$this->assertSame( [ 'fake/mime' ], $mime->getMimeTypesFromExtension( 'fake_extension' ) );
		$this->assertSame( 'fake/mime', $mime->getMimeTypeFromExtensionOrNull( 'no_such_extension' ) );

		// findMediaType() is not public, hence the access wrapper.
		$mimeAccess = TestingAccessWrapper::newFromObject( $mime );
		$this->assertSame( MEDIATYPE_OFFICE, $mimeAccess->findMediaType( '.fake_extension' ) );
	}

	public function testGetMimeTypesFromNoExtension(): void {
		$this->assertSame( [], $this->mimeAnalyzer->getMimeTypesFromExtension( 'no_such_extension' ) );
	}

	/**
	 * @return iterable Named rows of [ extension, MIME type that must be among the results ]
	 */
	public static function provideFileExtensions(): iterable {
		yield 'ttf file extension should output font/sfnt' => [ 'ttf', 'font/sfnt' ];
		yield 'ttf file extension should output application/font-sfnt' => [ 'ttf', 'application/font-sfnt' ];
		yield 'woff file extension should output font/woff' => [ 'woff', 'font/woff' ];
		yield 'woff file extension should output application/font-woff' => [ 'woff', 'application/font-woff' ];
		yield 'woff2 file extension should output font/woff2' => [ 'woff2', 'font/woff2' ];
		yield 'woff2 file extension should output application/font-woff2' => [ 'woff2', 'application/font-woff2' ];
		yield 'webm file extension should return video/webm' => [ 'webm', 'video/webm' ];
		yield 'webm file extension should return audio/webm' => [ 'webm', 'audio/webm' ];
	}

	/**
	 * @dataProvider provideFileExtensions
	 * @param string $inputFileExtension
	 * @param string $expectedOutput MIME type expected to be contained in the result
	 */
	public function testGetMimeTypesFromExtension( $inputFileExtension, $expectedOutput ): void {
		$this->assertContains(
			$expectedOutput,
			$this->mimeAnalyzer->getMimeTypesFromExtension( $inputFileExtension )
		);
	}

	/**
	 * @return iterable Named rows of [ MIME type, exact ordered list of expected extensions ]
	 */
	public static function provideFileExtensionsForMimeType(): iterable {
		yield 'font/sfnt should output ttf and otf file extensions' => [ 'font/sfnt', [ 'ttf', 'otf' ] ];
		yield 'application/font-sfnt should output ttf file extension' => [ 'application/font-sfnt', [ 'ttf' ] ];
		yield 'font/woff should output woff file extension' => [ 'font/woff', [ 'woff' ] ];
		yield 'application/font-woff should output woff file extension' => [ 'application/font-woff', [ 'woff' ] ];
		yield 'font/woff2 should output woff2 file extension' => [ 'font/woff2', [ 'woff2' ] ];
		yield 'application/font-woff2 should output woff2 file extension' => [ 'application/font-woff2', [ 'woff2' ] ];
		yield 'text/sgml should output sgml and sgm file extensions' => [ 'text/sgml', [ 'sgml', 'sgm' ] ];
		yield 'text/javascript should output js file extension' => [ 'text/javascript', [ 'js' ] ];
	}

	/**
	 * @dataProvider provideFileExtensionsForMimeType
	 * @param string $inputMimeType
	 * @param string[] $expectedOutput Expected extensions, in order
	 */
	public function testGetExtensionsFromMimeType( $inputMimeType, $expectedOutput ): void {
		$this->assertSame(
			$expectedOutput,
			$this->mimeAnalyzer->getExtensionsFromMimeType( $inputMimeType )
		);
	}

	public function testGetMimeTypeFromExtensionOrNull(): void {
		$this->assertSame( 'video/webm', $this->mimeAnalyzer->getMimeTypeFromExtensionOrNull( 'webm' ) );
		$this->assertNull( $this->mimeAnalyzer->getMimeTypeFromExtensionOrNull( 'no_such_extension' ) );
	}

	public function testGetExtensionsFromFakeMimeType(): void {
		$this->assertSame( [], $this->mimeAnalyzer->getExtensionsFromMimeType( 'fake/mime' ) );
	}

	public function testGetExtensionFromMimeTypeOrNull(): void {
		$this->assertSame( 'sgml', $this->mimeAnalyzer->getExtensionFromMimeTypeOrNull( 'text/sgml' ) );
		$this->assertNull( $this->mimeAnalyzer->getExtensionFromMimeTypeOrNull( 'fake/mime' ) );
	}

	public function testGetMediaTypes(): void {
		$mediaTypes = $this->mimeAnalyzer->getMediaTypes();

		$this->assertIsArray( $mediaTypes );
		$this->assertNotEmpty( $mediaTypes );

		$this->assertContains( 'BITMAP', $mediaTypes );
	}

	public function testGetMediaTypeForNullCase(): void {
		// Test case when both $mime and $path are omitted
		$this->assertSame( MEDIATYPE_UNKNOWN, $this->mimeAnalyzer->getMediaType() );
	}

	public function testIsMatchingExtension(): void {
		// Passing an unknown MIME type
		$this->assertNull( $this->mimeAnalyzer->isMatchingExtension( 'application/x-custom', 'jpg' ) );
	}

	public function testIsValidMajorMimeTypeTrue(): void {
		$this->assertTrue( $this->mimeAnalyzer->isValidMajorMimeType( 'image' ) );
	}

	public function testIsValidMajorMimeTypeFalse(): void {
		$this->assertFalse( $this->mimeAnalyzer->isValidMajorMimeType( 'font' ) );
	}
}