File: IPTCTest.php

package info (click to toggle)
mediawiki 1:1.43.3+dfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 417,464 kB
  • sloc: php: 1,062,949; javascript: 664,290; sql: 9,714; python: 5,458; xml: 3,489; sh: 1,131; makefile: 64
file content (72 lines) | stat: -rw-r--r-- 2,133 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
<?php

/**
 * Unit tests for IPTC charset detection and keyword extraction.
 *
 * @group Media
 */
class IPTCTest extends \MediaWikiUnitTestCase {

	/**
	 * The escape sequence "\x1b%G" must be recognised as UTF-8.
	 *
	 * @covers \IPTC::getCharset
	 */
	public function testRecognizeUtf8() {
		// utf-8 is the only one used in practice.
		$res = IPTC::getCharset( "\x1b%G" );
		// The expected value is a plain string, so compare strictly (type and value).
		$this->assertSame( 'UTF-8', $res );
	}

	/**
	 * Data sets for testIPTCParseUTF8().
	 *
	 * Every case is [ $rawData, $expectedKeywords ]: a raw metadata blob and
	 * the list of keyword strings that parsing it is expected to produce.
	 * The byte just before each record's payload is that payload's length, and
	 * the byte after "\0\0\0\0\0" is the total length of all records that follow,
	 * so both must be kept in sync when editing a case.
	 */
	public static function provideParse() {
		// $rawData, $expectedKeywords

		// basically IPTC for keyword with value of 0xBC which is 1/4 in iso-8859-1
		// This data doesn't specify a charset. We're supposed to guess
		// (which basically means utf-8 if valid, windows 1252 (iso 8859-1) if not)
		yield 'No charset 88591' => [
			"Photoshop 3.0\08BIM\4\4\0\0\0\0\0\x06\x1c\x02\x19\x00\x01\xBC",
			[ '¼' ]
		];

		// This one contains a sequence that's valid iso 8859-1 but not valid utf8.
		// With no charset given, each of the four bytes is a character of its own:
		// \xC3 = Ã (U+00C3), \xB8 = ¸ (U+00B8). The expected keyword is therefore
		// four characters long; the trailing \xC3\xB8 must NOT be read as ø (U+00F8),
		// that only applies when utf-8 is forced (see the next case).
		yield 'No charset 88591b' => [
			"Photoshop 3.0\08BIM\4\4\0\0\0\0\0\x09\x1c\x02\x19\x00\x04\xC3\xC3\xC3\xB8",
			[ 'ÃÃÃ¸' ]
		];

		// Same as above, but forcing the charset to utf-8. What should happen is the
		// first "\xC3\xC3" should be dropped as invalid, leaving \xC3\xB8, which is ø
		yield 'Forced UTF but invalid' => [
			"Photoshop 3.0\08BIM\4\4\0\0\0\0\0\x11\x1c\x02\x19\x00\x04\xC3\xC3\xC3\xB8"
				. "\x1c\x01\x5A\x00\x03\x1B\x25\x47",
			[ 'ø' ]
		];

		// No charset marker, but the payload is already valid utf-8, so it is kept as is.
		yield 'No charset UTF8' => [
			"Photoshop 3.0\08BIM\4\4\0\0\0\0\0\x07\x1c\x02\x19\x00\x02¼",
			[ '¼' ]
		];

		// Testing something that has 2 values for keyword
		yield 'Multiple keywords (2)' => [
			"Photoshop 3.0\08BIM\4\4" . /* identifier */
				"\0\0\0\0\0\x0D" . /* length */
				"\x1c\x02\x19\x00\x01\xBC" .
				"\x1c\x02\x19\x00\x02\xBC\xBD",
			[ '¼', '¼½' ]
		];

		// This has the magic "\x1c\x01\x5A\x00\x03\x1B\x25\x47" which marks content as UTF8.
		yield 'UTF8' => [
			"Photoshop 3.0\08BIM\4\4\0\0\0\0\0\x0F\x1c\x02\x19\x00\x02¼\x1c\x01\x5A\x00\x03\x1B\x25\x47",
			[ '¼' ]
		];
	}

	/**
	 * Parses each raw blob and checks the extracted keywords.
	 *
	 * Despite the method name, the provider also supplies data that carries no
	 * charset marker and data that is not valid utf-8.
	 *
	 * @covers \IPTC::parse
	 * @dataProvider provideParse
	 * @param string $rawData Raw metadata blob handed to IPTC::parse()
	 * @param string[] $expectedKeywords Keywords expected under the 'Keywords' key
	 */
	public function testIPTCParseUTF8( $rawData, $expectedKeywords ) {
		$res = IPTC::parse( $rawData );
		$this->assertEquals( $expectedKeywords, $res['Keywords'] );
	}
}