1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
|
<?php
/**
 * Unit tests for the IPTC metadata helper: charset recognition and keyword
 * extraction from raw Photoshop/IPTC binary blocks.
 *
 * @group Media
 */
class IPTCTest extends \MediaWikiUnitTestCase {

	/**
	 * The escape sequence "\x1b%G" must be recognized as UTF-8.
	 *
	 * @covers \IPTC::getCharset
	 */
	public function testRecognizeUtf8() {
		// utf-8 is the only one used in practice.
		$charset = IPTC::getCharset( "\x1b%G" );
		// Strict comparison: the expectation is a plain string.
		$this->assertSame( 'UTF-8', $charset );
	}

	/**
	 * Data sets for testIPTCParseUTF8().
	 *
	 * Every raw block starts with the "Photoshop 3.0\08BIM\4\4" identifier,
	 * followed by a length field and one or more records. Keyword records are
	 * introduced by "\x1c\x02\x19" followed by a two byte value length.
	 *
	 * @return Generator Yields [ $rawData, $expectedKeywords ] keyed by case name
	 */
	public static function provideParse() {
		// $rawData, $expectedKeywords

		// basically IPTC for keyword with value of 0xBC which is 1/4 in iso-8859-1
		// This data doesn't specify a charset. We're supposed to guess
		// (which basically means utf-8 if valid, windows 1252 (iso 8859-1) if not)
		yield 'No charset 88591' => [
			"Photoshop 3.0\08BIM\4\4\0\0\0\0\0\x06\x1c\x02\x19\x00\x01\xBC",
			[ '¼' ]
		];

		/* This one contains a sequence that's valid iso 8859-1 but not valid utf8 */
		/* \xC3 = Ã, \xB8 = ¸ */
		// The keyword is four bytes long (\xC3\xC3\xC3\xB8), so read as a single-byte
		// charset it must come out as four characters: three Ã followed by ¸.
		yield 'No charset 88591b' => [
			"Photoshop 3.0\08BIM\4\4\0\0\0\0\0\x09\x1c\x02\x19\x00\x04\xC3\xC3\xC3\xB8",
			[ 'ÃÃÃ¸' ]
		];

		// Same as above, but forcing the charset to utf-8. What should happen is the
		// first "\xC3\xC3" should be dropped as invalid, leaving \xC3\xB8, which is ø
		yield 'Forced UTF but invalid' => [
			"Photoshop 3.0\08BIM\4\4\0\0\0\0\0\x11\x1c\x02\x19\x00\x04\xC3\xC3\xC3\xB8"
				. "\x1c\x01\x5A\x00\x03\x1B\x25\x47",
			[ 'ø' ]
		];

		// No charset marker, but the keyword bytes are valid utf-8 on their own.
		yield 'No charset UTF8' => [
			"Photoshop 3.0\08BIM\4\4\0\0\0\0\0\x07\x1c\x02\x19\x00\x02¼",
			[ '¼' ]
		];

		// Testing something that has 2 values for keyword
		yield 'Multiple keywords (2)' => [
			"Photoshop 3.0\08BIM\4\4" . /* identifier */
				"\0\0\0\0\0\x0D" . /* length */
				"\x1c\x02\x19\x00\x01\xBC" .
				"\x1c\x02\x19\x00\x02\xBC\xBD",
			[ '¼', '¼½' ]
		];

		// This has the magic "\x1c\x01\x5A\x00\x03\x1B\x25\x47" which marks content as UTF8.
		yield 'UTF8' => [
			"Photoshop 3.0\08BIM\4\4\0\0\0\0\0\x0F\x1c\x02\x19\x00\x02¼\x1c\x01\x5A\x00\x03\x1B\x25\x47",
			[ '¼' ]
		];
	}

	/**
	 * Parses a raw IPTC block and checks the extracted 'Keywords' entry.
	 *
	 * @covers \IPTC::parse
	 * @dataProvider provideParse
	 * @param string $rawData Binary IPTC block handed to IPTC::parse()
	 * @param string[] $expectedKeywords Keywords expected under the 'Keywords' key
	 */
	public function testIPTCParseUTF8( $rawData, $expectedKeywords ) {
		$parsed = IPTC::parse( $rawData );
		$this->assertEquals( $expectedKeywords, $parsed['Keywords'] );
	}
}
|