1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
|
use strict;
use warnings;

use Test::More tests => 4;
use MARC::Charset qw(marc8_to_utf8);
use MARC::Charset::Constants qw(:all);

## This test exercises the second type of character set escape, as
## documented at http://lcweb.loc.gov/marc/specifications/speccharmarc8.html
## under "Technique 2: Other Alternate Graphic Character Sets".
## Mixed ASCII and Greek: G0 is switched to Basic Greek for one character
## and then switched back to Basic Latin for the trailing ASCII text.
my $test = join q{},
    'this is greek',                      ## plain ASCII
    ESCAPE, SINGLE_G0_A, BASIC_GREEK,     ## set G0 to Greek
    chr(0x49),                            ## zeta
    ESCAPE, SINGLE_G0_A, BASIC_LATIN,     ## set G0 back to ASCII
    'this is not';                        ## plain ASCII
## The single Greek byte is expected to come back as U+0396.
my $expected = join q{}, 'this is greek', chr(0x0396), 'this is not';
is(marc8_to_utf8($test), $expected, 'escape type 2 to Greek');
## Arabic only: both G0 and G1 are re-designated, and the data ends
## without ever switching back to ASCII.
$test = join q{},
    ESCAPE, SINGLE_G0_A, BASIC_ARABIC,        ## set G0 to ArabicBasic
    ESCAPE, SINGLE_G1_A, EXTENDED_ARABIC,     ## set G1 to ArabicExtended
    chr(0x4d),                                ## HAH (from Basic)
    chr(0xBA);                                ## DUL (from Extended)
## One expected code point per input character, in input order.
$expected = join q{}, map { chr($_) } 0x062D, 0x068E;
is(marc8_to_utf8($test), $expected, 'escape type 2 to Basic+Ext Arabic');
## Hebrew and Arabic mixed: G0 starts as Basic Arabic and is then replaced
## by Hebrew, while G1 is set to Extended Arabic once and not changed again.
$test = join q{},
    ESCAPE, SINGLE_G0_A, BASIC_ARABIC,        ## set G0 to ArabicBasic
    ESCAPE, SINGLE_G1_A, EXTENDED_ARABIC,     ## set G1 to ArabicExtended
    chr(0x47),                                ## ALEF (Arabic Basic)
    ESCAPE, SINGLE_G0_A, BASIC_HEBREW,        ## replace ArabicBasic with Hebrew
    chr(0x71),                                ## SAMEKH (Hebrew)
    chr(0xE9);                                ## RNOON (ArabicExtended)
## One expected code point per input character, in input order.
$expected = join q{}, map { chr($_) } 0x0627, 0x05E1, 0x06BB;
is(marc8_to_utf8($test), $expected, 'escape type 2 Arabic + Hebrew mixed');
## Greek with an embedded space: the space sits between Greek characters
## while G0 is still set to Basic Greek, and must survive as a space.
$test = join q{},
    ESCAPE, SINGLE_G0_A, BASIC_GREEK,     ## set G0 to Greek
    chr(0x49),                            ## ZETA
    chr(0x4B),                            ## THETA
    ' ',                                  ## SPACE
    chr(0x4E),                            ## LAMBDA
    ESCAPE, SINGLE_G0_A, BASIC_LATIN;     ## back to ASCII
## The trailing escape back to ASCII contributes nothing to the output.
$expected = join q{}, chr(0x0396), chr(0x0398), ' ', chr(0x039B);
is(marc8_to_utf8($test), $expected, 'greek with internal space');
|