File: escape2.t

package info (click to toggle)
libmarc-charset-perl 1.35-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 2,476 kB
  • sloc: xml: 99,038; perl: 774; makefile: 9
file content (60 lines) | stat: -rw-r--r-- 2,197 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
use Test::More tests=>4;

use MARC::Charset::Constants qw(:all);

## this test will exercise the first type of character escapes 
## as documents at http://lcweb.loc.gov/marc/specifications/speccharmarc8.html
## "Technique 2: Other Alternate Graphic Character Sets"

use strict;
use MARC::Charset qw(marc8_to_utf8);

## test some ASCII & Greek mixed together

my $test = 
    'this is greek' .			    ## regular ASCII
    ESCAPE . SINGLE_G0_A . BASIC_GREEK .    ## set G0 to Greek
    chr(0x49) .				    ## zeta
    ESCAPE . SINGLE_G0_A . BASIC_LATIN .    ## set GO to ASCII
    'this is not';			    ## regular ASCII

my $expected = 'this is greek' . chr(0x0396) . 'this is not';
is(marc8_to_utf8($test), $expected, 'escape type 2 to Greek');

## test some arabic, which never returns to ASCII

$test = 
    ESCAPE . SINGLE_G0_A . BASIC_ARABIC .   ## set G0 to ArabicBasic
    ESCAPE . SINGLE_G1_A . EXTENDED_ARABIC. ## set G1 to ArabicExtended
    chr(0x4d) .				    ## HAH (from Basic)
    chr(0xBA);				    ## DUL (from Extended)

$expected = chr(0x062D) . chr(0x068E);
is(marc8_to_utf8($test), $expected, 'escape type 2 to Basic+Ext Arabic');

## test some Hebrew and Arabic mixed together

$test = 
    ESCAPE . SINGLE_G0_A . BASIC_ARABIC .   ## set G0 to ArabicBasic
    ESCAPE . SINGLE_G1_A . EXTENDED_ARABIC. ## set G1 to ArabicExtended
    chr(0x47) .				    ## ALEF (Arabic Basic) 
    ESCAPE . SINGLE_G0_A . BASIC_HEBREW .   ## replace ArabicBasic with Hebrew
    chr(0x71) .				    ## SAMEKH (Hebrew)
    chr(0xE9); 				    ## RNOON (ArabicExtended)

$expected = chr(0x0627) . chr(0x05E1) . chr(0x06BB);
is(marc8_to_utf8($test), $expected, 'escape type 2 Arabic + Hebrew mixed');

## test some greek with spaces
$test = 
    ESCAPE . SINGLE_G0_A . BASIC_GREEK .    ## set G0 to Greek
    chr(0x49) .                             ## ZETA
    chr(0x4B) .                             ## THETA
    ' ' .                                   ## SPACE
    chr(0x4E) .                             ## LAMBDA
    ESCAPE . SINGLE_G0_A . BASIC_LATIN;     ## Back to ASCII 

$expected = chr(0x0396) . chr(0x0398) . ' ' . chr(0x039B);
is(marc8_to_utf8($test), $expected, 'greek with internal space');