File: 02utf-8.t

package info (click to toggle)
libintl-perl 1.26-2
  • links: PTS
  • area: main
  • in suites: buster, stretch
  • size: 5,696 kB
  • ctags: 495
  • sloc: perl: 156,143; makefile: 137
file content (124 lines) | stat: -rw-r--r-- 2,974 bytes parent folder | download | duplicates (17)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#! /usr/local/bin/perl -w

# vim: syntax=perl
# vim: tabstop=4

use strict;

use Test;

BEGIN {
	plan tests => 5;
}

use Locale::Recode;

sub int2utf8;

my $codes = {};
foreach (0 .. 0xcfff
		 # 0 .. 0x11_000, 
		 # 0x10_000 .. 0x11_000,
	     # 0x200_000 .. 0x201_000,      # :-(  # Not supported by Perl 5.6
	     # 0x4_000_000 .. 0x4_001_000,  # :-(  # Not supported by Perl 5.6
         ) {
	$codes->{$_} = int2utf8 $_;
}

my $cd_int = Locale::Recode->new (from => 'UTF-8',
			     		  		 to => 'INTERNAL');
ok !$cd_int->getError;

my $cd_rev = Locale::Recode->new (from => 'INTERNAL',
								 to => 'UTF-8');
ok !$cd_rev->getError;

# Convert into internal representation.
my $result_int = 1;
while (my ($ucs4, $outbuf) = each %$codes) {
	my $result = $cd_int->recode ($outbuf);
	unless ($result && $outbuf->[0] == $ucs4) {
		$result_int = 0;
		last;
	}
}
ok $result_int;

# Convert from internal representation.
my $result_rev = 1;
if (1) {
	# FIXME: This test only succeeds with use bytes in Perl >= 5.8.0.
	# However, this will fail with Perl <= Perl 5.6.0. :-(
	# FIXME: Is it really fixed now?
while (my ($ucs4, $code) = each %$codes) {
    my $outbuf = [ $ucs4 ];
    my $result = $cd_rev->recode ($outbuf);
    unless ($result && $code eq $outbuf) {
        $result_rev = 0;
        last;
    }
}
}
ok $result_rev;

# Check handling of unknown characters.  This assumes that the 
# character set is a subset of US-ASCII.
my $test_string1 = "\xffSupergirl\xff";
$cd_rev = Locale::Recode->new (from => 'ASCII',
							   to => 'UTF-8',
							  );
$result_rev = $cd_rev->recode ($test_string1);
ok $result_rev && $test_string1 eq "�Supergirl�";

sub int2utf8
{
    my $ucs4 = shift;

    if ($ucs4 <= 0x7f) {
		return chr $ucs4;
    } elsif ($ucs4 <= 0x7ff) {
		return pack ("C2", 
			(0xc0 | (($ucs4 >> 6) & 0x1f)),
			(0x80 | ($ucs4 & 0x3f)));
    } elsif ($ucs4 <= 0xffff) {
		return pack ("C3", 
			(0xe0 | (($ucs4 >> 12) & 0xf)),
			(0x80 | (($ucs4 >> 6) & 0x3f)),
			(0x80 | ($ucs4 & 0x3f)));
    } elsif ($ucs4 <= 0x1fffff) {
		return pack ("C4", 
			(0xf0 | (($ucs4 >> 18) & 0x7)),
			(0x80 | (($ucs4 >> 12) & 0x3f)),
			(0x80 | (($ucs4 >> 6) & 0x3f)),
			(0x80 | ($ucs4 & 0x3f)));
    } elsif ($ucs4 <= 0x3ffffff) {
		return pack ("C5", 
			(0xf0 | (($ucs4 >> 24) & 0x3)),
			(0x80 | (($ucs4 >> 18) & 0x3f)),
			(0x80 | (($ucs4 >> 12) & 0x3f)),
			(0x80 | (($ucs4 >> 6) & 0x3f)),
			(0x80 | ($ucs4 & 0x3f)));
    } else {
		return pack ("C6", 
			(0xf0 | (($ucs4 >> 30) & 0x3)),
			(0x80 | (($ucs4 >> 24) & 0x1)),
			(0x80 | (($ucs4 >> 18) & 0x3f)),
			(0x80 | (($ucs4 >> 12) & 0x3f)),
			(0x80 | (($ucs4 >> 6) & 0x3f)),
			(0x80 | ($ucs4 & 0x3f)));
    }
}

# Local Variables:
# mode: perl
# perl-indent-level: 4
# perl-continued-statement-offset: 4
# perl-continued-brace-offset: 0
# perl-brace-offset: -4
# perl-brace-imaginary-offset: 0
# perl-label-offset: -4
# cperl-indent-level: 4
# cperl-continued-statement-offset: 2
# tab-width: 4
# End: