1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235
|
#!./perl -w
BEGIN {
unless (find PerlIO::Layer 'perlio') {
print "1..0 # Skip: not perlio\n";
exit 0;
}
unless (eval { require Encode } ) {
print "1..0 # Skip: not Encode\n";
exit 0;
}
}
use Test::More tests => 24;
my $grk = "grk$$";
my $utf = "utf$$";
my $fail1 = "fa$$";
my $fail2 = "fb$$";
my $russki = "koi8r$$";
my $threebyte = "3byte$$";
if (open(GRK, ">$grk")) {
binmode(GRK, ":bytes");
# alpha beta gamma in ISO 8859-7
print GRK "\xe1\xe2\xe3";
close GRK or die "Could not close: $!";
}
{
is(open(my $i,'<:encoding(iso-8859-7)',$grk), 1);
is(open(my $o,'>:utf8',$utf), 1);
is((print $o readline $i), 1);
close($o) or die "Could not close: $!";
close($i);
}
if (open(UTF, "<$utf")) {
binmode(UTF, ":bytes");
if (ord('A') == 193) { # EBCDIC
# alpha beta gamma in UTF-EBCDIC Unicode (0x3b1 0x3b2 0x3b3)
is(scalar <UTF>, "\xb4\x58\xb4\x59\xb4\x62");
} else {
# alpha beta gamma in UTF-8 Unicode (0x3b1 0x3b2 0x3b3)
is(scalar <UTF>, "\xce\xb1\xce\xb2\xce\xb3");
}
close UTF;
}
{
use Encode;
is (open(my $i,'<:utf8',$utf), 1);
is (open(my $o,'>:encoding(iso-8859-7)',$grk), 1);
is ((scalar print $o readline $i), 1);
close($o) or die "Could not close: $!";
close($i);
}
if (open(GRK, "<$grk")) {
binmode(GRK, ":bytes");
is(scalar <GRK>, "\xe1\xe2\xe3");
close GRK;
}
$SIG{__WARN__} = sub {$warn .= $_[0]};
is (open(FAIL, ">:encoding(NoneSuch)", $fail1), undef, 'Open should fail');
like($warn, qr/^Cannot find encoding "NoneSuch" at/);
is(open(RUSSKI, ">$russki"), 1);
print RUSSKI "\x3c\x3f\x78";
close RUSSKI or die "Could not close: $!";
open(RUSSKI, "$russki");
binmode(RUSSKI, ":raw");
my $buf1;
read(RUSSKI, $buf1, 1);
# eof(RUSSKI);
binmode(RUSSKI, ":encoding(koi8-r)");
my $buf2;
read(RUSSKI, $buf2, 1);
my $offset = tell(RUSSKI);
is(ord $buf1, 0x3c);
is(ord $buf2, (ord('A') == 193) ? 0x6f : 0x3f);
is($offset, 2);
close RUSSKI;
undef $warn;
# Check there is no Use of uninitialized value in concatenation (.) warning
# due to the way @latin2iso_num was used to make aliases.
is(open(FAIL, ">:encoding(latin42)", $fail2), undef, 'Open should fail');
like($warn, qr/^Cannot find encoding "latin42" at.*line \d+\.$/);
# Create a string of chars that are 3 bytes in UTF-8
my $str = "\x{1f80}" x 2048;
# Write them to a file
open(F,'>:utf8',$threebyte) || die "Cannot open $threebyte:$!";
print F $str;
close(F);
# Read file back as UTF-8
open(F,'<:encoding(utf-8)',$threebyte) || die "Cannot open $threebyte:$!";
my $dstr = <F>;
close(F);
is($dstr, $str);
# Try decoding some bad stuff
open(F,'>:raw',$threebyte) || die "Cannot open $threebyte:$!";
if (ord('A') == 193) { # EBCDIC
print F "foo\x8c\x80\x80\x80bar\n\x80foo\n";
} else {
print F "foo\xF0\x80\x80\x80bar\n\x80foo\n";
}
close(F);
open(F,'<:encoding(utf-8)',$threebyte) || die "Cannot open $threebyte:$!";
$dstr = join(":", <F>);
close(F);
if (ord('A') == 193) { # EBCDIC
is($dstr, "foo\\x8C\\x80\\x80\\x80bar\n:\\x80foo\n");
} else {
is($dstr, "foo\\xF0\\x80\\x80\\x80bar\n:\\x80foo\n");
}
# Check that PerlIO::encoding can handle custom encodings that do funny
# things with the buffer.
use Encode::Encoding;
package Extensive {
@ISA = Encode::Encoding;
__PACKAGE__->Define('extensive');
sub encode($$;$) {
my ($self,$buf,$chk) = @_;
my $leftovers = '';
if ($buf =~ /(.*\n)(?!\z)/) {
$buf = $1;
$leftovers = $';
}
if ($chk) {
undef $_[1];
my @x = (' ') x 8000; # reuse the just-freed buffer
$_[1] = $leftovers; # SvPVX now points elsewhere and is shorter
} # than bufsiz
$buf;
}
no warnings 'once';
*decode = *encode;
}
open my $fh, ">:encoding(extensive)", \$buf;
$fh->autoflush;
print $fh "doughnut\n";
print $fh "quaffee\n";
# Print something longer than the buffer that encode() shrunk:
print $fh "The beech leaves beech leaves on the beach by the beech.\n";
close $fh;
is $buf, "doughnut\nquaffee\nThe beech leaves beech leaves on the beach by"
." the beech.\n", 'buffer realloc during encoding';
$buf = "Sheila surely shod Sean\nin shoddy shoes.\n";
open $fh, "<:encoding(extensive)", \$buf;
is join("", <$fh>), "Sheila surely shod Sean\nin shoddy shoes.\n",
'buffer realloc during decoding';
package Cower {
@ISA = Encode::Encoding;
__PACKAGE__->Define('cower');
sub encode($$;$) {
my ($self,$buf,$chk) = @_;
my $leftovers = '';
if ($buf =~ /(.*\n)(?!\z)/) {
$buf = $1;
$leftovers = $';
}
if ($chk) {
no warnings; # stupid @_[1] warning
@_[1] = keys %{{$leftovers=>1}}; # shared hash key (copy-on-write)
}
$buf;
}
no warnings 'once';
*decode = *encode;
}
open $fh, ">:encoding(cower)", \$buf;
$fh->autoflush;
print $fh $_ for qw "pumping plum pits";
close $fh;
is $buf, "pumpingplumpits", 'cowing buffer during encoding';
$buf = "pumping\nplum\npits\n";
open $fh, "<:encoding(cower)", \$buf;
is join("", <$fh>), "pumping\nplum\npits\n",
'cowing buffer during decoding';
package Globber {
no warnings 'once';
@ISA = Encode::Encoding;
__PACKAGE__->Define('globber');
sub encode($$;$) {
my ($self,$buf,$chk) = @_;
$_[1] = *foo if $chk;
$buf;
}
*decode = *encode;
}
# Here we just want to test there is no crash. The actual output is not so
# important.
# We need a double eval, as scope unwinding will close the handle,
# which croaks.
# Under debugging builds with PERL_DESTRUCT_LEVEL set, we have to skip this
# test, as it triggers bug #115692, resulting in string table warnings.
require Config;
SKIP: {
skip "produces string table warnings", 2
if "@{[Config::non_bincompat_options()]}" =~ /\bDEBUGGING\b/
&& $ENV{PERL_DESTRUCT_LEVEL};
eval { eval {
open my $fh, ">:encoding(globber)", \$buf;
print $fh "Agathopous Goodfoot\n";
close $fh;
}; $e = $@};
like $@||$e, qr/Close with partial character/,
'no crash when assigning glob to buffer in encode';
$buf = "To hymn him who heard her herd herd\n";
open $fh, "<:encoding(globber)", \$buf;
my $x = <$fh>;
close $fh;
is $x, "To hymn him who heard her herd herd\n",
'no crash when assigning glob to buffer in decode';
} # SKIP
END {
1 while unlink($grk, $utf, $fail1, $fail2, $russki, $threebyte);
}
|