File: 02..perlio-via.t

package info (click to toggle)
libfile-bom-perl 0.18-2
links: PTS, VCS
area: main
in suites: bookworm, sid, trixie
size: 184 kB
sloc: perl: 715; makefile: 2
file content (169 lines) | stat: -rw-r--r-- 4,621 bytes
parent folder | download | duplicates (5)
#!/usr/bin/perl

use strict;
use warnings;

use lib qw( t/lib );

use Test::More;
use Test::Framework;

use Fcntl qw( :seek );
use File::BOM qw( %enc2bom );

# Expected data for "moose" tests (below)
our %should_be = (
    'UTF-8'    => "\x{ef}\x{bb}\x{bf}m\x{c3}\x{b8}\x{c3}\x{b8}se\x{e2}\x{80}\x{a6}",
    'UTF-16BE' => "\x{fe}\x{ff}\x{0}m\x{0}\x{f8}\x{0}\x{f8}\x{0}s\x{0}e &",
    'UTF-16LE' => "\x{ff}\x{fe}m\x{0}\x{f8}\x{0}\x{f8}\x{0}s\x{0}e\x{0}& ",
    'UTF-32BE' => "\x{0}\x{0}\x{fe}\x{ff}\x{0}\x{0}\x{0}m\x{0}\x{0}\x{0}\x{f8}\x{0}\x{0}\x{0}\x{f8}\x{0}\x{0}\x{0}s\x{0}\x{0}\x{0}e\x{0}\x{0} &",
    'UTF-32LE' => "\x{ff}\x{fe}\x{0}\x{0}m\x{0}\x{0}\x{0}\x{f8}\x{0}\x{0}\x{0}\x{f8}\x{0}\x{0}\x{0}s\x{0}\x{0}\x{0}e\x{0}\x{0}\x{0}& \x{0}\x{0}",
);

plan tests => 2 * @test_files + 6 * keys(%enc2bom) + keys(%should_be) + 2;

# Work around bug in older PerlIO::via
# The PerlIO::via version number was not incremented when the bug was fixed.
my $compat = $] >= 5.008007 ? '' : ':utf8';

# Ignore known harmless warning
local $SIG{__WARN__} = sub {
    my $warning = "@_";
    if ($warning !~ /^UTF-(?:16|32)LE:Partial character/) {
	warn $warning;
    }
};

for my $test_file (@test_files) {
    ok(
	open(FH, "<:via(File::BOM)$compat", $file2path{$test_file}),
	"$test_file: opened through layer"
    ) or diag "$test_file: $!";

    my $line = <FH>; chomp $line; 
    is($line, $filecontent{$test_file}, "$test_file: read OK through layer")
	or diag("HEX: ".hexdump($line));
    close FH;
}

for my $enc (sort keys %enc2bom) {
    my $file = "test_file-$enc.txt";
    ok(
	open(BOM_OUT, ">:encoding($enc):via(File::BOM)$compat", $file),
	"Opened file for writing $enc via layer"
    ) or diag "$file: $!";

    my $line_one = "Unicode text\x{2026}";
    my $test = print(BOM_OUT "$line_one\n");
    ok($test, 'print() through layer')
	or diag("print() returned ". (defined($test)?$test:'undef'));

    my $line_two = "\x{62cd}\x{8ce3}";
    $test = print(BOM_OUT "$line_two\n");
    ok($test, 'print() through layer again')
	or diag("print() returned ". (defined($test)?$test:'undef'));

    close BOM_OUT;

    # check BOM
    if (open my $fh, '<:bytes', $file) {
	read $fh, my $sample, $File::BOM::MAX_BOM_LENGTH;
	like($sample, qr/^\Q$enc2bom{$enc}/, "BOM written correctly");
	close $fh;
    }
    else {
	diag "Couldn't open $file: $!";
	fail(1);
    }

    # now re-read
    my $line;
    open(BOM_IN, "<:via(File::BOM)$compat", $file);

    $line = <BOM_IN>; chomp $line;
    is($line, $line_one, 'BOM was written successfully via layer');

    $line = <BOM_IN>; chomp $line;
    is($line, $line_two, 'BOM not written in second print call');

    close BOM_IN;

    unlink $file or diag "Couldn't remove $file: $!";
}

# Mark Fowler's "moose" test:
{
    # This is 'moose...' (with slashes in the 'o's them, and the '...'
    # as one char).  As the '...' can't be represented in latin-1 then
    # perl will store the thing internally as a utf8 string with the
    # utf8 flag enabled.
    my $moose = "m\x{f8}\x{f8}se\x{2026}";

    for my $enc (keys %should_be) {
	my $file = "moose-$enc.txt";
	open(FH, ">:encoding($enc):via(File::BOM)$compat", $file) or die "Can't write to $file: $!\n";
	print FH $moose;
	close FH;

	open(FH, '<', $file) or die "Can't read $file: $!\n";
	local $/ = undef;
	my $value = <FH>;
	close FH;

	is(
	    reasciify($value),
	    reasciify($should_be{$enc}),
	    "check file for $enc"
	);

	unlink $file or diag "Can't remove '$file': $!";
    }
}

# Spurkis' seek test
{
    use utf8;
    my $file = 't/data/utf8_data.csv';

    open my $fh, '>:utf8', $file or die "Can't write $file: $!";
    print $fh <<"END_DATA";
\x{feff}id,street,town,pc,country,english,french,chinese,arabic
'10,"écoles",zoom,12,france,auctions,"Enchères","拍賣","مزاد"
END_DATA

    open $fh, '<:via(File::BOM)', $file
	or die "Can't read $file: $!\n";

    my $first_line = <$fh>;
    my $pos = tell($fh); # position of second line
    my $rest = join('', <$fh>);

    seek($fh, 0, SEEK_SET) or die "Couldn't seek: $!";

    my $new_first_line = <$fh>;
    seek($fh, $pos, SEEK_SET) or die "Couldn't seek: $!";
    my $new_rest = join('', <$fh>);

    is($new_first_line, $first_line, "seek() works");
    is($new_rest,	      $rest, "tell() works")
	or diag "Position was $pos";

    close $fh;

    unlink $file or warn "Couldn't remove $file: $!\n";
}

# sub for moose test
sub reasciify {
    my $string = shift;
    $string = join "", map {
    my $ord = ord($_);
	($ord > 127 || ($ord < 32 && $ord != 10))
	? sprintf '\x{%x}', $ord
	: $_
    } split //, $string
}

__END__

vim: ft=perl