1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
|
#!/usr/bin/perl
use strict;
use warnings;
use utf8;
use Test::More;
use Encode qw( encode from_to );
use File::LoadLines;
# Reference data: the two lines every generated test file must yield.
my @data = ( "{title: Swing Low Sweet Chariot}", "{subtitle: Sub Títlë}" );

# Scratch directory that receives the generated test files.
-d "out" or mkdir("out");

# Recode to UTF-8: every reference line gets a trailing newline, and the
# result is kept as UTF-8 octets so it can be recoded per target encoding.
my $data = encode( "UTF-8", join( "", map { "$_\n" } @data ) );

# Encodings written with a leading byte order mark, and those written without.
my @BOMs   = qw( UTF-8 UTF-16BE UTF-16LE UTF-32BE UTF-32LE );
my @noBOMs = qw( ISO-8859-1 UTF-8 );

# Encoding name => U+FEFF encoded in that encoding (the BOM octets).
my %enc2bom;
$enc2bom{$_} = encode( $_, "\x{feff}" ) for @BOMs;

for my $encoding (@noBOMs) {
    enctest( $encoding, 1 );
}
for my $encoding (@BOMs) {
    enctest($encoding);
}

# Each encoding is run in 4 line-ending variants with 3 checks per variant.
done_testing( 4 * 3 * ( scalar(@noBOMs) + scalar(@BOMs) ) );
# Run the load test for one encoding in four line-ending variants: the
# reference data as is, then with every "\n" replaced by an explicit
# LF, CR and CRLF sequence.
#
#   $enc   - name of the encoding to test.
#   $nobom - true when the file must be written without a byte order mark.
sub enctest {
    my ( $enc, $nobom ) = @_;

    # Unmodified data; no line-ending label is passed for this variant.
    _enctest( $data, $enc, $nobom );

    my @variants = (
        [ "LF",   "\x0a" ],
        [ "CR",   "\x0d" ],
        [ "CRLF", "\x0d\x0a" ],
    );
    for my $variant (@variants) {
        my ( $label, $eol ) = @$variant;

        # Work on a private copy so $data stays pristine for the next variant.
        ( my $encoded = $data ) =~ s/\n/$eol/g;
        _enctest( $encoded, $enc, $nobom, $label );
    }
}
# Write one encoded variant of the reference data to disk and verify that
# loadlines() reads it back as the two reference lines in @data.
#
#   $encoded - UTF-8 octets of the reference text, with the line endings
#              under test already substituted.
#   $enc     - name of the target encoding; also used for the file name.
#   $nobom   - true to write the file without a leading byte order mark.
#   $crlf    - optional line-ending label, used in test descriptions only.
#
# Emits exactly three test results. Dies when the data cannot be recoded or
# the file cannot be written; bails out of the whole run when a BOM is
# requested for an encoding that has no entry in %enc2bom.
sub _enctest {
    my ( $encoded, $enc, $nobom, $crlf ) = @_;

    # Recode in place. from_to() returns undef when the conversion fails.
    defined( from_to( $encoded, "UTF-8", $enc ) )
      or die("Cannot recode test data from UTF-8 to $enc\n");

    unless ( $nobom ) {
        BAIL_OUT("Unknown encoding: $enc") unless $enc2bom{$enc};
        $encoded = $enc2bom{$enc} . $encoded;
    }

    # Write raw octets; buffered write errors may only surface at close,
    # so both print and close are checked.
    my $fn = "out/$enc.cho";
    open( my $fh, ">:raw", $fn ) or die("$fn: $!\n");
    print {$fh} $encoded or die("$fn: $!\n");
    close($fh) or die("$fn: $!\n");

    # Human-readable description of this variant for the test names.
    my $label = $enc;
    $label .= " (no BOM)" if $nobom;
    $label .= " ($crlf)"  if $crlf;

    # With fail => "soft" a load failure is expected to be reported through
    # $opts->{error} rather than by dying, so it can be shown as a note.
    my $opts = { fail => "soft" };
    my @d = loadlines( $fn, $opts );
    unless (@d) {
        my $error = defined $opts->{error} ? $opts->{error} : "no lines returned";
        note("$fn: $error");
    }

    # is() reports the actual line count when the check fails.
    is( scalar(@d), 2, "$label: Two lines" );
    is( $d[0], $data[0], "$label: Line 1" );
    is( $d[1], $data[1], "$label: Line 2" );

    unlink($fn);
}
|