File: 01..bom.t

package info (click to toggle)
libfile-bom-perl 0.15-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 180 kB
  • ctags: 23
  • sloc: perl: 692; makefile: 2
file content (208 lines) | stat: -rw-r--r-- 5,839 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
#!/usr/bin/perl

use strict;
use warnings;

use lib qw( t/lib );

use Test::More;
use Test::Framework;

use Encode qw( encode decode :fallback_all );
use Fcntl qw( :seek );

our @encodings;
BEGIN {
    # encodings to use in unseekable test
    @encodings = qw( UTF-8 UTF-16LE UTF-16BE UTF-32LE UTF-32BE );

    plan tests => 11 + (@test_files * 14) + (@encodings * 4);

    use_ok("File::BOM", ':all');
}

# Ignore known harmless warning
local $SIG{__WARN__} = sub {
    my $warning = "@_";
    if ($warning !~ /^UTF-(?:16|32)LE:Partial character/) {
	warn $warning;
    }
};

for my $file (@test_files) {
    my $file_enc = $file2enc{$file};
    is(open_bom(FH, $file2path{$file}), $file2enc{$file}, "$file: open_bom returned encoding");
    my $expect = $filecontent{$file};

    my $line = <FH>;
    chomp $line;

    is($line, $expect, "$file: test content returned OK");

    close FH;

    {
	# test defuse
	open BOMB, '<', $file2path{$file}
	    or die "Couldn't read '$file2path{$file}': $!";

	my $enc = defuse BOMB;
	is($enc, $file_enc, "$file: defuse returns correct encoding ($enc)");
	$line = <BOMB>;
	chomp $line;
	is($line, $expect, "$file: defused version content OK");

	close BOMB;
    }

    open FH, '<', $file2path{$file};
    my $first_line = <FH>;
    chomp $first_line;

    seek(FH, 0, SEEK_SET);

    is(get_encoding_from_filehandle(FH), $file_enc, "$file: get_encoding_from_filehandle returned correct encoding");

    my($enc, $offset) = get_encoding_from_bom($first_line);
    is($enc, $file_enc, "$file: get_encoding_from_bom also worked");

    {
	my $decoded = $enc ? decode($enc, substr($first_line, $offset))
			   : $first_line;

	is($decoded, $expect, "$file: .. and offset worked with substr()");
    }

    #
    # decode_from_bom()
    #
    my $result = decode_from_bom($first_line, 'UTF-8', FB_CROAK);
    is($result, $expect, "$file: decode_from_bom() scalar context");
    {
	# with default
	my $default = 'UTF-8';
	my $expect_enc = $file_enc || $default;

	my($decoded, $got_enc) = decode_from_bom($first_line, $default, FB_CROAK);

	is($decoded, $expect,      "$file: decode_from_bom() list context");
	is($got_enc, $expect_enc,  "$file: decode_from_bom() list context encoding");
    }
    {
	# without default
	my $expect_enc = $file_enc;
	my($decoded, $got_enc) = decode_from_bom($first_line, undef, FB_CROAK);

	is($decoded, $expect,      "$file: decode_from_bom() list context, no default");
	is($got_enc, $expect_enc,  "$file: decode_from_bom() list context encoding, no default");
    }

    seek(FH, 0, SEEK_SET);

    ($enc, my $spill) = get_encoding_from_stream(FH);

    $line = <FH>; chomp $line;

    is($enc, $file_enc, "$file: get_encoding_from_stream()");

    $line = $spill . $line;
    $line = decode($enc, $line) if $enc;

    is($line, $expect, "$file: read OK after get_encoding_from_stream");

    close FH;
}

# Test unseekable
SKIP: {
    my $tests = 4 * @encodings;
    skip "mkfifo not supported on this platform", $tests
	unless $fifo_supported;

    skip "mkfifo tests skipped on cygwin, set TEST_FIFO to enable them", $tests
        if $^O eq 'cygwin' && !$ENV{'TEST_FIFO'};

    for my $encoding (@encodings) {
	my($pid, $fifo, $enc, $spill, $result);

        # We need two copies of this as the encode below is destructive!
        my $expected = my $test = "Testing \x{2170}, \x{2171}, \x{2172}\n";

	my $bytes = $enc2bom{$encoding}
                  . encode($encoding, $test, FB_CROAK);

	($pid, $fifo) = write_fifo($bytes);
	($enc, $spill) = open_bom(my $fh, $fifo);
	$result = $spill . <$fh>;

	close $fh;
	waitpid($pid, 0);
	unlink $fifo;

	is($enc, $encoding,    "Read BOM correctly in unseekable $encoding file");
	is($result, $expected, "Read $encoding data from unseekable source");

	# Now test defuse too
	($pid, $fifo) = write_fifo($bytes);
	open($fh, '<:utf8', $fifo) or die "Couldn't read '$fifo': $!";
	($enc, $spill) = defuse $fh;
	$result = $spill . <$fh>;

	close $fh;
	waitpid($pid, 0);
	unlink $fifo;

	is($enc, $encoding, "defused fifo OK ($encoding)");
	is($result, $expected, "read defused fifo OK ($encoding)")
        or diag(
            "Hex dump:\n".
            "Got:      ". hexdump($result) ."\n".
            "Expected: ". hexdump($expected) ."\n".
            "Spillage: ". hexdump($spill)
        );
    }
}

# Test broken BOM
{
    my $broken_content = "\xff\xffThis file has a broken BOM";
    my $broken_file = 't/data/broken_bom.txt';
    my($enc, $spill) = open_bom(my $fh, $broken_file);
    is($enc, '', "open_bom on file with broken BOM has no encoding");
    {
	my $line = <$fh>;
	chomp $line;
	is($line, $broken_content, "handle with broken BOM returns as expected");
    }

    SKIP: {
	skip "mkfifo not supported on this platform", 3
	    unless $fifo_supported;

        skip "mkfifo tests skipped on cygwin, set TEST_FIFO to enable them", 3
            if $^O eq 'cygwin' && !$ENV{'TEST_FIFO'};

	my($pid, $fifo) = write_fifo($broken_content);
	open my $fh, '<', $fifo or die "Cannot read fifo '$fifo': $!";
	my($enc, $spill) = get_encoding_from_filehandle($fh);
	is($enc, '', "get_encoding_from_filehandle() on unseekable file broken bom");
	ok($spill, ".. spillage was produced");
	is($spill . <$fh>, $broken_content, "spillage + content as expected");

	close $fh;
	waitpid($pid, 0);
	unlink $fifo;
    }
}

# Test internals

is(File::BOM::_get_char_length('UTF-8', 0xe5), 3, '_get_char_length() on UTF-8 start byte (3)');
is(File::BOM::_get_char_length('UTF-8', 0xd5), 2, '_get_char_length() on UTF-8 start byte (2)');
is(File::BOM::_get_char_length('UTF-8', 0x7f), 1, '_get_char_langth() on UTF-8 single byte char');
is(File::BOM::_get_char_length('', ''), undef,    '_get_char_length() on undef');
is(File::BOM::_get_char_length('UTF-32BE', ''), 4,  '_get_char_length() on UTF-32');

__END__

vim: ft=perl