1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148
|
#! /usr/bin/env perl
use strict;
use warnings;
use Test::More;
use Try::Tiny;
use Log::Any '$log';
use Log::Any::Adapter 'TAP';
use Data::TableReader::Decoder::CSV;
my $csvmod;
plan skip_all => 'Need a CSV parser for this test'
unless try { $csvmod= Data::TableReader::Decoder::CSV->default_csv_module };
note "CSV decoder is ".$csvmod." version ".$csvmod->VERSION;
my $log_fn= sub { $log->can($_[0])->($log, $_[1]) };
sub test_basic {
my $input= ascii();
open my $input_fh, '<', \$input or die;
my $d= new_ok( 'Data::TableReader::Decoder::CSV',
[ file_name => '', file_handle => $input_fh, _log => $log_fn ],
'CSV decoder' );
ok( my $iter= $d->iterator, 'got iterator' );
is_deeply( $iter->(), [ 'a', 'b', 'c', 'd' ], 'first row' );
is( $iter->row, 1, 'row=1' );
is( $iter->dataset_idx, 0, 'dataset_idx=0' );
is_deeply( $iter->(), [ '1', '2', '3', '4' ], 'second row' );
is( $iter->row, 2, 'row=2' );
is_deeply( $iter->(), undef, 'no third row' );
is( $iter->row, 2, 'row=2' );
is( $iter->dataset_idx, 0, 'dataset_idx=0' );
ok( $iter->seek(0), 'rewind' );
is( $iter->row, 0, 'row=0' );
is_deeply( $iter->(), [ 'a', 'b', 'c', 'd' ], 'first row again' );
is( $iter->row, 1, 'row=1' );
is_deeply( $iter->([2,1]), [ '3', '2' ], 'slice from second row' );
ok( !$iter->next_dataset, 'no next dataset' );
is( $iter->dataset_idx, 0, 'dataset_idx=0' );
}
sub test_multi_iterator {
my $input= ascii();
open my $input_fh, '<', \$input or die;
my $d= new_ok( 'Data::TableReader::Decoder::CSV',
[ file_name => '', file_handle => $input_fh, _log => $log_fn ],
'CSV decoder' );
ok( my $iter= $d->iterator, 'create first iterator' );
# This might be supported in the future, but for now ensure it dies
like( (try { $d->iterator } catch {$_}), qr/multiple iterator/i, 'error for multiple iterators' );
undef $iter; # release old iterator, freeing up the file handle to create a new one
ok( $iter= $d->iterator, 'new iterator' );
is_deeply( $iter->(), [ 'a', 'b', 'c', 'd' ], 'first row again' );
}
sub test_utf_bom {
for my $input_fn (qw( utf8_bom utf16_le_bom utf16_be_bom utf8_nobom deceptive_utf8_nobom )) {
subtest "seekable $input_fn" => sub {
my $input= main->$input_fn;
open my $input_fh, '<', \$input or die;
my $d= new_ok( 'Data::TableReader::Decoder::CSV',
[ file_name => '', file_handle => $input_fh, _log => $log_fn ],
"CSV decoder for $input_fn" );
ok( my $iter= $d->iterator, 'got iterator' );
like( $iter->()[0], qr/^\x{FFFD}?test$/, 'first row' );
is_deeply( $iter->(), [ "\x{8A66}\x{3057}", 1, 2, 3 ], 'second row' );
is_deeply( $iter->(), [ "\x{27000}" ], 'third row' );
is_deeply( $iter->(), undef, 'no fourth row' );
ok( $iter->seek(0), 'rewind' );
# workaround for a perl bug! the input string gets corrupted
substr($input,0,8)= substr(main->$input_fn,0,8);
like( $iter->()[0], qr/^\x{FFFD}?test$/, 'first row' );
is_deeply( $iter->([0,3]), [ "\x{8A66}\x{3057}", 3 ], 'slice from second row' );
ok( !$iter->next_dataset, 'no next dataset' );
};
subtest "nonseekable $input_fn" => sub {
my $input= main->$input_fn;
pipe(my ($input_fh, $out_fh)) or die "pipe: $!";
print $out_fh $input or die "print(pipe_out): $!";
close $out_fh or die "close: $!";
my $d= new_ok( 'Data::TableReader::Decoder::CSV',
[ file_name => '', file_handle => $input_fh, _log => $log_fn ],
"CSV decoder for $input_fn" );
if ($input_fn =~ /deceptive/) {
# Some inputs on non-seekable file handles will result in this exception.
# This is expected.
like( (try { $d->iterator } catch {$_}), qr/seek/, 'can\'t seek exception' );
} else {
ok( my $iter= $d->iterator, 'got iterator' );
like( $iter->()[0], qr/^\x{FFFD}?test$/, 'first row' );
is_deeply( $iter->(), [ "\x{8A66}\x{3057}", 1, 2, 3 ], 'second row' );
is_deeply( $iter->(), [ "\x{27000}" ], 'third row' );
is_deeply( $iter->(), undef, 'no fourth row' );
ok( !$iter->next_dataset, 'no next dataset' );
}
};
}
}
subtest basic => \&test_basic;
subtest multi_iter => \&test_multi_iterator;
subtest utf_bom => \&test_utf_bom;
done_testing;
sub ascii {
return <<END;
a,b,c,d
1,2,3,4
END
}
sub utf8_bom {
# BOM "test\n"
# "\x{8A66}\x{3057},1,2,3\n"
# "\x{27000}\n"
return "\xEF\xBB\xBF"
."test\n"
."\xE8\xA9\xA6\xE3\x81\x97,1,2,3\n"
."\xF0\xA7\x80\x80\n";
}
sub utf16_le_bom {
return "\xFF\xFE"
."t\0e\0s\0t\0\n\0"
."\x66\x8A\x57\x30,\x001\x00,\x002\x00,\x003\x00\n\x00"
."\x5C\xD8\x00\xDC\n\0";
}
sub utf16_be_bom {
return "\xFE\xFF"
."\x00t\x00e\x00s\x00t\x00\n"
."\x8A\x66\x30\x57\x00,\x001\x00,\x002\x00,\x003\x00\n"
."\xD8\x5C\xDC\x00\0\n";
}
sub utf8_nobom {
return "test\n"
."\xE8\xA9\xA6\xE3\x81\x97,1,2,3\n"
."\xF0\xA7\x80\x80\n";
}
sub deceptive_utf8_nobom {
return "\xEF\xBF\xBD"
."test\n"
."\xE8\xA9\xA6\xE3\x81\x97,1,2,3\n"
."\xF0\xA7\x80\x80\n";
}
|