File: 400_utf8validate.t

package info (click to toggle)
libsereal-decoder-perl 5.004%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 2,556 kB
  • sloc: ansic: 11,615; perl: 6,938; sh: 25; makefile: 9
file content (74 lines) | stat: -rw-r--r-- 2,035 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!perl

use strict;
use warnings;
use Data::Dumper;
use Test::More;
use File::Spec;

use lib File::Spec->catdir(qw(t lib));

# If the current directory has no 't' subdirectory, additionally put the
# relative path 'lib' on @INC at compile time.
# NOTE(review): presumably this lets Sereal::TestSet be found when the test
# is started from inside t/ rather than from the distribution root - confirm.
BEGIN {
    unless ( -d 't' ) {
        lib->import('lib');
    }
}
use Sereal::TestSet;

use Sereal::Decoder qw(decode_sereal);
no warnings 'utf8';
# Documents that have to decode without error when validate_utf8 is enabled.
# Each entry is [ test name, encoded document, expected decoded value ].
my @valid_utf8= (
    [ 'latin1', "=srl\x01\x00'\x06Au feu",          'Au feu' ],
    [ 'utf8',   "=srl\x01\x00'\x08\xc3\x80 l'eau",  "\xC0 l'eau" ],
    [ 'bom',    "=srl\x01\x00'\x06\xEF\xBB\xBFfoo", "\x{FEFF}foo" ],

    # Code points that are not valid characters but whose byte encoding is
    # still well-formed UTF8: FFFE is a non-character.
    [ 'fffe', "=srl\x01\x00'\x03\xEF\xBF\xBE", "\x{FFFE}" ],

    # Carried as binary instead of as a utf8 string, so the FF bytes must
    # not raise an error.
    [ 'ffpadded', "=srl\x01\x00&\x04\xFF\xFF\xFF\xFF", "\xFF\xFF\xFF\xFF" ],
);

# Documents whose string payload is not well-formed UTF8.
# Each entry is [ test name, encoded document ].
my @invalid_utf8= (

    # Payload consisting solely of FF bytes
    [ 'ffpadded', "=srl\x01\x00'\x04\xFF\xFF\xFF\xFF" ],

    # F0 82 82 AC is an overlong encoding of U+20AC
    [ 'overlong', "=srl\x01\x00'\x04\xF0\x82\x82\xAC" ],

    # Not enough continuation bytes
    [ 'continuation', "=srl\x01\x00'\x01\xC0" ],
);

# Every case in either table is expected to produce two test results.
plan( tests => 2 * ( @valid_utf8 + @invalid_utf8 ) );

# Each well-formed document must decode with validate_utf8 enabled and give
# the expected value. Every case always reports exactly two results, so the
# number of tests run matches the plan even when decoding dies.
for my $test (@valid_utf8) {
    my ( $name, $exp, $expected )= @$test;
    my $out;

    # The trailing 1 makes eval return true only when decode_sereal did not die.
    my $ok= eval { decode_sereal( $exp, { validate_utf8 => 1 }, $out ); 1 };
    my $err= $@ || 'Zombie error';

    SKIP: {
        if ( !ok( $ok, "$name: did not die" ) ) {
            diag $err;
            diag "input=", Data::Dumper::qquote($exp);

            # There is no decoded value to compare. Record the comparison
            # as skipped instead of jumping to the next case, which would
            # leave the run one test short of the plan.
            skip "$name: decoding died", 1;
        }
        is( $out, $expected, "$name: correctly decoded" );
    }
}

# Malformed UTF8 must be rejected only when validation is requested: the
# decode without options has to succeed, and the decode with validate_utf8
# has to die with the UTF8 error message. Every case always reports exactly
# two results, so the number of tests run matches the plan.
for my $test (@invalid_utf8) {
    my ( $name, $exp )= @$test;
    my $out;

    # The trailing 1 makes eval return true only when decode_sereal did not die.
    my $ok= eval { decode_sereal( $exp, undef, $out ); 1 };
    my $err= $@ || 'Zombie error';

    SKIP: {
        if ( !ok( $ok, "$name: did not die" ) ) {
            diag $err;
            diag "input=", Data::Dumper::qquote($exp);

            # Record the validation check as skipped instead of jumping to
            # the next case, which would leave the run short of the plan.
            skip "$name: decoding without validation died", 1;
        }

        $ok= eval { decode_sereal( $exp, { validate_utf8 => 1 }, $out ); 1 };

        # If the decode unexpectedly succeeded, say so explicitly. The
        # 'Zombie error' placeholder is kept for a die that left $@ empty.
        $err=
            $ok
            ? 'no exception: decoding unexpectedly succeeded'
            : ( $@ || 'Zombie error' );
        like( $err, qr/Invalid UTF8 byte sequence/, "$name: die with a UTF8 error" );
    }
}