File: 120_fallback.t

package info (click to toggle)
libunicode-utf8-perl 0.62-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 492 kB
  • sloc: perl: 2,365; sh: 6; makefile: 3
file content (106 lines) | stat: -rwxr-xr-x 2,682 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!perl

use strict;
use warnings;

use Test::More tests => 15;

BEGIN {
    use_ok('Unicode::UTF8', qw[ decode_utf8 encode_utf8 ]);
}

{
    my @positions;
    my $octets = "\x80 \xE2\x98\xBA \xF4\x80\x80 \xE0\x80\x80";
    my @exp    = ("\x80", "\xF4\x80\x80", "\xE0", "\x80", "\x80");
    my $count  = 0;

    my $fallback = sub {
        my ($octets, $is_usv, $position) = @_;

        my $exp  = shift @exp;
        my $name = sprintf '$sequence eq <%s>',
          join(' ', map { sprintf '%.4X', ord } split //, $exp);

        is($octets, $exp, $name);
        $count++;
        push @positions, $position;
    };

    {
        no warnings 'utf8';
        decode_utf8($octets, $fallback);
    }

    is($count, 5, "decode fallback invoked 5 times");
    is("@positions", "0 6 10 11 12", "got correct octet positions");
}

{
    my @tests = (
        [ "\x80 Foo \xE2\x98\xBA \xE0\x80\x80",
          "\x{FFFD} Foo \x{263A} \x{FFFD}\x{FFFD}\x{FFFD}",
          sub { return "\x{FFFD}" },
        ],
        [ "\x80 Foo \xE2\x98\xBA \xE0\x80\x80",
          "\x80 Foo \x{263A} \xE0\x80\x80",
          sub { return $_[0] }
        ],
        [ "\xEF\xB7\x90 Foo \xEF\xB7\xA0 \xE0\x80\x80",
          "! Foo ! \x{FFFD}\x{FFFD}\x{FFFD}",
          sub { return $_[1] ? '!' : "\x{FFFD}" }
        ],
    );

    foreach my $test (@tests) {
        my ($octets, $exp, $fallback) = @$test;

        my $name = sprintf 'decode_utf8(<%s>) eq <%s>',
          join(' ', map { sprintf '%.2X', ord } split //, $octets),
          join(' ', map { sprintf '%.4X', ord } split //, $exp);

        my $got = do {
            no warnings 'utf8';
            decode_utf8($octets, $fallback);
        };

        is($got, $exp, $name);
    }
}

{
    my @tests = (
        [ "\x{110000}",
          0x110000,
          sub { return $_[0] },
        ],
        [ "\x{110000} Foo \x{263A} \x{110000}",
          "\xEF\xBF\xBD Foo \xE2\x98\xBA \xEF\xBF\xBD",
          sub { return "\x{FFFD}" },
        ],
        [ "\x{110000} Foo \x{263A} \x{110000}",
          " Foo \xE2\x98\xBA ",
          sub { return '' }
        ],
        [ "\x{FDD0} Foo \x{263A} \x{FDE0}",
          "! Foo \xE2\x98\xBA !",
          sub { return $_[1] ? '!' : "\x{FFFD}" }
        ],
    );

    foreach my $test (@tests) {
        my ($string, $exp, $fallback) = @$test;

        my $name = sprintf 'encode_utf8(<%s>) eq <%s>',
          join(' ', map { sprintf '%.2X', ord } split //, $string),
          join(' ', map { sprintf '%.4X', ord } split //, $exp);

        my $got = do {
            no warnings 'utf8';
            encode_utf8($string, $fallback);
        };

        is($got, $exp, $name);
    }
}