File: 06_unicode.t

package info (click to toggle)
libhtml-defang-perl 1.07-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 320 kB
  • sloc: perl: 4,472; sh: 6; makefile: 2
file content (57 lines) | stat: -rw-r--r-- 2,188 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/perl -w

# CPAN users do not have the ME::* modules.  Loading ME::FindLibs through a
# string eval means a missing module only sets $@ instead of aborting
# compilation of this test.
BEGIN {
  eval q{use ME::FindLibs};
}

use utf8;
use Test::More tests => 19;
use HTML::Defang;
use Encode;
use Devel::Peek;
use strict;

# $H holds the HTML handed to defang(); $Res holds what defang() returned.
my $Res;
my $H;

# Fixed text fragments describing defang's output decoration.  The two
# comment paddings are interpolated into the <style> expectation at the end
# of this file; $DefangString is not referenced by any check in this file.
my $DefangString     = 'defang_';
my $CommentStartText = ' ';
my $CommentEndText   = ' ';

#################################
#  Check unicodeness is preserved despite internal non-unicode magic
#################################

# One HTML::Defang instance is shared by every scenario below.  A callback is
# registered for <a> and <p>; it runs its own assertions, so every defang()
# call adds callback checks to the test plan on top of the top-level ones.
my $Defang = HTML::Defang->new(
  tags_to_callback => [ 'a', 'p' ],
  tags_callback    => sub {
    # Of the callback arguments only the tag name, the end-tag flag, the
    # attribute hash and the HTML reference (positions 3, 4, 5, 7) are used.
    my ($TagName, $IsClosing, $Attrs, $HtmlRef) = @_[3, 4, 5, 7];

    # Nothing is checked for closing tags.
    return 1 if $IsClosing;

    if ($TagName eq 'a') {
      # Attribute values arrive as scalar references keyed by attribute name.
      ok(Encode::is_utf8(${ $Attrs->{href} }), "attr is unicode");
      is(${ $Attrs->{href} }, 'http://blah.com/ø', "attr unicode is correct");

      # Write a character string back through the reference and confirm the
      # stored value is still flagged as unicode afterwards.
      ${ $Attrs->{href} } = 'http://blah.com/ø';
      ok(Encode::is_utf8(${ $Attrs->{href} }), "attr is unicode2");
    } elsif ($TagName eq 'p') {
      ok(Encode::is_utf8(${$HtmlRef}), "html ref is unicode");

      # \G anchors at the buffer's current pos(); the lookahead consumes
      # nothing and /c leaves pos() alone if the match fails.
      ok(${$HtmlRef} =~ /\G(?=岡)/gc, "html ref unicode is correct");
    }

    # NOTE(review): 1 is presumably HTML::Defang's "always defang" result --
    # the expected output below shows both tags defanged; confirm in its docs.
    return 1;
  },
);
# Scenario 1: non-ASCII characters written literally in element text and in
# attribute values.
$H = <<"END_HTML";
<p>岡</p>
<a href="http://blah.com/ø" class="û">non-english href</a>
END_HTML
ok(Encode::is_utf8($H), "input is unicode");

$Res = $Defang->defang($H);
ok(Encode::is_utf8($Res), "output is unicode");

# Expected output fragments, checked in this order.  like() is called with
# explicit arguments because its prototype would flatten an array to a count.
for my $Expect (
  [ qr{^<!--defang_p-->岡<!--/defang_p-->}, "defang preserves unicode" ],
  [ qr{^<!--defang_a href="http://blah\.com/ø" defang_class="û"-->non-english href<!--/defang_a-->}m, "defang preserves unicode2" ],
) {
  my ($Pattern, $Name) = @{$Expect};
  like($Res, $Pattern, $Name);
}
# Scenario 2: same document, but the class value and a <style> declaration
# carry the numeric character reference &#251; instead of a literal character.
# The expectations below look for the literal character in the output.
$H = <<"END_HTML";
<p>岡</p>
<a href="http://blah.com/ø" class="&#251;">non-english href</a>
<style>a { color:red&#251;; }</style>
END_HTML
ok(Encode::is_utf8($H), "input2 is unicode");

$Res = $Defang->defang($H);
ok(Encode::is_utf8($Res), "output2 is unicode");

# Expected output fragments, checked in this order.  The last pattern expects
# the style declaration wrapped in /* */ inside an HTML comment padded with
# $CommentStartText / $CommentEndText.
for my $Expect (
  [ qr{^<!--defang_p-->岡<!--/defang_p-->}, "defang2 preserves unicode" ],
  [ qr{^<!--defang_a href="http://blah\.com/ø" defang_class="û"-->non-english href<!--/defang_a-->}m, "defang2 preserves unicode2" ],
  [ qr(^<style><!--${CommentStartText}a \{ /\*color:redû;\*/ \}${CommentEndText}--></style>)m, "style unicode correct" ],
) {
  my ($Pattern, $Name) = @{$Expect};
  like($Res, $Pattern, $Name);
}