File: unicode.t

package info (click to toggle)
libhtml-tidy-perl 1.56-2
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 392 kB
  • ctags: 198
  • sloc: perl: 1,185; sh: 23; makefile: 7
file content (65 lines) | stat: -rw-r--r-- 1,868 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!perl -T
# Copyright (c) 2006 Jonathan Rockway <jrockway@cpan.org>

use warnings;
use strict;

use Test::More tests => 9;

use HTML::Tidy;
use Encode ();
use Carp;

# Options shared by every HTML::Tidy object created in this script.
# NOTE(review): 'Lf' presumably keeps Tidy's line endings in step with the
# LF-terminated reference document in DATA -- confirm against HTML::Tidy docs.
my $args = { newline => 'Lf' };
my $tidy = HTML::Tidy->new($args);

# Ask this first object to ignore messages of type TIDY_INFO.
$tidy->ignore( type => TIDY_INFO );

# Suck in the reference HTML document.
# The strict ':encoding(UTF-8)' layer is used instead of the lax ':utf8'
# layer: ':utf8' only flags the data as characters without validating it, so
# a malformed fixture would silently yield a corrupt string.
my $html_path = 't/unicode.html';
open( my $html_fh, '<:encoding(UTF-8)', $html_path )
    or Carp::croak( "Can't read $html_path: $!" );
my $html = do { local $/; <$html_fh> };
close $html_fh;

# Suck in the correct, cleaned doc (from DATA), decoded the same strict way.
binmode( DATA, ':encoding(UTF-8)' )
    or Carp::croak( "Can't set UTF-8 layer on DATA: $!" );
my $reference = do { local $/; <DATA> };

# Make sure both are unicode characters (not utf-x octets).
ok(utf8::is_utf8($html), 'html is utf8');
ok(utf8::is_utf8($reference), 'reference is utf8');

# Cleaning a character string must hand back a character string.
my $clean = $tidy->clean( $html );
ok(utf8::is_utf8($clean), 'cleaned output is also unicode');

# Collapse the generator string (a quoted value beginning with "HTML Tidy" or
# "tidyp" and running up to "w3.org", or "HTML Tidy for HTML5 ...") down to
# "Tidy", which is the value the reference document in DATA carries.
$clean =~ s/"((HTML Tidy|tidyp).+w3\.org|HTML Tidy for HTML5[^"]*)"/"Tidy"/;
is($clean, $reference, q{Cleanup didn't break anything});

my @messages = $tidy->messages;
is_deeply( \@messages, [], q{There still shouldn't be any errors} );

# Repeat the message check with a fresh object and parse() instead of clean().
# This object is built from the same options but has no TIDY_INFO ignore rule.
$tidy = HTML::Tidy->new($args);
isa_ok( $tidy, 'HTML::Tidy' );
my $parsed_ok = $tidy->parse( '', $html );
ok( $parsed_ok, 'Parsed OK' );
@messages = $tidy->messages;
is_deeply( \@messages, [], q{There still shouldn't be any errors} );

# Feed clean() UTF-8 encoded octets rather than a character string and check
# that it still returns a character string equal to the reference document.
subtest 'Try send bytes to clean method.' => sub {
    # Keep the encoded copy in its own variable rather than re-declaring
    # $html, so it is obvious which value is octets and which is characters.
    # 'UTF-8' selects Encode's strict encoder (as opposed to the lax 'utf8').
    my $octets = Encode::encode( 'UTF-8', $html );
    ok(!utf8::is_utf8($octets), 'html is raw bytes');

    my $clean = $tidy->clean( $octets );
    ok(utf8::is_utf8($clean), 'but cleaned output is string');

    # Collapse the generator string down to "Tidy", the value used in the
    # reference document, before comparing.
    $clean =~ s/"((HTML Tidy|tidyp).+w3\.org|HTML Tidy for HTML5[^"]*)"/"Tidy"/;
    is($clean, $reference, q{Cleanup didn't break anything});
};

__DATA__
<!DOCTYPE html>
<html>
<head>
<meta name="generator" content="Tidy">
<title>日本語のホムページ</title>
</head>
<body>
<p>Unicodeが好きですか?</p>
</body>
</html>