#! /usr/bin/perl
#---------------------------------------------------------------------
# 10-find.t
# Copyright 2012 Christopher J. Madsen
#
# Test the find_charset_in function
#---------------------------------------------------------------------
use strict;
use warnings;
use Test::More 0.88; # done_testing
use Scalar::Util 'blessed';
use IO::HTML 'find_charset_in';
# Fixed plan: 21 charset lookups made through test(), 2 direct assertions
# on the encoding object, and 3 bytes_to_check cases.  Update this number
# whenever a test is added or removed.
plan tests => 26;
#---------------------------------------------------------------------
# test(EXPECTED, HTML, [OPTIONS,] [NAME])
#
# Run find_charset_in on HTML (plus OPTIONS, when a reference is
# supplied in that position) in scalar context and compare the result
# to EXPECTED using is().  NAME is the optional test description.
# Returns whatever is() returns.

sub test
{
  my ($expected, $html, @rest) = @_;

  # A reference immediately after the HTML is the options argument
  # for find_charset_in; anything else is taken as the test name.
  my @args = ($html);
  push @args, shift @rest if ref $rest[0];
  my $description = shift @rest;

  # Make failures point at the caller's line instead of this helper.
  local $Test::Builder::Level = $Test::Builder::Level + 1;

  my $found = find_charset_in(@args);
  is($found, $expected, $description);
} # end test
#---------------------------------------------------------------------
# Each case hands a snippet of HTML to test(), which compares the scalar
# result of find_charset_in against the expected encoding name (or undef
# when no charset should be found).
#
# The here-docs use an explicit END_HTML terminator.  An empty-string
# terminator (<<'') only ends at a completely empty line, so the file
# stops compiling as soon as blank lines are stripped or reflowed.

# Plain <meta charset>
test 'utf-8-strict' => <<'END_HTML';
<meta charset="UTF-8">
END_HTML

# A UTF-16 declaration without a BOM is expected to come back as UTF-8
test 'utf-8-strict' => <<'END_HTML';
<!-- UTF-16 is recognized only with a BOM -->
<meta charset="UTF-16BE">
END_HTML

# Whitespace (including a line break) around the = sign
test 'iso-8859-15' => <<'END_HTML';
<meta charset ="ISO-8859-15">
END_HTML

test 'iso-8859-15' => <<'END_HTML';
<meta charset= "ISO-8859-15">
END_HTML

test 'iso-8859-15' => <<'END_HTML';
<meta charset =
"ISO-8859-15">
END_HTML

# "charset =" appearing inside another attribute's quoted value must not
# be taken as a declaration; the following real <meta charset> is used.
test 'utf-8-strict' => <<'END_HTML';
<meta foo=bar some=" charset =
"ISO-8859-15">
<meta charset="UTF-8">
END_HTML

# Windows-1252 is expected to be reported as cp1252
test 'cp1252' => <<'END_HTML';
<meta charset="Windows-1252">
END_HTML

test undef, <<'END_HTML', 'misspelled charset';
<meta charseat="Windows-1252">
END_HTML

# Several declarations: the expected result matches the first one
test 'utf-8-strict' => <<'END_HTML';
<meta charset="UTF-8">
<meta charset="Windows-1252">
<meta charseat="Windows-1252">
END_HTML

# http-equiv Content-Type pragma; ISO-8859-1 is expected to be reported
# as cp1252
test 'cp1252' => <<'END_HTML';
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" />
<title>Title</title>
END_HTML

# Pragma variants: unquoted attribute values, a line break before the =,
# attributes in the opposite order, and no space after the semicolon
test 'iso-8859-15' => <<'END_HTML';
<html>
<head><!-- somebody forgot the quotes -->
<meta http-equiv=Content-Type content=text/html; charset=ISO-8859-15 />
<title>Title</title>
END_HTML

test 'iso-8859-15' => <<'END_HTML';
<html>
<head><!-- somebody forgot the quotes -->
<meta http-equiv
=Content-Type content=text/html; charset=ISO-8859-15 />
<title>Title</title>
END_HTML

test 'iso-8859-15' => <<'END_HTML';
<html>
<head><!-- different order -->
<meta content=text/html; charset=ISO-8859-15 http-equiv=Content-Type>
<title>Title</title>
END_HTML

test 'cp1252' => <<'END_HTML';
<html>
<head>
<meta content="text/html;charset=ISO-8859-1" http-equiv=Content-Type>
<title>Title</title>
END_HTML

# Input that ends in the middle of a quoted attribute value
test undef, <<'END_HTML', 'incomplete attribute';
<html>
<foo href="c06.
END_HTML

# Comment handling: <!--> and <!---> are expected to close the comment
# at once (so the <meta> is seen), whereas <!-- > leaves it open until
# the final -->.
test 'iso-8859-15' => <<'END_HTML', 'short comment';
<!--><meta charset="ISO-8859-15">-->
END_HTML

test 'iso-8859-15' => <<'END_HTML', 'strange comment';
<!---><meta charset="ISO-8859-15">-->
END_HTML

test undef, <<'END_HTML', 'inside comment';
<!-- ><meta charset="ISO-8859-15">-->
END_HTML

# A charset in content= is ignored when http-equiv is not Content-Type,
# unless the need_pragma option is turned off.
test undef, <<'END_HTML', 'wrong pragma';
<html>
<head>
<meta http-equiv="X-Content-Type" content="text/html; charset=UTF-8" />
<title>Title</title>
END_HTML

test 'utf-8-strict', <<'END_HTML', {need_pragma => 0}, 'need_pragma 0';
<html>
<head>
<meta http-equiv="X-Content-Type" content="text/html; charset=UTF-8" />
<title>Title</title>
END_HTML

# An unknown encoding name is skipped and the next declaration is used
test 'iso-8859-15' => <<'END_HTML', 'bogus encoding';
<meta charset="Totally-Bogus-Encoding-That-Doesnt-Exist">
<meta charset=ISO-8859-15>
END_HTML

# With encoding => 1 the result should be a blessed object whose name
# method returns the encoding name, not a plain string.
{
  my $encoding = find_charset_in('<meta charset="UTF-8">', { encoding => 1 });

  ok(blessed($encoding), 'encoding is an object');

  # eval guards against $encoding not being an object at all
  is(eval { $encoding->name }, 'utf-8-strict', 'encoding is UTF-8');
}

# Tests involving bytes_to_check
test cp1252 => (' ' x 1023) . '<meta charset="ISO-8859-1">',
     'found at 1023 bytes';

test undef, (' ' x 1024) . '<meta charset="ISO-8859-1">',
     'not found at 1024 bytes';

{
  # Raise the limit for this block only; local restores it on exit
  local $IO::HTML::bytes_to_check = 1025;

  test cp1252 => (' ' x 1024) . '<meta charset="ISO-8859-1">',
       'found at 1024 bytes with bytes_to_check=1025';
}

done_testing;