File: 10-find.t

package info (click to toggle)
libio-html-perl 1.004-3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 176 kB
  • sloc: perl: 463; makefile: 2; sh: 1
file content (147 lines) | stat: -rw-r--r-- 3,533 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#! /usr/bin/perl
#---------------------------------------------------------------------
# 10-find.t
# Copyright 2012 Christopher J. Madsen
#
# Test the find_charset_in function
#---------------------------------------------------------------------

use strict;
use warnings;

use Test::More 0.88;            # done_testing
use Scalar::Util 'blessed';

use IO::HTML 'find_charset_in';

# Declare the expected number of assertions up front: the 24 calls to
# test() below plus the ok() and is() in the "encoding" block.
plan tests => 26;

# test($expected, $html, [$options,] [$name])
#
# Runs find_charset_in on $html (plus $options, when the argument that
# follows $html is a reference) in scalar context and compares the
# result with $expected using is().  $name is the optional test
# description; $expected may be undef.
sub test
{
  my ($expected, $html, @rest) = @_;

  # A reference right after the HTML is the options for find_charset_in
  my @args = ($html);
  push @args, shift @rest if ref $rest[0];

  my $description = shift @rest;

  # Report failures at the caller's line, not inside this helper
  local $Test::Builder::Level = $Test::Builder::Level + 1;

  my $found = find_charset_in(@args);

  is($found, $expected, $description);
} # end test

#---------------------------------------------------------------------
# Tests of the <meta charset=...> form.
#
# Each test passes the expected result first and the HTML second.  The
# HTML is a <<'' here-document: it is not interpolated and it ends at
# the first empty line, so the blank line after each snippet is required.

# Simplest case: a <meta charset> naming UTF-8.
test 'utf-8-strict' => <<'';
<meta charset="UTF-8">

# A <meta> naming UTF-16BE is expected to yield utf-8-strict instead.
test 'utf-8-strict' => <<'';
<!-- UTF-16 is recognized only with a BOM -->
<meta charset="UTF-16BE">

# Whitespace before the "=" is accepted.
test 'iso-8859-15' => <<'';
<meta charset ="ISO-8859-15">

# Whitespace after the "=" is accepted.
test 'iso-8859-15' => <<'';
<meta charset= "ISO-8859-15">

# Whitespace on both sides of the "=", including a newline, is accepted.
test 'iso-8859-15' => <<'';
<meta charset =
 "ISO-8859-15">

# Here the text "charset =" sits inside the quoted value of the "some"
# attribute, so the expected result comes from the second <meta>.
test 'utf-8-strict' => <<'';
<meta foo=bar some=" charset =
 "ISO-8859-15">
<meta charset="UTF-8">

# Windows-1252 is expected to be reported under the name cp1252.
test 'cp1252' => <<'';
<meta charset="Windows-1252">

# A misspelled attribute name is expected to give no result (undef).
test undef, <<'', 'misspelled charset';
<meta charseat="Windows-1252">

# With several <meta> tags, the charset of the first one is expected.
test 'utf-8-strict' => <<'';
<meta charset="UTF-8">
<meta charset="Windows-1252">
<meta charseat="Windows-1252">

# Tests of the <meta http-equiv="Content-Type" content="...; charset=..."> form.

# Fully quoted pragma; ISO-8859-1 is expected to be reported as cp1252.
test 'cp1252' => <<'';
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" />
<title>Title</title>

# Unquoted attribute values.
test 'iso-8859-15' => <<'';
<html>
<head><!-- somebody forgot the quotes -->
<meta http-equiv=Content-Type content=text/html; charset=ISO-8859-15 />
<title>Title</title>

# Unquoted values with a newline between the attribute name and the "=".
test 'iso-8859-15' => <<'';
<html>
<head><!-- somebody forgot the quotes -->
<meta http-equiv
=Content-Type content=text/html; charset=ISO-8859-15 />
<title>Title</title>

# The content attribute may come before http-equiv.
test 'iso-8859-15' => <<'';
<html>
<head><!-- different order -->
<meta content=text/html; charset=ISO-8859-15 http-equiv=Content-Type>
<title>Title</title>

# No space after the ";" in content, with content before http-equiv.
test 'cp1252' => <<'';
<html>
<head>
<meta content="text/html;charset=ISO-8859-1" http-equiv=Content-Type>
<title>Title</title>

# Input that ends in the middle of a quoted attribute value: no charset
# is expected (undef).
test undef, <<'', 'incomplete attribute';
<html>
<foo href="c06.

# Comment handling.  In the first two cases the <meta> is expected to be
# seen (the test expects "<!-->" and "<!--->" to count as complete
# comments); in the third, "<!-- >" leaves the comment open, so the
# <meta> is inside it and no charset is expected.
test 'iso-8859-15' => <<'', 'short comment';
<!--><meta charset="ISO-8859-15">-->

test 'iso-8859-15' => <<'', 'strange comment';
<!---><meta charset="ISO-8859-15">-->

test undef, <<'', 'inside comment';
<!-- ><meta charset="ISO-8859-15">-->

# The http-equiv value is X-Content-Type rather than Content-Type, so by
# default no charset is expected.
test undef, <<'', 'wrong pragma';
<html>
<head>
<meta http-equiv="X-Content-Type" content="text/html; charset=UTF-8" />
<title>Title</title>

# Same HTML as above, but with the option need_pragma => 0 the charset
# in the content attribute is expected to be accepted.
test 'utf-8-strict', <<'', {need_pragma => 0}, 'need_pragma 0';
<html>
<head>
<meta http-equiv="X-Content-Type" content="text/html; charset=UTF-8" />
<title>Title</title>

# An unknown encoding name in the first <meta> is expected to be skipped
# in favor of the valid (unquoted) one in the second <meta>.
test 'iso-8859-15' => <<'', 'bogus encoding';
<meta charset="Totally-Bogus-Encoding-That-Doesnt-Exist">
<meta charset=ISO-8859-15>

# With the option encoding => 1, find_charset_in is expected to return a
# blessed object whose name method gives the encoding name, instead of a
# plain string.
{
  my $encoding = find_charset_in('<meta charset="UTF-8">', { encoding => 1 });

  ok(blessed($encoding), 'encoding is an object');

  # eval guards the method call so that a non-object result fails this
  # test instead of aborting the script
  is(eval { $encoding->name }, 'utf-8-strict', 'encoding is UTF-8');
}

# Tests involving bytes_to_check
#
# The <meta> tag is pushed away from the start of the input with leading
# spaces.  It is expected to be found after 1023 spaces but not after
# 1024 (presumably the default limit is 1024 bytes -- confirm against
# IO::HTML).

test cp1252 => (' ' x 1023) . '<meta charset="ISO-8859-1">',
    'found at 1023 bytes';

test undef, (' ' x 1024) . '<meta charset="ISO-8859-1">',
    'not found at 1024 bytes';

# Raising $IO::HTML::bytes_to_check to 1025 is expected to make the tag
# after 1024 spaces visible.  local restores the previous value when the
# block exits.
{
  local $IO::HTML::bytes_to_check = 1025;
  test cp1252 => (' ' x 1024) . '<meta charset="ISO-8859-1">',
      'found at 1024 bytes with bytes_to_check=1025';
}

# Finish the run; Test::More 0.88 is required above for done_testing.
done_testing;