File: UniversalCharDet.pm

package info (click to toggle)
libhtml-html5-parser-perl 0.992-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 1,444 kB
  • sloc: perl: 16,129; makefile: 2; sh: 1
file content (42 lines) | stat: -rw-r--r-- 937 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
package HTML::HTML5::Parser::Charset::UniversalCharDet;

## skip Test::Tabs
use strict;
use warnings;
use IO::HTML ();

# Module version string.
our $VERSION='0.992';
# Debug switch read by detect_byte_string: when true, an exception raised
# during detection is re-thrown; when false (the default, as the variable
# is left undefined here), it is only reported via warn.
our $DEBUG;

# Best-effort encoding sniffer used by detect_byte_string.
#
# Argument:
#   $_[0] - the document text.  It is read through @_ directly (not
#           copied into a lexical) so a large document is not duplicated
#           and the in-memory filehandle maps onto the caller's scalar.
#
# Returns a hash reference:
#   { encoding => 'UTF-8' }  if the argument does not carry Perl's
#                            internal UTF-8 flag (nothing is sniffed);
#   { encoding => $name }    if IO::HTML::sniff_encoding returns a
#                            defined value;
#   {}                       otherwise.
#
# Dies if the in-memory filehandle cannot be opened.  Perl refuses to map
# a scalar containing code points above 0xFF onto an in-memory handle, so
# this is a real possibility for character strings; previously the failed
# open went unnoticed and an unopened handle was handed to IO::HTML.
#
# NOTE(review): the original author remarked that this "really shouldn't
# work, but for some reason it does".  Reporting UTF-8 for strings that
# are *not* UTF-8-flagged looks inverted, but callers may depend on it,
# so the behaviour is intentionally left unchanged.
sub _detect {
	return +{ encoding => 'UTF-8' } if !utf8::is_utf8($_[0]); # huh?

	# Read-only, byte-oriented handle over the caller's scalar.  The
	# lexical handle is closed automatically when it goes out of scope.
	open my $fh, '<:raw', \$_[0]
		or die "Could not open in-memory filehandle for encoding detection: $!";

	# Scalar context: only the encoding name is wanted.  The second
	# argument is presumably just a label for IO::HTML's own error
	# messages -- confirm against the IO::HTML documentation.
	my $e = IO::HTML::sniff_encoding($fh => 'string');
	return +{ encoding => $e } if defined $e;
	return +{};
}

# Public entry point: guess the character encoding of a document.
#
# Arguments (the ($$) prototype only affects function-style calls):
#   $_[0] - ignored by this sub (NOTE(review): presumably the class name
#           of a method-style call -- confirm against callers);
#   $_[1] - the string to examine; handed to _detect without copying.
#
# Returns the detected encoding name in lower case, or undef when no
# encoding was found or detection threw.  An exception from _detect is
# re-thrown when $DEBUG is true and downgraded to a warning otherwise.
sub detect_byte_string ($$) {
  my $result;
  my $succeeded = eval {
    $result = _detect($_[1]);
    1;
  };

  if (not $succeeded) {
    if ($DEBUG) {
      die $@;
    }
    else {
      warn $@;
    }
  }

  # Explicit undef (rather than a bare return) is kept on purpose so that
  # list-context callers still receive exactly one element.
  return undef unless defined $result;
  return undef unless defined $result->{encoding};

  return lc $result->{encoding};
} # detect_byte_string

#Copyright 2007-2011 Wakaba <w@suika.fam.cx>
#Copyright 2009-2012 Toby Inkster <tobyink@cpan.org>
#
#This library is free software; you can redistribute it
#and/or modify it under the same terms as Perl itself.

1;