File: HTML.pm

package info (click to toggle)
libcatmandu-html-perl 0.02%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 176 kB
  • sloc: perl: 305; makefile: 2
file content (95 lines) | stat: -rw-r--r-- 1,652 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# Catmandu importer that reads an HTML document and emits it as a single
# record of parser tokens (see generator() below and the POD after __END__).
package Catmandu::Importer::HTML;

our $VERSION = '0.02';

use Catmandu::Sane;
use Moo;
use HTML::TokeParser;    # tokenizer used by generator()
use namespace::clean;    # keep imported helper functions from leaking as methods

# Compose the importer role; this class supplies generator().
with 'Catmandu::Importer';

# Build the iterator for this importer.
#
# The complete input stream ($self->fh) is tokenized with HTML::TokeParser
# and emitted as exactly one record of the form
#
#     { html => [ $token, $token, ... ] }
#
# If the parser yields no tokens at all, the record is an empty hash (the
# "html" key is only created when the first token is pushed).
#
# Returns a code reference. Its first call yields the record; every later
# call yields undef to signal that there is nothing more to read.
sub generator {
    my ($self) = @_;

    my $parser = HTML::TokeParser->new($self->fh);

    # One-shot flag captured by the closure below. Being a lexical of this
    # call, every generator gets its own independent flag.
    my $exhausted = 0;

    return sub {
        # An explicit undef (rather than a bare return) is kept on purpose so
        # callers observe the same end-of-stream value as before.
        return undef if $exhausted;
        $exhausted = 1;

        my $record = {};

        # Drain the parser: all tokens of the document go into one record.
        while (my $token = $parser->get_token) {
            push @{ $record->{html} }, $token;
        }

        return $record;
    };
}

1;

__END__

=pod

=head1 NAME

Catmandu::Importer::HTML - An HTML importer

=head1 SYNOPSIS

    # From the command line
    $ catmandu convert HTML to YAML < ex/test.html

    # From Perl
    use Catmandu;

    my $importer = Catmandu->importer('HTML',file => 'ex/test.html');

    my $n = $importer->each(sub {
        my $hashref = $_[0];
        # ...
    });

=head1 DESCRIPTION

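This is a L<Catmandu::Importer> for converting HTML data using the
L<HTML::TokeParser> parser. The whole input document is imported as a single
record whose C<html> field holds the array of tokens produced by the parser.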

=head1 CONFIGURATION

=over

=item file

Read input from a local file given by its path. Alternatively a scalar
reference can be passed to read from a string.

=item fh

Read input from an L<IO::Handle>. If not specified, L<Catmandu::Util::io> is used to
create the input stream from the C<file> argument or by using STDIN.

=item encoding

Binmode of the input stream C<fh>. Set to C<:utf8> by default.

=item fix

An ARRAY of one or more fixes or file scripts to be applied to imported items.

=back

=head1 METHODS

Every L<Catmandu::Importer> is a L<Catmandu::Iterable>; all of its methods are
inherited.

=head1 SEE ALSO

L<Catmandu::Importer>, L<HTML::TokeParser>

=cut