File: eximhtml2txt

package info (click to toggle)
eximdoc4 4.99-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 6,884 kB
  • sloc: perl: 935; javascript: 182; makefile: 180; sh: 88; xml: 68
file content (52 lines) | stat: -rwxr-xr-x 1,221 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/usr/bin/env perl
#
use strict;
use warnings;

use File::Spec;
use HTML::FormatText;
use HTML::TreeBuilder;

# Render the chapter portion of one HTML file as plain text.
#
# Argument: the path of the HTML file to parse.
# Returns:  the text produced by HTML::FormatText (left margin 0, right
#           margin 72) for the first <div class="chapter"> element, or the
#           empty string when the file contains no such element.
# Dies:     with a message naming the file when it cannot be opened.
sub process_chapter {
    my $fn = shift;

    # parse_file() yields undef (with the reason in $!) when the file cannot
    # be opened; report that directly instead of failing later on a method
    # call against undef.
    my $tree = HTML::TreeBuilder->new->parse_file($fn)
        or die "Cannot parse $fn: $!\n";

    my ($chapter) = $tree->look_down( "_tag", "div", "class", "chapter", );

    my $text = '';
    if ($chapter) {
        my $formatter
            = HTML::FormatText->new( leftmargin => 0, rightmargin => 72 );
        $text = $formatter->format($chapter);
    }

    # Release the tree on both paths, including the "no chapter found" one.
    $tree->delete;
    return $text;
}

# List the chapter files of a directory in chapter-number order.
#
# Argument: the directory to scan.
# Returns:  a list of paths, one per directory entry named ch<digits>.html,
#           ordered by the numeric value of <digits>. An entry that is a
#           symbolic link is replaced by its target, made absolute relative
#           to the scanned directory; other entries are returned as
#           "<dir>/<name>".
# Dies:     when the directory cannot be opened or closed.
sub chapters_in_order {
    my $dir = shift;

    opendir my $dh, $dir or die "opendir($dir) failed: $!\n";
    my @names = grep { /\Ach\d+\.html\z/ } readdir($dh);
    closedir($dh) or die "closedir($dir) failed: $!\n";

    # Compare chapter numbers numerically so that ch10 follows ch9 rather
    # than ch1; fall back to the name so ch1/ch01 still order deterministically.
    my @ordered =
        map  { $_->[1] }
        sort { $a->[0] <=> $b->[0] or $a->[1] cmp $b->[1] }
        map  { [ /(\d+)/, $_ ] } @names;

    my @results;
    for my $name (@ordered) {
        my $path = File::Spec->catfile( $dir, $name );
        if ( -l $path ) {
            my $target;

            # readlink may be fatal where symlinks are unimplemented; on any
            # failure the link's own path is kept.
            eval { $target = readlink $path };
            $path = File::Spec->rel2abs( $target, $dir ) if defined $target;
        }
        push @results, $path;
    }
    return @results;
}


# Script body: take the directory from the command line and print every
# chapter found there, framed by 72-column rule lines with the file path as
# a heading.
my $dir = shift @ARGV;
defined $dir or die "Need a directory\n";

my $heading_rule = ( "=" x 72 ) . "\n";
my $closing_rule = ( "-" x 72 ) . "\n";

for my $chapter_file ( chapters_in_order($dir) ) {
    # The heading is emitted before the chapter is converted.
    print $heading_rule, $chapter_file, "\n", $heading_rule;
    print process_chapter($chapter_file);
    print $closing_rule;
}