File: pdfoutline.pl

package info (click to toggle)
fntsample 5.4-3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 276 kB
  • sloc: ansic: 838; perl: 223; makefile: 7
file content (154 lines) | stat: -rwxr-xr-x 4,045 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#! /usr/bin/env perl
# This file is in public domain
# Author: Ievgenii Meshcheriakov <eugen@debian.org>
#
# This program adds outlines to pdf files.
# Usage: pdfoutline input.pdf outline.txt out.pdf
#
# File given as second argument should contain outline information in
# form:
#
# <level> <page> Some text
#
# where <level> and <page> are integers. Values for <level> should be greater
# or equal than that of first line. Page numeration starts with 1.
#
# Outlines file can contain comments that start with # in first column. Comments
# and empty lines are ignored.
#
# Example file:
# 0 1 Document title
# 1 1 Chapter 1
# 2 1 Chapter 1.1
# 2 2 Chapter 1.2
# 1 3 Chapter 2
#
# This file will result in outline like the following:
#
# Document title
# +-Chapter 1
# | +-Chapter 1.1
# | +-Chapter 1.2
# +-Chapter 2

use strict;
use warnings;
use PDF::API2;
use Locale::TextDomain('@CMAKE_PROJECT_NAME@', '@CMAKE_INSTALL_FULL_LOCALEDIR@');
use POSIX qw(:locale_h);
use Encode qw(encode);

sub usage {
    printf __"Usage: %s input.pdf outline.txt out.pdf\n", $0;
}

# get first non-empty non-comment line
sub get_line {
    my ($F) = @_;
    my $line;

    while ($line = <$F>) {
        chomp $line;
        # skip comments ...
        next if $line =~ /^#/;
        # ... and empty lines
        next if $line eq q{};
        last;
    }
    return $line;
}

# Encode string to UTF-16BE with BOM if it contains non-ASCII characters
sub encode_pdf_text {
    my ($str) = @_;

    if ($str !~ /[^[:ascii:]]/) {
        return $str;
    } else {
        if (PDF::API2->VERSION ge "2.034") {
            # Perl PDF::API2 >= 2.034 already handles non-ASCII characters
            # automatically. This also avoids a bug before v2.040.
            # See: https://rt.cpan.org/Public/Bug/Display.html?id=33497
            return $str;
        } else {
            # Buggy before PDF::API2 v2.040.
            # See: https://rt.cpan.org/Public/Bug/Display.html?id=134957
            return encode('UTF-16', $str);
        }
    }
}

sub add_outlines {
    my ($pdf, $parent, $line, $F) = @_;
    my $cur_outline;

    my ($level) = split / /, $line;

    MAINLOOP: while ($line) {
        my ($new_level, $page, $text) = split / /, $line, 3;

        if ($new_level > $level) {
            $line = add_outlines($pdf, $cur_outline, $line, $F);
            next MAINLOOP;
        } elsif ($new_level < $level) {
            return $line;
        } else {
            $cur_outline = $parent->outline;
            $cur_outline->title(encode_pdf_text($text));
            # FIXME it should be posible to make it easier
            my $pdfpage = $pdf->{pagestack}->[$page - 1];
            $cur_outline->dest($pdfpage);
        }

        $line = get_line($F);
    }
}

# Create new outlines object ignorig outlines that can be
# already present in the PDF file.
sub new_outlines {
    my ($pdf) = @_;

    require PDF::API2::Outlines;
    $pdf->{'pdf'}->{'Root'}->{'Outlines'} = PDF::API2::Outlines->new($pdf);
    my $obj = $pdf->{'pdf'}->{'Root'}->{'Outlines'};

    $pdf->{'pdf'}->new_obj($obj) unless $obj->is_obj($pdf->{'pdf'});
    $pdf->{'pdf'}->out_obj($obj);
    $pdf->{'pdf'}->out_obj($pdf->{'pdf'}->{'Root'});

    return $obj;
}

setlocale(LC_ALL, q{});

if ($#ARGV != 2) {
    usage;
    exit 1;
}

if (PDF::API2->VERSION le "2.033") {
    print STDERR "Warning: Perl PDF::API2 v2.033 or earlier detected.\n";
    print STDERR "It's known to have an outline corruption bug!\n";
    print STDERR "See pdfoutline man page for more information.\n";
}

my ($inputfile, $outlinefile, $outputfile) = @ARGV;

my $pdf = PDF::API2->open($inputfile);

open my $outline_fh, '<:encoding(UTF-8)', $outlinefile
    or die __x("Cannot open outline file '{outlinefile}'",
               outlinefile => $outlinefile);

my $line = get_line($outline_fh);

# create new outlines here, don't try to use old ones
my $outlines = new_outlines($pdf);

add_outlines($pdf, $outlines, $line, $outline_fh) if $line;
close $outline_fh;

$pdf->saveas($outputfile);

exit 0;