File: 123_save_djvu_with_hocr.t

package info (click to toggle)
gscan2pdf 2.13.5-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 5,700 kB
  • sloc: perl: 22,713; xml: 81; makefile: 6
file content (86 lines) | stat: -rw-r--r-- 2,437 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
use warnings;
use strict;
use IPC::Cmd            qw(can_run);
use IPC::System::Simple qw(system capture);
use Encode              qw(decode_utf8 encode_utf8);
use File::Temp;
use Gscan2pdf::Helpers;
use Test::More tests => 2;

# Load the document model and initialise Gtk3 while the file is compiled.
# A plain "use" already runs at compile time, so no BEGIN wrapper is needed;
# the load order is unchanged.
use Gscan2pdf::Document;
use Gtk3 -init;    # Could just call init separately

#########################

# Round-trip test: import an image, attach an hOCR text layer and annotations
# to its page, save the page as DjVu, then read the text and annotation layers
# back with the DjVuLibre command-line tools and check that the non-ASCII
# sample text is present in both.
SKIP: {

    # cjb2 is the probe for a DjVuLibre install; djvutxt and djvused are run
    # directly by the assertions below (via capture, which throws on
    # failure), so require them too rather than dying half-way through.
    my @missing_tools = grep { !can_run($_) } qw(cjb2 djvutxt djvused);
    skip 'DjVuLibre not installed', 2 if @missing_tools;

    Gscan2pdf::Translation::set_domain('gscan2pdf');
    use Log::Log4perl qw(:easy);
    Log::Log4perl->easy_init($WARN);
    my $logger = Log::Log4perl::get_logger;
    Gscan2pdf::Document->setup($logger);

    # Create test image
    system( Gscan2pdf::Helpers::get_imagemagick_command(), qw(rose: test.pnm) );

    # hOCR fixture used for both the text layer and the annotations.
    # NOTE(review): the DOCTYPE public identifier and the ocr_page title
    # attribute are each missing a closing double quote. The markup is kept
    # byte-for-byte because the parser's behaviour on the corrected form has
    # not been verified - confirm before tidying it up.
    my $hocr = <<'EOS';
<!DOCTYPE html
 PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN
 http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
 <head>
  <meta content="ocr_line ocr_page" name="ocr-capabilities"/>
  <meta content="en" name="ocr-langs"/>
  <meta content="Latn" name="ocr-scripts"/>
  <meta content="" name="ocr-microformats"/>
  <title>OCR Output</title>
 </head>
 <body>
  <div class="ocr_page" title="bbox 0 0 70 46>
   <p class="ocr_par">
    <span class="ocr_line" title="bbox 10 10 60 11">The quick — brown fox·</span>
   </p>
  </div>
 </body>
</html>
EOS

    # Single source of truth for the text both assertions look for, so the
    # two checks cannot drift apart.
    my $expected = qr/The quick — brown fox·/;

    my $slist = Gscan2pdf::Document->new;

    # dir for temporary files
    my $dir = File::Temp->newdir;
    $slist->set_dir($dir);

    # Import, then (from the import's finished callback) decorate the page
    # and save it. The save's finished callback ends the Gtk main loop
    # started below.
    $slist->import_files(
        paths             => ['test.pnm'],
        finished_callback => sub {

            # The page object only exists once the import has finished.
            my $page = $slist->{data}[0][2];
            $page->import_hocr($hocr);
            $page->import_annotations($hocr);
            $slist->save_djvu(
                path              => 'test.djvu',
                list_of_pages     => [ $page->{uuid} ],
                finished_callback => sub { Gtk3->main_quit }
            );
        }
    );

    # Blocks until save_djvu's finished callback calls main_quit.
    Gtk3->main;

    like( decode_utf8( capture(qw(djvutxt test.djvu)) ),
        $expected, 'DjVu with expected text' );

    # NOTE(review): unescape_utf8 presumably reverses djvused's escaping of
    # non-ASCII bytes in print-ant output - confirm against its definition.
    like(
        Gscan2pdf::Document::unescape_utf8(
            capture( qw(djvused test.djvu -e), 'select 1; print-ant' )
        ),
        $expected,
        'DjVu with expected annotation'
    );

#########################

    # Remove the files created in the working directory and shut the
    # document machinery down.
    unlink 'test.pnm', 'test.djvu';
    Gscan2pdf::Document->quit();
}