File: test_unpaper.py

package info (click to toggle)
ocrmypdf 14.0.1%2Bdfsg1-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 9,948 kB
  • sloc: python: 10,185; sh: 240; makefile: 22
file content (116 lines) | stat: -rw-r--r-- 3,266 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# SPDX-FileCopyrightText: 2022 James R. Barlow
# SPDX-License-Identifier: MPL-2.0

from __future__ import annotations

import logging
from os import fspath
from unittest.mock import patch

import pytest
from PIL import Image

from ocrmypdf._exec import unpaper
from ocrmypdf._plugin_manager import get_parser_options_plugins
from ocrmypdf._validation import check_options
from ocrmypdf.exceptions import ExitCode, MissingDependencyError

from .conftest import check_ocrmypdf, have_unpaper, ocrmypdf_exec, run_ocrmypdf

# pylint: disable=redefined-outer-name

# Decorator for tests that run the real unpaper program: they are skipped
# whenever the conftest helper ``have_unpaper()`` returns a falsy value.
needs_unpaper = pytest.mark.skipif(not have_unpaper(), reason="requires unpaper")


def test_no_unpaper(resources, no_outpdf):
    """Check that ``--clean`` is rejected when unpaper cannot be located.

    ``ocrmypdf._exec.unpaper.version`` is replaced by a mock that raises
    ``FileNotFoundError``; option validation must then raise
    ``MissingDependencyError`` and must have queried the version.
    """
    argv = ["--clean", fspath(resources / "c02-22.pdf"), fspath(no_outpdf)]
    _parser, options, plugin_manager = get_parser_options_plugins(argv)

    with patch(
        "ocrmypdf._exec.unpaper.version",
        side_effect=FileNotFoundError("unpaper"),
    ) as version_mock:
        with pytest.raises(MissingDependencyError):
            check_options(options, plugin_manager)
        # Validation must actually have attempted the version lookup.
        version_mock.assert_called()


def test_old_unpaper(resources, no_outpdf):
    """Check that ``--clean`` is rejected when unpaper reports version 0.5.

    ``ocrmypdf._exec.unpaper.version`` is replaced by a mock returning
    ``'0.5'``; option validation must then raise ``MissingDependencyError``
    and must have queried the version.
    """
    argv = ["--clean", fspath(resources / "c02-22.pdf"), fspath(no_outpdf)]
    _parser, options, plugin_manager = get_parser_options_plugins(argv)

    with patch(
        "ocrmypdf._exec.unpaper.version",
        return_value='0.5',
    ) as version_mock:
        with pytest.raises(MissingDependencyError):
            check_options(options, plugin_manager)
        # Validation must actually have attempted the version lookup.
        version_mock.assert_called()


@needs_unpaper
def test_clean(resources, outpdf):
    """Run ocrmypdf with ``-c`` on ``skew.pdf`` using the no-op Tesseract plugin."""
    cli_options = ("-c", '--plugin', 'tests/plugins/tesseract_noop.py')
    source_pdf = resources / "skew.pdf"
    check_ocrmypdf(source_pdf, outpdf, *cli_options)


@needs_unpaper
def test_unpaper_args_valid(resources, outpdf):
    """Run ocrmypdf with ``-c`` and a custom ``--unpaper-args`` value.

    The whole unpaper argument string is passed as a single command-line
    token, via ``check_ocrmypdf``.
    """
    cli_options = [
        "-c",
        "--unpaper-args",
        "--layout double",  # Spaces required here
        '--plugin',
        'tests/plugins/tesseract_noop.py',
    ]
    source_pdf = resources / "skew.pdf"
    check_ocrmypdf(source_pdf, outpdf, *cli_options)


@needs_unpaper
def test_unpaper_args_invalid_filename(resources, outpdf):
    """Check that a path given as ``--unpaper-args`` is refused.

    The run must report "No filenames allowed" on stderr and exit with
    ``ExitCode.bad_args``.
    """
    cli_options = (
        "-c",
        "--unpaper-args",
        "/etc/passwd",
        '--plugin',
        'tests/plugins/tesseract_noop.py',
    )
    completed = run_ocrmypdf(resources / "skew.pdf", outpdf, *cli_options)

    assert "No filenames allowed" in completed.stderr
    assert completed.returncode == ExitCode.bad_args


@needs_unpaper
def test_unpaper_args_invalid(resources, outpdf):
    """Check the exit code when ``--unpaper-args`` holds nonsense arguments.

    The run is expected to end with ``ExitCode.child_process_error``.
    """
    cli_options = (
        "-c",
        "--unpaper-args",
        "unpaper is not going to like these arguments",
        '--plugin',
        'tests/plugins/tesseract_noop.py',
    )
    completed = run_ocrmypdf(resources / "skew.pdf", outpdf, *cli_options)

    # A failure caused by bad arguments is indistinguishable from any other
    # unpaper failure, so only the generic child-process error is checked.
    assert completed.returncode == ExitCode.child_process_error


@needs_unpaper
def test_unpaper_image_too_big(resources, outdir, caplog):
    """Check how ``unpaper.clean`` handles an image above the pixel limit.

    ``UNPAPER_IMAGE_PIXEL_LIMIT`` is patched down to 42 so that ``crom.png``
    exceeds it. The call must then return the input path and log a warning
    containing "too large for cleaning".
    """
    with patch('ocrmypdf._exec.unpaper.UNPAPER_IMAGE_PIXEL_LIMIT', 42):
        infile = resources / 'crom.png'
        # Without ``assert`` this comparison would be evaluated and thrown
        # away, leaving the return value unchecked.
        assert unpaper.clean(infile, outdir / 'out.png', dpi=300) == infile

        assert any(
            'too large for cleaning' in rec.message
            for rec in caplog.get_records('call')
            if rec.levelno == logging.WARNING
        )