1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116
|
# SPDX-FileCopyrightText: 2022 James R. Barlow
# SPDX-License-Identifier: MPL-2.0
from __future__ import annotations
import logging
from os import fspath
from unittest.mock import patch
import pytest
from PIL import Image
from ocrmypdf._exec import unpaper
from ocrmypdf._plugin_manager import get_parser_options_plugins
from ocrmypdf._validation import check_options
from ocrmypdf.exceptions import ExitCode, MissingDependencyError
from .conftest import check_ocrmypdf, have_unpaper, ocrmypdf_exec, run_ocrmypdf
# pylint: disable=redefined-outer-name
# Marker for tests that drive the real unpaper program: they are skipped
# whenever have_unpaper() reports a falsy result at import time.
needs_unpaper = pytest.mark.skipif(
    not have_unpaper(),
    reason="requires unpaper",
)
def test_no_unpaper(resources, no_outpdf):
    """Check that --clean is rejected when the unpaper version lookup fails.

    The version lookup is patched to raise ``FileNotFoundError``;
    ``check_options`` must then raise ``MissingDependencyError``, and the
    patched lookup must have been consulted.
    """
    argv = ["--clean", fspath(resources / "c02-22.pdf"), fspath(no_outpdf)]
    _parser, options, plugin_manager = get_parser_options_plugins(argv)

    with patch(
        "ocrmypdf._exec.unpaper.version",
        side_effect=FileNotFoundError("unpaper"),
    ) as fake_version:
        with pytest.raises(MissingDependencyError):
            check_options(options, plugin_manager)
        fake_version.assert_called()
def test_old_unpaper(resources, no_outpdf):
    """Check that --clean is rejected when unpaper reports version 0.5.

    The version lookup is patched to return ``'0.5'``; ``check_options``
    must then raise ``MissingDependencyError``, and the patched lookup must
    have been consulted.
    """
    argv = ["--clean", fspath(resources / "c02-22.pdf"), fspath(no_outpdf)]
    _parser, options, plugin_manager = get_parser_options_plugins(argv)

    with patch(
        "ocrmypdf._exec.unpaper.version",
        return_value='0.5',
    ) as fake_version:
        with pytest.raises(MissingDependencyError):
            check_options(options, plugin_manager)
        fake_version.assert_called()
@needs_unpaper
def test_clean(resources, outpdf):
    """Run ocrmypdf with ``-c`` on skew.pdf using the no-op Tesseract plugin."""
    source_pdf = resources / "skew.pdf"
    cli_args = ["-c", "--plugin", "tests/plugins/tesseract_noop.py"]
    check_ocrmypdf(source_pdf, outpdf, *cli_args)
@needs_unpaper
def test_unpaper_args_valid(resources, outpdf):
    """Run ocrmypdf with ``-c`` and a space-separated ``--unpaper-args`` value."""
    source_pdf = resources / "skew.pdf"
    cli_args = [
        "-c",
        "--unpaper-args",
        "--layout double",  # Spaces required here
        "--plugin",
        "tests/plugins/tesseract_noop.py",
    ]
    check_ocrmypdf(source_pdf, outpdf, *cli_args)
@needs_unpaper
def test_unpaper_args_invalid_filename(resources, outpdf):
    """Expect a bad-arguments exit when ``--unpaper-args`` holds a file path."""
    cli_args = [
        "-c",
        "--unpaper-args",
        "/etc/passwd",
        "--plugin",
        "tests/plugins/tesseract_noop.py",
    ]
    result = run_ocrmypdf(resources / "skew.pdf", outpdf, *cli_args)

    # The message is checked first, then the exit status.
    assert "No filenames allowed" in result.stderr
    assert result.returncode == ExitCode.bad_args
@needs_unpaper
def test_unpaper_args_invalid(resources, outpdf):
    """Expect a child-process error exit for nonsense ``--unpaper-args``."""
    cli_args = [
        "-c",
        "--unpaper-args",
        "unpaper is not going to like these arguments",
        "--plugin",
        "tests/plugins/tesseract_noop.py",
    ]
    result = run_ocrmypdf(resources / "skew.pdf", outpdf, *cli_args)

    # A rejection of the arguments by unpaper is indistinguishable from any
    # other unpaper failure, so only the generic child-process exit code is
    # checked.
    assert result.returncode == ExitCode.child_process_error
@needs_unpaper
def test_unpaper_image_too_big(resources, outdir, caplog):
    """Check that an oversized image is left uncleaned and a warning is logged.

    ``UNPAPER_IMAGE_PIXEL_LIMIT`` is patched down to 42 so that ``crom.png``
    is expected to exceed it. ``unpaper.clean`` should then return the input
    path unchanged, and a WARNING record containing
    ``'too large for cleaning'`` must have been captured.
    """
    with patch('ocrmypdf._exec.unpaper.UNPAPER_IMAGE_PIXEL_LIMIT', 42):
        infile = resources / 'crom.png'
        # The comparison has to be asserted: as a bare expression it would be
        # evaluated and discarded, so the return value would never be checked.
        assert unpaper.clean(infile, outdir / 'out.png', dpi=300) == infile

        assert any(
            'too large for cleaning' in rec.message
            for rec in caplog.get_records('call')
            if rec.levelno == logging.WARNING
        )
|