1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
|
# SPDX-FileCopyrightText: 2022 James R. Barlow
# SPDX-License-Identifier: MPL-2.0
from __future__ import annotations
import logging
import subprocess
from decimal import Decimal
from unittest.mock import patch
import pikepdf
import pytest
from PIL import Image, UnidentifiedImageError
from ocrmypdf._exec.ghostscript import rasterize_pdf
from ocrmypdf.exceptions import ExitCode
from ocrmypdf.helpers import Resolution
from .conftest import check_ocrmypdf, run_ocrmypdf
# pylint: disable=redefined-outer-name
@pytest.fixture
def francais(resources):
    """Provide the ``francais.pdf`` test resource as ``(path, pdf)``.

    Yields a tuple of the file path and the document opened with pikepdf.
    The document is opened in a context manager so that it is closed when
    the test using this fixture finishes, rather than leaking the handle.
    """
    path = resources / 'francais.pdf'
    with pikepdf.open(path) as pdf:
        yield path, pdf
def test_rasterize_size(francais, outdir):
    """Rasterize page 1 to a chosen pixel size and check size and DPI tag.

    The raster DPI is derived from the page dimensions (MediaBox, in points,
    divided by 72) so that the rendered image should be 50x30 pixels, while
    ``page_dpi`` is forced to an unrelated value that the output image is
    expected to report in its ``dpi`` info.
    """
    path, pdf = francais
    media_box = pdf.pages[0].MediaBox

    # The size computation below assumes the MediaBox origin is (0, 0).
    assert media_box[0] == media_box[1] == 0

    points_per_inch = Decimal(72)
    width_in = media_box[2] / points_per_inch
    height_in = media_box[3] / points_per_inch

    want_width, want_height = Decimal('50.0'), Decimal('30.0')
    dpi_tag = Resolution(42.0, 4242.0)
    png_path = outdir / 'out.png'

    rasterize_pdf(
        path,
        png_path,
        raster_device='pngmono',
        raster_dpi=Resolution(want_width / width_in, want_height / height_in),
        page_dpi=dpi_tag,
    )

    with Image.open(png_path) as im:
        assert im.size == (want_width, want_height)
        assert im.info['dpi'] == dpi_tag
def test_rasterize_rotated(francais, outdir, caplog):
    """Rasterize page 1 with ``rotation=90`` and check the axes are swapped.

    Uses the same DPI derivation as the unrotated size test: the raster DPI
    is chosen to give a 50x30 pixel image before rotation. With a 90 degree
    rotation the output image is expected to be 30x50 and to report the
    forced DPI with its axes flipped.
    """
    path, pdf = francais
    media_box = pdf.pages[0].MediaBox

    # The size computation below assumes the MediaBox origin is (0, 0).
    assert media_box[0] == media_box[1] == 0

    points_per_inch = Decimal(72)
    width_in = media_box[2] / points_per_inch
    height_in = media_box[3] / points_per_inch

    want_width, want_height = Decimal('50.0'), Decimal('30.0')
    dpi_tag = Resolution(42.0, 4242.0)
    png_path = outdir / 'out.png'

    # Capture debug-level log records while rasterizing.
    caplog.set_level(logging.DEBUG)
    rasterize_pdf(
        path,
        png_path,
        raster_device='pngmono',
        raster_dpi=Resolution(want_width / width_in, want_height / height_in),
        page_dpi=dpi_tag,
        rotation=90,
    )

    with Image.open(png_path) as im:
        # Width and height trade places after the quarter turn.
        assert im.size == (want_height, want_width)
        assert im.info['dpi'] == dpi_tag.flip_axis()
def test_gs_render_failure(resources, outpdf):
    """Run ocrmypdf on ``blank.pdf`` with the ``gs_render_failure`` plugin.

    Expects the text 'Casper is not a friendly ghost' in stderr and a
    ``child_process_error`` exit code.
    """
    plugin_args = [
        '--plugin',
        'tests/plugins/tesseract_noop.py',
        '--plugin',
        'tests/plugins/gs_render_failure.py',
    ]
    result = run_ocrmypdf(resources / 'blank.pdf', outpdf, *plugin_args)

    assert 'Casper is not a friendly ghost' in result.stderr
    assert result.returncode == ExitCode.child_process_error
def test_gs_raster_failure(resources, outpdf):
    """Run ocrmypdf on ``francais.pdf`` with the ``gs_raster_failure`` plugin.

    Expects the text 'Ghost story archive not found' in stderr and a
    ``child_process_error`` exit code.
    """
    plugin_args = [
        '--plugin',
        'tests/plugins/tesseract_noop.py',
        '--plugin',
        'tests/plugins/gs_raster_failure.py',
    ]
    result = run_ocrmypdf(resources / 'francais.pdf', outpdf, *plugin_args)

    assert 'Ghost story archive not found' in result.stderr
    assert result.returncode == ExitCode.child_process_error
def test_ghostscript_pdfa_failure(resources, outpdf):
    """Run ocrmypdf on ``francais.pdf`` with the ``gs_pdfa_failure`` plugin.

    Expects the process to exit with ``pdfa_conversion_failed``.
    """
    plugin_args = [
        '--plugin',
        'tests/plugins/tesseract_noop.py',
        '--plugin',
        'tests/plugins/gs_pdfa_failure.py',
    ]
    result = run_ocrmypdf(resources / 'francais.pdf', outpdf, *plugin_args)

    exit_code = result.returncode
    assert (
        exit_code == ExitCode.pdfa_conversion_failed
    ), "Unexpected return when PDF/A fails"
def test_ghostscript_feature_elision(resources, outpdf):
    """Run ``check_ocrmypdf`` on ``francais.pdf`` with ``gs_feature_elision``.

    The test makes no assertions of its own; it relies on whatever checks
    ``check_ocrmypdf`` performs on the run.
    """
    plugin_args = [
        '--plugin',
        'tests/plugins/tesseract_noop.py',
        '--plugin',
        'tests/plugins/gs_feature_elision.py',
    ]
    check_ocrmypdf(resources / 'francais.pdf', outpdf, *plugin_args)
def test_rasterize_pdf_errors(resources, no_outpdf, caplog):
    """Check ``rasterize_pdf`` when Ghostscript "succeeds" but reports errors.

    The Ghostscript ``run`` call is replaced by a mock that returns exit
    status 0 with an error message on stderr. ``rasterize_pdf`` is then
    expected to raise ``UnidentifiedImageError``, and the log should contain
    both the stderr text and an "invalid page image file" message.
    """
    # Simulated Ghostscript result: zero return code, but stderr has an error.
    fake_result = subprocess.CompletedProcess(
        ['fakegs'], returncode=0, stdout=b'', stderr=b'error this is an error'
    )
    with patch('ocrmypdf._exec.ghostscript.run', return_value=fake_result):
        with pytest.raises(UnidentifiedImageError):
            rasterize_pdf(
                resources / 'francais.pdf',
                no_outpdf,
                raster_device='pngmono',
                raster_dpi=Resolution(100, 100),
            )
        logged = caplog.text
        assert "this is an error" in logged
        assert "invalid page image file" in logged
|