1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
|
# SPDX-FileCopyrightText: 2022 James R. Barlow
# SPDX-License-Identifier: MPL-2.0
from __future__ import annotations
import os
import pikepdf
import pytest
from ocrmypdf.exceptions import MissingDependencyError
from .conftest import check_ocrmypdf
@pytest.mark.parametrize('optimize', (0, 3))
@pytest.mark.parametrize('pdfa_level', (1, 2, 3))
def test_pdfa(resources, outpdf, optimize, pdfa_level):
    """Check PDF/A output for each requested level and optimization setting.

    Runs ocrmypdf on ``francais.pdf`` with the no-op Tesseract plugin,
    requesting ``--output-type=pdfa-{pdfa_level}`` and
    ``--optimize={optimize}``, then verifies:

    * whether the output contains object streams (``/ObjStm``), which are
      expected for levels 2 and 3 and must be absent for level 1;
    * that the XMP metadata reports PDF/A status ``'{pdfa_level}B'``.

    The test is marked xfail when pngquant is reported missing on Windows
    at optimization levels that need it; any other missing dependency is
    propagated as a genuine failure.
    """
    try:
        check_ocrmypdf(
            resources / 'francais.pdf',
            outpdf,
            '--plugin',
            'tests/plugins/tesseract_noop.py',
            f'--output-type=pdfa-{pdfa_level}',
            f'--optimize={optimize}',
        )
    except MissingDependencyError as e:
        if 'pngquant' in str(e) and optimize in (2, 3) and os.name == 'nt':
            pytest.xfail("pngquant currently not available on Windows")
        # Any other missing dependency is a real failure. Swallowing it here
        # would only surface later as a confusing error about the output
        # file not existing.
        raise

    if pdfa_level in (2, 3):
        # PDF/A-2 (and later) allows ObjStm
        assert b'/ObjStm' in outpdf.read_bytes()
    elif pdfa_level == 1:
        # PDF/A-1 might allow ObjStm, but Acrobat does not approve it, so
        # we don't use it
        assert b'/ObjStm' not in outpdf.read_bytes()

    with pikepdf.open(outpdf) as pdf, pdf.open_metadata() as m:
        assert m.pdfa_status == f'{pdfa_level}B'
|