File: test_image_input.py

package info (click to toggle)
ocrmypdf 14.0.1+dfsg1-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 9,948 kB
  • sloc: python: 10,185; sh: 240; makefile: 22
file content (89 lines) | stat: -rw-r--r-- 2,411 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# SPDX-FileCopyrightText: 2022 James R. Barlow
# SPDX-License-Identifier: MPL-2.0

from __future__ import annotations

from unittest.mock import patch

import img2pdf
import pikepdf
import pytest
from PIL import Image

import ocrmypdf

from .conftest import check_ocrmypdf, run_ocrmypdf_api

# pylint: disable=redefined-outer-name


@pytest.fixture
def baiona(resources):
    """Yield the image opened from ``baiona_gray.png``, closing it afterwards.

    The file is opened from the directory supplied by the ``resources``
    fixture. Using a ``with`` block and ``yield`` keeps the image open for the
    duration of the test and releases the underlying file handle at fixture
    teardown, rather than leaving it to the garbage collector.
    """
    with Image.open(resources / 'baiona_gray.png') as im:
        yield im


def test_image_to_pdf(resources, outpdf):
    """Run ``check_ocrmypdf`` on ``crom.png`` with an explicit image DPI."""
    source_image = resources / 'crom.png'
    cli_options = [
        '--image-dpi',
        '200',
        '--plugin',
        'tests/plugins/tesseract_noop.py',
    ]
    check_ocrmypdf(source_image, outpdf, *cli_options)


def test_no_dpi_info(caplog, baiona, outdir, no_outpdf):
    """An image saved without DPI info fails and the log mentions --image-dpi."""
    # Precondition: the fixture image itself carries no DPI entry.
    assert 'dpi' not in baiona.info
    png_path = outdir / 'baiona_no_dpi.png'
    baiona.save(png_path)

    exit_code = run_ocrmypdf_api(png_path, no_outpdf)

    assert exit_code == ocrmypdf.ExitCode.input_file
    assert "--image-dpi" in caplog.text


def test_dpi_not_credible(caplog, baiona, outdir, no_outpdf):
    """An image saved with a 30x30 DPI fails and the log says "not credible"."""
    # Precondition: the fixture image itself carries no DPI entry.
    assert 'dpi' not in baiona.info
    # NOTE: despite the file name, this copy is written *with* DPI metadata.
    png_path = outdir / 'baiona_no_dpi.png'
    baiona.save(png_path, dpi=(30, 30))

    exit_code = run_ocrmypdf_api(png_path, no_outpdf)

    assert exit_code == ocrmypdf.ExitCode.input_file
    assert "not credible" in caplog.text


def test_cmyk_no_icc(caplog, resources, no_outpdf):
    """``baiona_cmyk.jpg`` fails as an input file and logs "no ICC profile"."""
    cmyk_jpeg = resources / 'baiona_cmyk.jpg'
    exit_code = run_ocrmypdf_api(cmyk_jpeg, no_outpdf)

    assert exit_code == ocrmypdf.ExitCode.input_file
    assert "no ICC profile" in caplog.text


def test_img2pdf_fails(resources, no_outpdf):
    """An ``ImageOpenError`` from ``img2pdf.convert`` yields an input-file exit code."""
    # Force the img2pdf conversion step to raise when it is invoked.
    forced_failure = patch(
        'ocrmypdf._pipeline.img2pdf.convert', side_effect=img2pdf.ImageOpenError()
    )
    with forced_failure as convert_mock:
        exit_code = run_ocrmypdf_api(
            resources / 'baiona_gray.png', no_outpdf, '--image-dpi', '200'
        )
        assert exit_code == ocrmypdf.ExitCode.input_file
        # Confirm the patched function was actually reached.
        convert_mock.assert_called()


@pytest.mark.xfail(reason="remove background disabled")
def test_jpeg_in_jpeg_out(resources, outpdf):
    """Check that the first image on page one of the output is DCT-encoded."""
    cli_options = (
        '--image-dpi',
        '100',
        '--output-type',
        'pdf',  # specifically check pdf because Ghostscript may convert to JPEG
        '--remove-background',
        '--plugin',
        'tests/plugins/tesseract_noop.py',
    )
    check_ocrmypdf(resources / 'congress.jpg', outpdf, *cli_options)

    with pikepdf.open(outpdf) as pdf:
        first_page_images = pdf.pages[0].images
        first_image = next(iter(first_page_images.values()))
        assert first_image.Filter == pikepdf.Name.DCTDecode