File: test_2548.py

package info (click to toggle)
pymupdf 1.25.4%2Bds1-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 98,632 kB
  • sloc: python: 43,379; ansic: 75; makefile: 6
file content (41 lines) | stat: -rw-r--r-- 1,487 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import os

import pymupdf

# Absolute path of the directory two levels above this file; the test
# resources are looked up relative to it.
root = os.path.abspath(os.path.join(__file__, os.pardir, os.pardir))

def test_2548():
    """Check that a cycle in the PDF structure tree is handled gracefully.

    Old MuPDF versions did not detect the loop.  This test requires that
    text extraction succeeds on every page of the test file without raising,
    and (for the rebased implementation) that the MuPDF warnings emitted
    while doing so match the text expected for the MuPDF version in use.
    """
    print(f'test_2548(): {pymupdf.mupdf_version_tuple=}')
    # Clear previously collected MuPDF warnings so that only warnings
    # produced by this test are compared below.
    pymupdf.TOOLS.mupdf_warnings(reset=True)
    # The rebased implementation exposes the `pymupdf.mupdf` module; the
    # classic one does not.
    rebased = hasattr(pymupdf, 'mupdf')
    raised = False
    # Use the document as a context manager so it is closed even if an
    # assertion below fails.
    with pymupdf.open(f'{root}/tests/resources/test_2548.pdf') as doc:
        for page in doc:
            try:
                _ = page.get_text()
            except Exception as ee:
                # Any exception makes the test fail at the end; the check
                # here only verifies that it is the known "cycle" error, so
                # an unrelated failure is reported with a clearer message.
                print(f'test_2548: {ee=}')
                if rebased:
                    expected = "RuntimeError('code=2: cycle in structure tree')"
                else:
                    # Classic.
                    expected = "RuntimeError('cycle in structure tree')"
                assert repr(ee) == expected, f'Expected {expected=} but got {repr(ee)=}.'
                raised = True
        # Collect the warnings before the document is closed, i.e. at the
        # same point in the sequence as before the `with` block was added.
        wt = pymupdf.TOOLS.mupdf_warnings()
    print(f'test_2548(): {wt=}')

    # This checks that PyMuPDF 1.23.7 fixes this bug, and also that earlier
    # versions with updated MuPDF also fix the bug.
    if pymupdf.mupdf_version_tuple >= (1, 25):
        expected = 'format error: cycle in structure tree\nstructure tree broken, assume tree is missing'
    else:
        expected = 'Loop found in structure tree. Ignoring structure.'
    # The warning text is only checked for the rebased implementation.
    if rebased:
        assert wt == expected, f'expected:\n    {expected!r}\nwt:\n    {wt!r}\n'
    # Text extraction must not have raised on any page.
    assert not raised