1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
|
# -*- coding: utf-8 -*-
import os
import re
import sys
import warnings

import pytest

import camelot
# Directory of PDF fixtures: the "files" folder that sits next to this module.
testdir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "files")

# Default fixture for tests that do not build their own path.
filename = os.path.join(testdir, "foo.pdf")

# Marker that skips a test when running on a Windows platform.
skip_on_windows = pytest.mark.skipif(
    sys.platform.startswith("win"),
    reason="Ghostscript not installed in Windows test environment",
)
def test_unknown_flavor():
    """An unrecognised ``flavor`` must raise ``NotImplementedError``."""
    message = "Unknown flavor specified. Use either 'lattice' or 'stream'"
    # ``match`` is applied as a regular expression; escape the literal text so
    # that "." only matches a real full stop instead of any character.
    with pytest.raises(NotImplementedError, match=re.escape(message)):
        camelot.read_pdf(filename, flavor="chocolate")
def test_input_kwargs():
    """``columns`` given without an explicit flavor must raise ``ValueError``.

    The expected message names ``flavor='lattice'`` even though the call
    below does not pass ``flavor`` at all.
    """
    message = "columns cannot be used with flavor='lattice'"
    with pytest.raises(ValueError, match=message):
        # The return value is irrelevant: the call is expected to raise.
        camelot.read_pdf(filename, columns=["10,20,30,40"])
def test_unsupported_format():
    """A ``.csv`` path must be rejected with ``NotImplementedError``."""
    message = "File format not supported"
    # Named differently from the module-level ``filename`` to avoid shadowing.
    csv_path = os.path.join(testdir, "foo.csv")
    with pytest.raises(NotImplementedError, match=message):
        camelot.read_pdf(csv_path)
@skip_on_windows
def test_no_tables_found_logs_suppressed():
    """Reading ``foo.pdf`` with ``suppress_stdout=True`` must emit no warning."""
    pdf_path = os.path.join(testdir, "foo.pdf")
    with warnings.catch_warnings():
        # Escalate every warning to an exception so that anything emitted by
        # read_pdf lands in the ``except`` clause and fails the test.
        warnings.simplefilter("error")
        try:
            camelot.read_pdf(pdf_path, suppress_stdout=True)
        except Warning as exc:
            pytest.fail(f"Unexpected warning: {exc}")
def test_no_tables_found_warnings_suppressed():
    """Reading ``empty.pdf`` with ``suppress_stdout=True`` must emit no warning."""
    pdf_path = os.path.join(testdir, "empty.pdf")
    with warnings.catch_warnings():
        # Escalate every warning to an exception so that anything emitted by
        # read_pdf lands in the ``except`` clause and fails the test.
        warnings.simplefilter("error")
        try:
            camelot.read_pdf(pdf_path, suppress_stdout=True)
        except Warning as exc:
            pytest.fail(f"Unexpected warning: {exc}")
def test_no_password():
    """Reading ``health_protected.pdf`` without a password must fail."""
    pdf_path = os.path.join(testdir, "health_protected.pdf")
    message = "File has not been decrypted"
    # NOTE(review): ``Exception`` is broad; the concrete type is not visible
    # from this file, so only the message is pinned via ``match``.
    with pytest.raises(Exception, match=message):
        camelot.read_pdf(pdf_path)
def test_bad_password():
    """Reading ``health_protected.pdf`` with a wrong password must fail."""
    pdf_path = os.path.join(testdir, "health_protected.pdf")
    message = "File has not been decrypted"
    # NOTE(review): ``Exception`` is broad; the concrete type is not visible
    # from this file, so only the message is pinned via ``match``.
    with pytest.raises(Exception, match=message):
        camelot.read_pdf(pdf_path, password="wrongpass")
def test_stream_equal_length():
    """Stream flavor must reject ``table_areas``/``columns`` of unequal length."""
    message = "Length of table_areas and columns should be equal"
    with pytest.raises(ValueError, match=message):
        # One table area against two column specifications: lengths differ.
        camelot.read_pdf(
            filename,
            flavor="stream",
            table_areas=["10,20,30,40"],
            columns=["10,20,30,40", "10,20,30,40"],
        )
def test_image_warning():
    """Reading ``image.pdf`` must warn that the page is image-based."""
    pdf_path = os.path.join(testdir, "image.pdf")
    expected = "page-1 is image-based, camelot only works on text-based pages."
    with warnings.catch_warnings():
        # Turn UserWarning into an exception so pytest.raises can capture it.
        warnings.simplefilter("error", category=UserWarning)
        with pytest.raises(UserWarning) as excinfo:
            camelot.read_pdf(pdf_path)
        # The check must sit after the ``pytest.raises`` block: statements
        # placed inside it, after the raising call, would never execute.
        assert str(excinfo.value) == expected
def test_stream_no_tables_on_page():
    """Stream flavor on ``empty.pdf`` must warn that page 1 has no tables."""
    pdf_path = os.path.join(testdir, "empty.pdf")
    with warnings.catch_warnings():
        # Turn every warning into an exception so pytest.raises can capture it.
        warnings.simplefilter("error")
        with pytest.raises(UserWarning) as excinfo:
            camelot.read_pdf(pdf_path, flavor="stream")
        # Checked after the ``pytest.raises`` block so the assertion really runs.
        assert str(excinfo.value) == "No tables found on page-1"
def test_stream_no_tables_in_area():
    """Stream flavor on ``only_page_number.pdf`` must warn about table area 1."""
    pdf_path = os.path.join(testdir, "only_page_number.pdf")
    with warnings.catch_warnings():
        # Turn every warning into an exception so pytest.raises can capture it.
        warnings.simplefilter("error")
        with pytest.raises(UserWarning) as excinfo:
            camelot.read_pdf(pdf_path, flavor="stream")
        # Checked after the ``pytest.raises`` block so the assertion really runs.
        assert str(excinfo.value) == "No tables found in table area 1"
def test_lattice_no_tables_on_page():
    """Lattice flavor on ``empty.pdf`` must warn that page 1 has no tables."""
    pdf_path = os.path.join(testdir, "empty.pdf")
    with warnings.catch_warnings():
        # Only UserWarning is escalated; other warning categories keep the
        # filters that were active before this block.
        warnings.simplefilter("error", category=UserWarning)
        with pytest.raises(UserWarning) as excinfo:
            camelot.read_pdf(pdf_path, flavor="lattice")
        # Checked after the ``pytest.raises`` block so the assertion really runs.
        assert str(excinfo.value) == "No tables found on page-1"
def test_lattice_unknown_backend():
    """An unrecognised ``backend`` name must raise ``NotImplementedError``."""
    message = (
        "Unknown backend 'mupdf' specified."
        " Please use either 'poppler' or 'ghostscript'."
    )
    # ``match`` is applied as a regular expression; escape the literal text so
    # that each "." only matches a real full stop instead of any character.
    with pytest.raises(NotImplementedError, match=re.escape(message)):
        camelot.read_pdf(filename, backend="mupdf")
def test_lattice_no_convert_method():
    """A backend object without ``convert`` must raise ``NotImplementedError``."""

    class ConversionBackend:
        """Stand-in backend that deliberately defines no ``convert`` method."""

    message = "must implement a 'convert' method"
    with pytest.raises(NotImplementedError, match=message):
        camelot.read_pdf(filename, backend=ConversionBackend())
def test_lattice_ghostscript_deprecation_warning():
    """A ``read_pdf`` call with no ``backend`` must emit the ghostscript notice."""
    ghostscript_deprecation_warning = (
        "'ghostscript' will be replaced by 'poppler' as the default image conversion"
        " backend in v0.12.0. You can try out 'poppler' with backend='poppler'."
    )
    with warnings.catch_warnings():
        # Turn every warning into an exception so pytest.raises can capture it.
        warnings.simplefilter("error")
        with pytest.raises(DeprecationWarning) as excinfo:
            camelot.read_pdf(filename)
        # Checked after the ``pytest.raises`` block so the assertion really runs.
        assert str(excinfo.value) == ghostscript_deprecation_warning
|