1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282
|
"""
Fill a given text in a rectangle on some PDF page using
1. TextWriter object
2. Basic text output
Check text is indeed contained in given rectangle.
"""
import pymupdf
# codespell:ignore-begin
# German sample paragraph; the module-level fill text referenced as `text`
# by test_textbox1 .. test_textbox4.
text = """Der Kleine Schwertwal (Pseudorca crassidens), auch bekannt als Unechter oder Schwarzer Schwertwal, ist eine Art der Delfine (Delphinidae) und der einzige rezente Vertreter der Gattung Pseudorca.
Er ähnelt dem Orca in Form und Proportionen, ist aber einfarbig schwarz und mit einer Maximallänge von etwa sechs Metern deutlich kleiner.
Kleine Schwertwale bilden Schulen von durchschnittlich zehn bis fünfzig Tieren, wobei sie sich auch mit anderen Delfinen vergesellschaften und sich meistens abseits der Küsten aufhalten.
Sie sind in allen Ozeanen gemäßigter, subtropischer und tropischer Breiten beheimatet, sind jedoch vor allem in wärmeren Jahreszeiten auch bis in die gemäßigte bis subpolare Zone südlich der Südspitze Südamerikas, vor Nordeuropa und bis vor Kanada anzutreffen."""
# codespell:ignore-end
def test_textbox1():
    """Fill a rectangle through a TextWriter and verify text containment.

    The module-level sample text is laid out left-aligned at font size 12
    inside the rectangle, written to a fresh page, and the page's full text
    extraction is compared with the extraction clipped to that rectangle.
    """
    document = pymupdf.open()
    pdf_page = document.new_page()
    box = pymupdf.Rect(50, 50, 400, 400)
    writer = pymupdf.TextWriter(pdf_page.rect, color=(0, 0, 1))  # blue text
    writer.fill_textbox(box, text, align=pymupdf.TEXT_ALIGN_LEFT, fontsize=12)

    morph = (box.tl, pymupdf.Matrix(1, 1))
    writer.write_text(pdf_page, morph=morph)

    # Nothing may be extracted from outside the rectangle.
    everything = pdf_page.get_text()
    inside_box = pdf_page.get_text(clip=box)
    assert everything == inside_box

    # Additionally write the same writer via the page-level method.
    pdf_page.write_text(writers=writer)
def test_textbox2():
    """Fill a rectangle through Page.insert_textbox and verify containment.

    The text is inserted left-aligned at font size 12, colored light blue
    and attached to a newly added optional content group. The page's full
    text extraction must equal the extraction clipped to the rectangle.
    """
    document = pymupdf.open()
    layer = document.add_ocg("ocg1")
    pdf_page = document.new_page()
    box = pymupdf.Rect(50, 50, 400, 400)

    light_blue = pymupdf.utils.getColor("lightblue")
    # The result is not used afterwards; only the call itself is made.
    red = pymupdf.utils.getColorHSV("red")

    pdf_page.insert_textbox(
        box,
        text,
        align=pymupdf.TEXT_ALIGN_LEFT,
        fontsize=12,
        color=light_blue,
        oc=layer,
    )

    # Nothing may be extracted from outside the rectangle.
    everything = pdf_page.get_text()
    inside_box = pdf_page.get_text(clip=box)
    assert everything == inside_box
def test_textbox3():
    """Fill a rectangle via TextWriter using the "cjk" font, right-to-left.

    After checking that all page text lies inside the rectangle, the
    document is scrubbed and its fonts are subsetted.
    """
    document = pymupdf.open()
    pdf_page = document.new_page()
    cjk_font = pymupdf.Font("cjk")
    box = pymupdf.Rect(50, 50, 400, 400)
    writer = pymupdf.TextWriter(pdf_page.rect, color=(0, 0, 1))  # blue text
    writer.fill_textbox(
        box,
        text,
        align=pymupdf.TEXT_ALIGN_LEFT,
        font=cjk_font,
        fontsize=12,
        right_to_left=True,
    )

    morph = (box.tl, pymupdf.Matrix(1, 1))
    writer.write_text(pdf_page, morph=morph)

    # Nothing may be extracted from outside the rectangle.
    everything = pdf_page.get_text()
    inside_box = pdf_page.get_text(clip=box)
    assert everything == inside_box

    # Run the document-level clean-up calls on the result as well.
    document.scrub()
    document.subset_fonts()
def test_textbox4():
    """Fill a rectangle via TextWriter using the "cour" font, right-to-left.

    The writer output is attached to a newly added optional content group
    when written to the page. The page's full text extraction must equal
    the extraction clipped to the rectangle.
    """
    document = pymupdf.open()
    layer = document.add_ocg("ocg1")
    pdf_page = document.new_page()
    box = pymupdf.Rect(50, 50, 400, 600)
    writer = pymupdf.TextWriter(pdf_page.rect, color=(0, 0, 1))  # blue text

    courier = pymupdf.Font("cour")
    writer.fill_textbox(
        box,
        text,
        align=pymupdf.TEXT_ALIGN_LEFT,
        fontsize=12,
        font=courier,
        right_to_left=True,
    )

    morph = (box.tl, pymupdf.Matrix(1, 1))
    writer.write_text(pdf_page, oc=layer, morph=morph)

    # Nothing may be extracted from outside the rectangle.
    everything = pdf_page.get_text()
    inside_box = pdf_page.get_text(clip=box)
    assert everything == inside_box
def test_textbox5():
    """Shrink the font until justified text fits a small box, then check it.

    Starting at font size 12 and stepping down by 0.5, the text is inserted
    with Page.insert_textbox until the return code is no longer negative.
    The bounding box of the first extracted text block must then lie inside
    the target rectangle. The test runs with small glyph heights switched
    on and restores the previous setting afterwards in every case.
    """
    tools = pymupdf.TOOLS
    previous_setting = tools.set_small_glyph_heights()
    tools.set_small_glyph_heights(True)
    try:
        document = pymupdf.open()
        pdf_page = document.new_page()
        box = pymupdf.Rect(100, 100, 150, 150)
        words = "words and words and words and more words..."
        size = 12
        pdf_page.draw_rect(box)

        # Retry with a smaller font while the insertion reports a negative code.
        while True:
            result = pdf_page.insert_textbox(
                box,
                words,
                fontsize=size,
                align=pymupdf.TEXT_ALIGN_JUSTIFY,
            )
            size -= 0.5
            if not result < 0:
                break

        first_block = pdf_page.get_text("blocks")[0]
        text_bbox = pymupdf.Rect(first_block[:4])
        assert text_bbox in box
    finally:
        # The setting must be restored, otherwise other tests can fail.
        tools.set_small_glyph_heights(previous_setting)
def test_2637():
    """Check that text reported as fitting really stays inside its rectangle.

    Starting at font size 50 and stepping down by 1, the text is inserted
    with font "hebo" until Page.insert_textbox no longer returns a negative
    code. The bounding box of the first extracted text block must then lie
    inside the target rectangle.
    """
    document = pymupdf.open()
    pdf_page = document.new_page()
    sentence = (
        "The morning sun painted the sky with hues of orange and pink. "
        "Birds chirped harmoniously, greeting the new day. "
        "Nature awakened, filling the air with life and promise."
    )
    box = pymupdf.Rect(50, 50, 500, 280)
    size = 50

    # Search downwards for the largest font size at which the text fits.
    while True:
        result = pdf_page.insert_textbox(box, sentence, fontname="hebo", fontsize=size)
        size -= 1
        if not result < 0:
            break

    # The inserted text must not extend beyond the rectangle.
    first_block = pdf_page.get_text("blocks")[0]
    text_bbox = pymupdf.Rect(first_block[:4])
    assert text_bbox in box
def test_htmlbox1():
    """Insert styled HTML into a rectangle at four rotations and verify it.

    One page is created per rotation (0, 90, 180, 270). On each page:

    - inserting with ``scale_low=1`` must report a negative spare height
      and a scale of 1 (the rectangle is too small for the unscaled text),
    - inserting with ``scale_low=0`` must report spare height 0 and a scale
      strictly between 0 and 1,
    - the first link on the page must carry the URI used in the HTML,
    - the extracted plain text must equal the tag-free reference text,
    - every line must have the writing direction expected for that page,
    - the three specially styled words must show the expected bold, italic
      and color properties, while all other spans are neither bold nor
      italic.

    Returns early, after printing a note, when ``pymupdf`` has no ``mupdf``
    attribute.
    """
    if not hasattr(pymupdf, "mupdf"):
        print("'test_htmlbox1' not executed in classic.")
        return

    box = pymupdf.Rect(100, 100, 200, 200)  # text only fits here when scaled down
    plain_text = """Lorem ipsum dolor sit amet, consectetur adipisici elit, sed eiusmod tempor incidunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquid ex ea commodi consequat. Quis aute iure reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint obcaecat cupiditat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."""
    html = """Lorem ipsum dolor sit amet, consectetur adipisici elit, sed eiusmod tempor incidunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation <b>ullamco</b> <i>laboris</i> nisi ut aliquid ex ea commodi consequat. Quis aute iure reprehenderit in <span style="color: #0f0;font-weight:bold;">voluptate</span> velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint obcaecat cupiditat non proident, sunt in culpa qui <a href="https://www.artifex.com">officia</a> deserunt mollit anim id est laborum."""

    # Expected writing direction of each page's lines, indexed by page number.
    directions = ((1, 0), (0, -1), (-1, 0), (0, 1))
    # Styled word -> (bold, italic, key of the expected color in pymupdf.pdfcolor).
    styled_words = {
        "ullamco": (True, False, "black"),
        "laboris": (False, True, "black"),
        "voluptate": (True, False, "green"),
    }

    document = pymupdf.Document()
    for angle in (0, 90, 180, 270):
        page = document.new_page()

        # Without permission to scale down, the text must not fit.
        spare_height, scale = page.insert_htmlbox(box, html, rotate=angle, scale_low=1)
        assert spare_height < 0
        assert scale == 1

        # With unrestricted down-scaling, the text fills the box exactly.
        spare_height, scale = page.insert_htmlbox(box, html, rotate=angle, scale_low=0)
        assert spare_height == 0
        assert 0 < scale < 1

        page = document.reload_page(page)
        first_link = page.get_links()[0]
        assert first_link["uri"] == "https://www.artifex.com"

        # Plain text must be complete: extract with flags=0, drop the final
        # character and turn line breaks into spaces before comparing.
        extracted = page.get_text(flags=0)[:-1].replace("\n", " ")
        assert plain_text == extracted

        hits = 0  # number of styled words seen on this page
        for block in page.get_text("dict")["blocks"]:
            for line in block["lines"]:
                assert line["dir"] == directions[page.number]
                for span in line["spans"]:
                    word = span["text"]
                    color = pymupdf.sRGB_to_pdf(span["color"])
                    flags = span["flags"]
                    is_bold = bool(flags & 16)
                    is_italic = bool(flags & 2)

                    expected = styled_words.get(word)
                    if expected is None:
                        # Ordinary span: must carry no bold or italic styling.
                        assert is_bold is False
                        assert is_italic is False
                        continue

                    hits += 1
                    want_bold, want_italic, color_name = expected
                    assert is_bold is want_bold
                    assert is_italic is want_italic
                    assert color == pymupdf.pdfcolor[color_name]

        # All three styled words were encountered on this page.
        assert hits == 3
def test_htmlbox2():
    """Insert a short HTML text without scaling at four rotations.

    With ``scale_low=1`` every insertion must report a scale of 1 and a
    spare height strictly between 0 and the rectangle height; the spare
    height must be identical for all rotations. Returns early, after
    printing a note, when ``pymupdf`` has no ``mupdf`` attribute.
    """
    if not hasattr(pymupdf, "mupdf"):
        print("'test_htmlbox2' not executed in classic.")
        return

    document = pymupdf.open()
    box = pymupdf.Rect(100, 100, 200, 200)  # big enough for the text
    pdf_page = document.new_page()

    spare_heights = set()
    for angle in (0, 90, 180, 270):
        result = pdf_page.insert_htmlbox(
            box, "Hello, World!", scale_low=1, rotate=angle
        )
        spare, factor = result
        assert factor == 1
        assert 0 < spare < box.height
        spare_heights.add(spare)

    # Every rotation must have produced the same spare height.
    assert len(spare_heights) == 1
def test_htmlbox3():
    """Insert HTML text with opacity 0.5 and read the opacity back.

    The opacity is checked on the first entry of the page's text trace.
    Returns early, after printing a note, when ``pymupdf`` has no ``mupdf``
    attribute.
    """
    if not hasattr(pymupdf, "mupdf"):
        print("'test_htmlbox3' not executed in classic.")
        return

    box = pymupdf.Rect(100, 250, 300, 350)
    html = """<span style="color:red;font-size:20px;">Just some text.</span>"""
    document = pymupdf.open()
    pdf_page = document.new_page()

    # Write the text half-transparent.
    pdf_page.insert_htmlbox(box, html, opacity=0.5)

    # The low-level text trace exposes the opacity of each span.
    traces = pdf_page.get_texttrace()
    first_span = traces[0]
    assert first_span["opacity"] == 0.5
def test_3559():
    """Insert HTML containing an empty ``<h3>`` element into a rectangle.

    The test has no assertions; it only requires the insertion to complete.
    It is skipped, after printing a note, for MuPDF versions below 1.24.4.
    """
    too_old = pymupdf.mupdf_version_tuple < (1, 24, 4)
    if too_old:
        print(f'test_3559(): Not running because mupdf known to SEGV.')
        return

    document = pymupdf.Document()
    pdf_page = document.new_page()
    html = """<body><h3></h3></body>"""
    box = pymupdf.Rect(100, 100, 200, 200)
    pdf_page.insert_htmlbox(box, html)
def test_3916():
    """Check the spare height reported when the text cannot be made to fit.

    Inserting into a 1x1 rectangle with ``scale_low=0.5`` must yield a
    spare height of -1.
    """
    document = pymupdf.open()
    tiny_box = pymupdf.Rect(100, 100, 101, 101)  # far too small for the text
    pdf_page = document.new_page()

    result = pdf_page.insert_htmlbox(tiny_box, "Hello, World!", scale_low=0.5)
    spare, _factor = result
    assert spare == -1
|