1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
|
#!/usr/bin/env python
# Copyright © 2008-2018 Jakub Wilk <jwilk@jwilk.net>
# Copyright © 2022-2024 FriedrichFroebel
#
# This file is part of djvulibre-python.
#
# djvulibre-python is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 as published by
# the Free Software Foundation.
#
# djvulibre-python is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
import argparse
import os
import sys
import djvu.const
import djvu.decode
import djvu.sexpr
# Page-level text zone with a 0,0,0,0 bounding box and an empty string; used by
# Context.process_page() as a fallback when cropping leaves no text for a page.
EMPTY_TEXT_SEXPR = djvu.sexpr.Expression([djvu.const.TEXT_ZONE_PAGE, 0, 0, 0, 0, ''])
class ArgumentParser(argparse.ArgumentParser):
    """Command-line parser for this script.

    Accepts an optional ``-p``/``--pages`` page specification and the
    positional path of the DjVu file to process.
    """

    def __init__(self):
        super().__init__()
        self.add_argument('-p', '--pages', dest='pages', action='store', help='pages to process')
        self.add_argument('path', metavar='DJVU-FILE', action='store', help='DjVu file to process')

    def parse_args(self, args=None, namespace=None):
        """Parse the command line and expand ``--pages`` into a list of ints.

        The page specification is a comma-separated list whose items are
        either a single page number (``7``) or an inclusive range
        (``3-5``), e.g. ``1-3,5`` becomes ``[1, 2, 3, 5]``.

        :param args: argument strings; ``None`` means ``sys.argv[1:]``.
        :param namespace: optional namespace object to populate.
        :return: the parsed options; ``options.pages`` is ``None`` when the
            option was not given, otherwise a list of page numbers.

        A malformed specification is reported through ``self.error()``,
        which prints a usage message and exits.
        """
        options = super().parse_args(args, namespace)
        if options.pages is None:
            return options
        pages = []
        try:
            for rng in options.pages.split(','):
                if '-' in rng:
                    # Split the current item, not the whole specification,
                    # so ranges can be combined with other comma-separated
                    # items.
                    first, last = map(int, rng.split('-', 1))
                    pages += range(first, last + 1)
                else:
                    pages.append(int(rng))
        except (TypeError, ValueError):
            self.error('Unable to parse page numbers')
        else:
            options.pages = pages
        return options
def crop_text(sexpr, width, height):
    """Recursively clamp a text-zone S-expression to a width x height area.

    A list expression with at least five items is treated as a zone: item 0
    is its type, items 1-4 are its bounding box, and the rest are children.
    Any other expression is returned unchanged.

    Returns ``None`` when the zone's box lies entirely outside the area or
    when none of its children survive cropping; otherwise returns a new
    expression with the clamped box and the surviving children.
    """
    is_zone = isinstance(sexpr, djvu.sexpr.ListExpression) and len(sexpr) >= 5
    if not is_zone:
        return sexpr

    zone_type = sexpr[0]
    xmin, ymin, xmax, ymax = [sexpr[index].value for index in (1, 2, 3, 4)]

    # Drop zones that do not intersect the area at all.
    if xmax < 0 or ymax < 0 or xmin >= width or ymin >= height:
        return None

    xmin, ymin = max(0, xmin), max(0, ymin)
    xmax, ymax = min(xmax, width), min(ymax, height)

    kept = []
    for child in sexpr[5:]:
        cropped = crop_text(child, width, height)
        if cropped is not None:
            kept.append(cropped)
    if not kept:
        return None

    return djvu.sexpr.Expression([zone_type, xmin, ymin, xmax, ymax] + kept)
class Context(djvu.decode.Context):
    """Decoding context that emits cropped page text as editing commands.

    ``process()`` writes ``remove-txt`` / ``select N`` / ``set-txt`` commands
    to standard output (presumably a djvused script -- confirm against the
    intended consumer) and progress messages to standard error.
    """

    def handle_message(self, message):
        """Print error messages to stderr and terminate the process."""
        if not isinstance(message, djvu.decode.ErrorMessage):
            return
        print(message, file=sys.stderr)
        # Exceptions raised in handle_message() are ignored, so sys.exit()
        # would have no effect here; exit the process directly instead.
        os._exit(1)

    def process_page(self, page):
        """Return the page's text cropped to the page size.

        Falls back to ``EMPTY_TEXT_SEXPR`` when cropping yields nothing.
        """
        print('- Page #{0}'.format(page.n + 1), file=sys.stderr)
        page.get_info()
        cropped = crop_text(page.text.sexpr, page.width, page.height)
        return cropped or EMPTY_TEXT_SEXPR

    def process(self, path, pages=None):
        """Write text-replacement commands for the document at *path*.

        :param path: path of the DjVu file to open.
        :param pages: optional iterable of 1-based page numbers; when
            ``None`` every page of the document is processed.
        """
        print('Processing {path!r}:'.format(path=path), file=sys.stderr)
        document = self.new_document(djvu.decode.FileURI(path))
        document.decoding_job.wait()
        out = sys.stdout
        selected = (
            iter(document.pages)
            if pages is None
            else (document.pages[number - 1] for number in pages)
        )
        out.write('remove-txt\n')
        for page in selected:
            out.write('select {0}\n'.format(page.n + 1))
            out.write('set-txt\n')
            page_text = self.process_page(page)
            page_text.print_into(out)
            out.write('\n.\n\n')
def main():
    """Parse the command line and process the requested DjVu file."""
    options = ArgumentParser().parse_args()
    Context().process(options.path, options.pages)
# Run main() only when the file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
|