1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
|
#!/usr/bin/env python3
# Generate an HTML page that makes it easy to visually compare all PDF files
# that are modified in the current branch, compared to the master branch.
# USAGE: ./compare-changed-pdfs.py [test_subdir_path]
import sys, webbrowser
from functools import partial
from http.server import HTTPServer, SimpleHTTPRequestHandler
from os import scandir
from pathlib import Path
from subprocess import check_output
from jinja2 import Environment, FileSystemLoader
PORT = 8000
TEMPLATE_FILENAME = "changed_pdfs_comparison.html"
SCRIPTS_DIR = Path(__file__).parent
REPO_DIR = SCRIPTS_DIR.parent
GIT_REF = "master"
TMP_DIR = REPO_DIR / f"{GIT_REF}-checkouts"
VIEW_SLICE = slice(0, 50)
def scantree_dirs(path):
    """Recursively yield the given folder and every sub-directory below it.

    The first value yielded is ``path`` itself, exactly as it was passed in.
    Every following value is the ``DirEntry.path`` of a sub-directory (not the
    ``DirEntry`` object itself), with each directory yielded before its own
    children.

    ``DirEntry.is_dir()`` is called with its defaults, so symbolic links that
    point to directories are descended into as well.
    """
    yield path
    # Collect the sub-directories first and close the scandir() handle right
    # away: otherwise one OS directory handle per recursion level stays open
    # for as long as the consumer keeps this generator suspended.
    with scandir(path) as entries:
        subdir_paths = [entry.path for entry in entries if entry.is_dir()]
    for subdir_path in subdir_paths:
        yield from scantree_dirs(subdir_path)
# Path prefix used to select PDF files; an optional first command-line
# argument overrides the default.
if len(sys.argv) > 1:
    target_dir = sys.argv[1]
else:
    target_dir = "test/"
print(f"Processing all PDF reference files in directory {target_dir}")

# Keep only the lines reporting a modified ("M") file whose path starts with
# target_dir and ends with ".pdf"; the leading status letter is dropped.
diff_output = check_output(f"git diff --name-status {GIT_REF}", shell=True)
modified_prefix = f"M\t{target_dir}"
all_changed_pdf_files = []
for diff_line in diff_output.decode("utf-8").splitlines():
    if diff_line.startswith(modified_prefix) and diff_line.endswith(".pdf"):
        all_changed_pdf_files.append(diff_line[1:].strip())

# Only the VIEW_SLICE portion of the list is processed further; is_shrunk
# records whether that slicing actually left some files out.
changes_pdf_files_count = len(all_changed_pdf_files)
changed_pdf_files = all_changed_pdf_files[VIEW_SLICE]
is_shrunk = len(changed_pdf_files) < changes_pdf_files_count
# Write the GIT_REF version of every selected PDF under TMP_DIR, at the same
# relative path it has in the git diff output.
for changed_pdf_file in changed_pdf_files:
    checkout_path = TMP_DIR / changed_pdf_file
    checkout_path.parent.mkdir(exist_ok=True, parents=True)
    # Progress log only: the line shows the equivalent shell command, but
    # nothing is passed through a shell anymore.
    command = f"git show {GIT_REF}:{changed_pdf_file} > {TMP_DIR}/{changed_pdf_file}"
    print(command)
    # An argument list (no shell, no ">" redirect) keeps file names that
    # contain spaces or shell metacharacters intact and works the same on
    # every platform. The blob is written as raw bytes, as a PDF must be.
    pdf_bytes = check_output(["git", "show", f"{GIT_REF}:{changed_pdf_file}"])
    checkout_path.write_bytes(pdf_bytes)
# Templates are looked up in SCRIPTS_DIR, the directory containing this script.
env = Environment(
    loader=FileSystemLoader(str(SCRIPTS_DIR)),
    autoescape=True,
    trim_blocks=True,
    lstrip_blocks=True,
)
template = env.get_template(TEMPLATE_FILENAME)

# Variables made available to the template.
template_context = {
    "changed_pdf_files": changed_pdf_files,
    "is_shrunk": is_shrunk,
    "changes_pdf_files_count": changes_pdf_files_count,
    "GIT_REF": GIT_REF,
    "VIEW_SLICE": VIEW_SLICE,
}
rendered_page = template.render(**template_context)

# The rendered page is stored in REPO_DIR under the template's own file name.
output_path = REPO_DIR / TEMPLATE_FILENAME
output_path.write_text(rendered_page)
# Serve the content of REPO_DIR over HTTP on PORT (all interfaces) and open
# the generated comparison page in the default web browser.
httpd = HTTPServer(
    ("", PORT), partial(SimpleHTTPRequestHandler, directory=str(REPO_DIR))
)
try:
    # The server socket is already bound and listening once HTTPServer() has
    # returned, so the browser can be launched before the serve loop starts.
    webbrowser.open(f"http://localhost:{PORT}/{TEMPLATE_FILENAME}")
    httpd.serve_forever()
except KeyboardInterrupt:
    # Ctrl+C is the normal way to stop this script: exit without a traceback.
    print("Stopping HTTP server")
finally:
    # Release the listening socket explicitly instead of relying on
    # interpreter shutdown.
    httpd.server_close()
|