File: compare-changed-pdfs.py

package info (click to toggle)
fpdf2 2.8.4-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 53,828 kB
  • sloc: python: 39,486; sh: 133; makefile: 12
file content (75 lines) | stat: -rwxr-xr-x 2,357 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/env python3

# Generate an HTML page that makes it easy to visually compare all PDF files
# that are modified in the current branch, compared to the master branch.

# USAGE: ./compare-changed-pdfs.py [test_subdir_path]

import sys, webbrowser
from functools import partial
from http.server import HTTPServer, SimpleHTTPRequestHandler
from os import scandir
from pathlib import Path
from subprocess import check_output

from jinja2 import Environment, FileSystemLoader

PORT = 8000
TEMPLATE_FILENAME = "changed_pdfs_comparison.html"

SCRIPTS_DIR = Path(__file__).parent
REPO_DIR = SCRIPTS_DIR.parent
GIT_REF = "master"
TMP_DIR = REPO_DIR / f"{GIT_REF}-checkouts"
VIEW_SLICE = slice(0, 50)


def scantree_dirs(path):
    """Recursively yield the given folder and every sub-directory below it.

    The starting ``path`` is yielded first, exactly as it was passed in.
    Each nested directory is then yielded as the ``path`` string of its
    ``os.scandir`` entry, a parent always coming before its own children.
    """
    yield path
    # The context manager closes the scandir() iterator (and the directory
    # handle behind it) as soon as this level is finished or the generator
    # is closed early, instead of leaving that to garbage collection.
    with scandir(path) as entries:
        for entry in entries:
            if entry.is_dir():
                yield from scantree_dirs(entry.path)


# Path prefix that modified PDF files must start with, matched against the
# paths printed by "git diff"; taken from the first CLI argument if provided.
target_dir = sys.argv[1] if len(sys.argv) > 1 else "test/"
print(f"Processing all PDF reference files in directory {target_dir}")

# git is invoked with an argument list (no shell), so file names containing
# spaces or shell metacharacters are passed through untouched.
stdout = check_output(["git", "diff", "--name-status", GIT_REF])
# Keep only entries with status "M" (modified) under target_dir ending in .pdf;
# line[1:] drops the status letter and strip() removes the tab separator.
changed_pdf_files = [
    line[1:].strip()
    for line in stdout.decode("utf-8").splitlines()
    if line.startswith(f"M\t{target_dir}") and line.endswith(".pdf")
]
# Remember the full count before truncating, so the page can report that
# only a subset (VIEW_SLICE) of the changed files is displayed.
changes_pdf_files_count = len(changed_pdf_files)
changed_pdf_files = changed_pdf_files[VIEW_SLICE]
is_shrunk = len(changed_pdf_files) < changes_pdf_files_count

# Extract the GIT_REF version of every changed PDF under TMP_DIR, mirroring
# the file's path inside the repository.
for changed_pdf_file in changed_pdf_files:
    checkout_path = TMP_DIR / Path(changed_pdf_file)
    checkout_path.parent.mkdir(exist_ok=True, parents=True)
    # Informational only: shows the equivalent shell command.
    print(f"git show {GIT_REF}:{changed_pdf_file} > {TMP_DIR}/{changed_pdf_file}")
    # The PDF bytes are captured and written from Python, which avoids both
    # shell redirection and any quoting issue with the destination path.
    checkout_path.write_bytes(
        check_output(["git", "show", f"{GIT_REF}:{changed_pdf_file}"])
    )

# Render the comparison page from the template stored next to this script
# and write it at the root of the repository, where the HTTP server below
# will serve it from.
env = Environment(
    loader=FileSystemLoader(str(SCRIPTS_DIR)),
    autoescape=True,
    trim_blocks=True,
    lstrip_blocks=True,
)
template = env.get_template(TEMPLATE_FILENAME)
(REPO_DIR / TEMPLATE_FILENAME).write_text(
    template.render(
        changed_pdf_files=changed_pdf_files,
        is_shrunk=is_shrunk,
        changes_pdf_files_count=changes_pdf_files_count,
        GIT_REF=GIT_REF,
        VIEW_SLICE=VIEW_SLICE,
    )
)

# Serve REPO_DIR over HTTP and open the generated page in a browser.
# The context manager closes the listening socket when serve_forever() is
# interrupted (e.g. with Ctrl+C).
with HTTPServer(
    ("", PORT), partial(SimpleHTTPRequestHandler, directory=str(REPO_DIR))
) as httpd:
    # The server socket is already bound at this point, so the browser's
    # first request is queued until serve_forever() starts handling it.
    webbrowser.open(f"http://localhost:{PORT}/{TEMPLATE_FILENAME}")
    httpd.serve_forever()