1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
|
#!/usr/bin/env python
"""
This takes doctest files and turns them into standalone scripts.
"""
import pathlib
import re
import click
# Pre-compiled patterns shared by the helpers below.

# one or more consecutive non-whitespace characters
not_wsp = re.compile(r"\S+")
# a ">>>" or "..." prompt followed by whitespace and preceded by four
# whitespace characters
code_eval = re.compile(r"(?<=\s{4})([>]{3}|[.]{3})\s")
# the directive line opening a jupyter-execute or doctest block; note the two
# leading dots are unescaped, so each matches any character
code_block = re.compile(r".. (jupyter-execute|doctest)::")
# whitespace followed by ":name:" where name is lowercase letters / hyphens
block_option = re.compile(r"\s+:[a-z\-]+:")
# whitespace followed by the literal ":raises:" marker
raise_option = re.compile(r"\s+:raises:")
# optional whitespace, lowercase letters, then any text up to ".show("
plotly_show = re.compile(r"\s*[a-z]+.*\.show\(")
# a line whose first non-whitespace text is "%" followed by ASCII letters
ipython_magic = re.compile(r"^\s*%[a-zA-Z]+")
def _end_of_block(line, indent):
if match := not_wsp.search(line):
return match.start() == indent
return False
def get_error_type(line):
    """Return the exception name declared by a ``:raises:`` option line.

    The result is ``line`` with every ``raise_option`` match removed and the
    surrounding whitespace stripped. ``None`` is returned when ``line`` has
    no ``:raises:`` marker at all.
    """
    has_marker = raise_option.search(line) is not None
    return raise_option.sub("", line).strip() if has_marker else None
def get_path_update(rootdir: pathlib.Path) -> str:
    """Return the preamble code that every generated script starts with.

    The preamble appends the directory holding ``set_working_directory.py``
    to ``sys.path`` (so the generated script can import it), changes into
    ``rootdir`` and pins the listed numerical-library thread-count
    environment variables to ``"1"``.

    Parameters
    ----------
    rootdir
        directory the generated script will ``os.chdir`` into

    Returns
    -------
    str
        the preamble statements joined by newlines

    Raises
    ------
    FileNotFoundError
        if ``set_working_directory.py`` is not in the current working
        directory
    """
    # a relative path, so it is resolved against the current working directory
    swd_script = pathlib.Path("set_working_directory.py").absolute()
    # explicit check rather than ``assert`` so it still runs under ``python -O``
    if not swd_script.exists():
        raise FileNotFoundError(
            f"{swd_script} not found; run from the directory containing "
            "set_working_directory.py"
        )

    block = [
        "import sys",
        f"sys.path.append({str(swd_script.parent)!r})",
        "import os",
        f"os.chdir({str(rootdir)!r})",
        'os.environ["OMP_NUM_THREADS"] = "1"',
        'os.environ["OPENBLAS_NUM_THREADS"] = "1"',
        'os.environ["MKL_NUM_THREADS"] = "1"',
        'os.environ["VECLIB_MAXIMUM_THREADS"] = "1"',
        'os.environ["NUMEXPR_NUM_THREADS"] = "1"',
    ]
    return "\n".join(block)
def deindent(line):
    """Drop one four-space indentation level from ``line`` if it has one.

    Lines that do not begin with four spaces are returned unchanged.
    """
    four_spaces = " " * 4
    return line[4:] if line[:4] == four_spaces else line
def get_code_block_line_numbers(doc):
    """Locate the code-block sections of a document.

    Parameters
    ----------
    doc
        the document as a sequence of lines

    Returns
    -------
    list[tuple[int, int, int]]
        one ``(start, end, indent)`` triple per block, in document order.
        ``start`` is the index of the directive line, ``end`` is the index
        one past the last line of the block (an exclusive bound, suitable
        for ``doc[start + 1 : end]``) and ``indent`` is the column at which
        the directive match begins.
    """
    spans = []
    start = None  # index of the directive of the currently open block
    indent = 0
    for i, line in enumerate(doc):
        if hit := code_block.search(line):
            if start is not None:
                # A new directive directly follows an open block. ``end`` is
                # exclusive, so the previous block stops *at* this line;
                # using ``i - 1`` here would drop the line just before it.
                spans.append((start, i, indent))
            start, indent = i, hit.start()
        elif start is not None and _end_of_block(line, indent):
            spans.append((start, i, indent))
            start = None

    if start is not None:
        # the document finished while a block was still open
        spans.append((start, len(doc), indent))

    return spans
def format_block(block, indent):
    """Convert the lines of one directive block into script lines.

    Parameters
    ----------
    block
        the lines following the directive line, up to the end of the block
    indent
        number of leading characters to drop from every line (the column of
        the directive)

    Returns
    -------
    list[str]
        the code lines. Lines matching ``block_option`` are discarded. If
        the first line names an exception via ``:raises:``, the code keeps
        its remaining indentation and is wrapped in ``try`` / ``except``;
        otherwise one four-space level is removed. Lines matching
        ``plotly_show`` or ``ipython_magic`` are commented out. An empty
        ``block`` gives an empty list.
    """
    if not block:
        # a directive with no content, e.g. the last line of the document
        return []

    error_type = get_error_type(block[0])

    code = []
    for line in block:
        if block_option.search(line):
            continue
        body = line[indent:]
        # when wrapping in try/except the existing indent becomes the
        # indentation of the ``try`` suite, so it is kept
        code.append(body if error_type else deindent(body))

    if error_type:
        code = ["try:", *code, f"except {error_type}:", " pass"]

    # comment out as cannot be executed in script
    return [
        f"# {line}"
        if plotly_show.search(line) or ipython_magic.search(line)
        else line
        for line in code
    ]
def get_code_blocks(doc: list[str], working_dir: pathlib.Path) -> str:
    """Build the full text of the standalone script for ``doc``.

    The script starts with the preamble from ``get_path_update`` and is
    followed by each formatted code block, every block preceded by an
    empty line. All pieces are joined with newlines.
    """
    spans = get_code_block_line_numbers(doc)
    pieces = [get_path_update(working_dir)]
    for first, stop, column in spans:
        # ``first`` is the directive line itself, so the content starts after it
        pieces.append("")
        pieces.extend(format_block(doc[first + 1 : stop], column))

    return "\n".join(pieces)
def _rst_path(*args):
path = pathlib.Path(args[-1])
assert path.suffix == ".rst"
if not path.exists():
click.secho(f"ERROR: {str(path)} does not exist", fg="red")
exit(1)
return path
@click.command(no_args_is_help=True, context_settings={"show_default": True})
@click.argument("rst_path", callback=_rst_path)
@click.option(
    "-wd",
    "--working_dir",
    type=pathlib.Path,
    help="Set the working directory. Defaults to dir containing this script.",
)
@click.option("-t", "--test", is_flag=True, help="don't write script, print it")
def main(rst_path, working_dir, test):
    """extracts code under jupyter_execute or doctest blocks to python script"""
    # NOTE: the docstring above is left untouched; click presents a command's
    # docstring as its help text.

    # the script is named after the rst file and written next to it unless a
    # working directory was given
    script_name = f"{rst_path.stem}.py"
    destination = working_dir if working_dir else rst_path.parent
    outpath = destination / script_name

    doc = rst_path.read_text().splitlines()

    # directory the generated script will change into
    working_dir = working_dir or pathlib.Path(__file__).parent
    just_code = get_code_blocks(doc, working_dir)

    if not test:
        outpath.write_text(just_code)
        return

    # test mode: show the script instead of writing it
    print(just_code)
    exit()
# Invoke the command-line interface only when this file is run as a script,
# not when it is imported.
if __name__ == "__main__":
    main()
|