1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253
|
r"""Parser for python source files."""
# Created Sun Nov 27 14:03:07 2016
# Author: Óscar Nájera
import ast
import re
import tokenize
from collections import namedtuple
from io import BytesIO
from textwrap import dedent
from sphinx.errors import ExtensionError
from sphinx.util.logging import getLogger
logger = getLogger("sphinx-gallery")
SYNTAX_ERROR_DOCSTRING = """
SyntaxError
===========
Example script with invalid Python syntax
"""
# The pattern for in-file config comments is designed to not greedily match
# newlines at the start and end, except for one newline at the end. This
# ensures that the matched pattern can be removed from the code without
# changing the block structure; i.e. empty newlines are preserved, e.g. in
#
# a = 1
#
# # sphinx_gallery_thumbnail_number = 2
#
# b = 2
FLAG_START = r"^[\ \t]*#\s*"
FLAG_BODY = r"sphinx_gallery_([A-Za-z0-9_]+)(\s*=\s*(.+))?[\ \t]*\n?"
INFILE_CONFIG_PATTERN = re.compile(FLAG_START + FLAG_BODY, re.MULTILINE)
START_IGNORE_FLAG = FLAG_START + "sphinx_gallery_start_ignore"
END_IGNORE_FLAG = FLAG_START + "sphinx_gallery_end_ignore"
IGNORE_BLOCK_PATTERN = re.compile(
rf"{START_IGNORE_FLAG}(?:[\s\S]*?){END_IGNORE_FLAG}\n?", re.MULTILINE
)
def parse_source_file(filename):
"""Parse source file into AST node.
Parameters
----------
filename : str
File path
Returns
-------
node : AST node
content : utf-8 encoded string
"""
# builtin open automatically converts \r\n to \n
with open(filename, "r", encoding="utf-8") as fid:
content = fid.read()
try:
node = ast.parse(content)
return node, content
except SyntaxError:
return None, content
def _get_docstring_and_rest(filename):
"""Separate ``filename`` content between docstring and the rest.
Strongly inspired from ast.get_docstring.
Returns
-------
docstring : str
docstring of ``filename``
rest : str
``filename`` content without the docstring
lineno : int
The line number.
node : ast.Module
The ast node. When `filename` parsed with `mode='exec'` node should be
of type `ast.Module`.
"""
node, content = parse_source_file(filename)
if node is None:
return SYNTAX_ERROR_DOCSTRING, content, 1, node
if not isinstance(node, ast.Module):
raise ExtensionError(
"This function only supports modules. You provided {}".format(
node.__class__.__name__
)
)
if not (
node.body
and isinstance(node.body[0], ast.Expr)
and isinstance(node.body[0].value, ast.Constant)
):
raise ExtensionError(
'Could not find docstring in file "{}". '
"A docstring is required by sphinx-gallery "
'unless the file is ignored by "ignore_pattern"'.format(filename)
)
# Python 3.7+ way
docstring = ast.get_docstring(node)
assert docstring is not None # should be guaranteed above
# This is just for backward compat
if node.body[0].value.value[:1] == "\n":
# just for strict backward compat here
docstring = "\n" + docstring
ts = tokenize.tokenize(BytesIO(content.encode()).readline)
# find the first string according to the tokenizer and get its end row
for tk in ts:
if tk.exact_type == 3:
lineno, _ = tk.end
break
else:
lineno = 0
# This get the content of the file after the docstring last line
# Note: 'maxsplit' argument is not a keyword argument in python2
rest = "\n".join(content.split("\n")[lineno:])
lineno += 1
return docstring, rest, lineno, node
def extract_file_config(content):
"""Pull out the file-specific config specified in the docstring."""
file_conf = {}
for match in re.finditer(INFILE_CONFIG_PATTERN, content):
name = match.group(1)
value = match.group(3)
if value is None: # a flag rather than a config setting
continue
try:
value = ast.literal_eval(value)
except (SyntaxError, ValueError):
logger.warning(
"Sphinx-gallery option %s was passed invalid value %s", name, value
)
else:
file_conf[name] = value
return file_conf
Block = namedtuple("Block", ["type", "content", "lineno"])
# type: "text" or "code"
# content (str): the block lines as str
# lineno (int): the line number where the block starts
def split_code_and_text_blocks(source_file, return_node=False):
"""Return list with source file separated into code and text blocks.
Parameters
----------
source_file : str
Path to the source file.
return_node : bool
If True, return the ast node.
Returns
-------
file_conf : dict
File-specific settings given in source file comments as:
``# sphinx_gallery_<name> = <value>``
blocks : list
(label, content, line_number)
List where each element is a tuple with the label ('text' or 'code'),
the corresponding content string of block and the leading line number.
node : ast.Module
The parsed ast node.
"""
docstring, rest_of_content, lineno, node = _get_docstring_and_rest(source_file)
blocks = [Block("text", docstring, 1)]
file_conf = extract_file_config(rest_of_content)
pattern = re.compile(
r"(?P<header_line>^#{20,}.*|^# ?%%.*)\s(?P<text_content>(?:^#.*\s?)*)",
flags=re.M,
)
sub_pat = re.compile("^#", flags=re.M)
pos_so_far = 0
for match in re.finditer(pattern, rest_of_content):
code_block_content = rest_of_content[pos_so_far : match.start()]
if code_block_content.strip():
blocks.append(Block("code", code_block_content, lineno))
lineno += code_block_content.count("\n")
lineno += 1 # Ignored header line of hashes.
text_content = match.group("text_content")
text_block_content = dedent(re.sub(sub_pat, "", text_content)).lstrip()
if text_block_content.strip():
blocks.append(Block("text", text_block_content, lineno))
lineno += text_content.count("\n")
pos_so_far = match.end()
remaining_content = rest_of_content[pos_so_far:]
if remaining_content.strip():
blocks.append(Block("code", remaining_content, lineno))
out = (file_conf, blocks)
if return_node:
out += (node,)
return out
def remove_ignore_blocks(code_block):
"""
Return the content of *code_block* with ignored areas removed.
An ignore block starts with # sphinx_gallery_start_ignore, and ends with
# sphinx_gallery_end_ignore. These lines and anything in between them will
be removed, but surrounding empty lines are preserved.
Parameters
----------
code_block : str
A code segment.
"""
num_start_flags = len(re.findall(START_IGNORE_FLAG, code_block, re.MULTILINE))
num_end_flags = len(re.findall(END_IGNORE_FLAG, code_block, re.MULTILINE))
if num_start_flags != num_end_flags:
raise ExtensionError(
'All "sphinx_gallery_start_ignore" flags must have a matching '
'"sphinx_gallery_end_ignore" flag!'
)
return re.subn(IGNORE_BLOCK_PATTERN, "", code_block)[0]
def remove_config_comments(code_block):
"""
Return the content of *code_block* with in-file config comments removed.
Comment lines of the pattern '# sphinx_gallery_[option] = [val]' are
removed, but surrounding empty lines are preserved.
Parameters
----------
code_block : str
A code segment.
"""
parsed_code, _ = re.subn(INFILE_CONFIG_PATTERN, "", code_block)
return parsed_code
|