# ----------------------------------------------------------------------------
# Copyright (c) 2013--, scikit-bio development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE.txt, distributed with this software.
# ----------------------------------------------------------------------------
# Script to insert a meta description tag into each documentation page. The
# content is the first line of the docstring of the corresponding object.
import os
import re
import glob
import html
# -- Configuration -----------------------------------------------------------

# Directory (relative to this script) that holds the built HTML pages.
rootdir = "build/html"

# Maximum number of characters of summary text placed in the description,
# counted before HTML escaping.
max_length = 160


# -- Workflow ----------------------------------------------------------------

dd_pattern = re.compile(r'<dd>(.*?)</dd>', flags=re.DOTALL)
p_pattern = re.compile(r'<p>(.*?)</p>', flags=re.DOTALL)
meta_pattern = re.compile(r'(\s*)(<meta\b[^>]*\/?>)')

# Matches any HTML tag, so inline markup can be removed from the summary.
tag_pattern = re.compile(r'<[^>]+>')


def extract_summary(content):
    """Return the plain-text summary found in an HTML page.

    The summary is the first ``<p>`` element inside the first ``<dd>``
    element of the page.

    Parameters
    ----------
    content : str
        Full HTML source of a page.

    Returns
    -------
    str or None
        Summary with tags removed, entities decoded and whitespace collapsed
        to single spaces, or None if the page has no such paragraph or the
        paragraph contains no text.

    """
    dd_match = dd_pattern.search(content)
    if not dd_match:
        return None
    p_match = p_pattern.search(dd_match.group(1))
    if not p_match:
        return None
    # The fragment is HTML: strip tags first, then decode entities, so that
    # an escaped "&lt;x&gt;" in the text is not mistaken for a tag.
    text = html.unescape(tag_pattern.sub("", p_match.group(1)))
    return " ".join(text.split()) or None


def truncate(summary, limit=None):
    """Shorten a summary to at most `limit` characters.

    Parameters
    ----------
    summary : str
        Text to shorten.
    limit : int, optional
        Maximum length of the result. Defaults to `max_length`.

    Returns
    -------
    str
        `summary` unchanged if it fits; otherwise a prefix cut back to the
        last space (when there is one) and followed by "...".

    """
    if limit is None:
        limit = max_length
    if len(summary) <= limit:
        return summary
    clipped = summary[:limit - 3]
    if " " in clipped:
        # drop the trailing partial word
        clipped = clipped.rsplit(" ", 1)[0]
    return clipped + "..."


def insert_description(content):
    """Insert a meta description tag after the first meta tag of a page.

    Parameters
    ----------
    content : str
        Full HTML source of a page.

    Returns
    -------
    str or None
        Updated page source, or None if the page should be left untouched:
        it has no summary, it has no ``<meta>`` tag to anchor the insertion,
        or it already contains the exact tag that would be inserted.

    """
    summary = extract_summary(content)
    if summary is None:
        return None
    meta_match = meta_pattern.search(content)
    if not meta_match:
        return None

    # escape after truncation so that an entity is never cut in half
    summary = html.escape(truncate(summary))
    line = f'<meta name="description" content="{summary}" />'

    # keep repeated runs over the same build from stacking duplicate tags
    if line in content:
        return None

    # group 1 is the whitespace (including the newline) preceding the first
    # meta tag; reusing it places the new tag on its own, equally indented line
    indent = meta_match.group(1)
    pos = meta_match.end()
    return content[:pos] + indent + line + content[pos:]


def main(root=None):
    """Add a meta description tag to every HTML page below a directory.

    Parameters
    ----------
    root : str, optional
        Directory to search recursively for ``*.html`` files. Defaults to
        `rootdir` relative to the location of this script.

    Returns
    -------
    int
        Number of files that were rewritten.

    Raises
    ------
    FileNotFoundError
        If `root` is not an existing directory.

    """
    if root is None:
        root = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            rootdir)
    if not os.path.isdir(root):
        raise FileNotFoundError(f"HTML directory not found: {root}")

    # Search with a full path instead of changing the working directory, so
    # nothing needs to be restored if a file fails to process.
    pattern = os.path.join(glob.escape(root), "**", "*.html")
    n_updated = 0
    for path in glob.glob(pattern, recursive=True):
        # explicit encoding: do not depend on the locale default
        with open(path, "r", encoding="utf-8") as fh:
            content = fh.read()
        updated = insert_description(content)
        if updated is None:
            continue
        with open(path, "w", encoding="utf-8") as fh:
            fh.write(updated)
        n_updated += 1
    return n_updated


if __name__ == "__main__":
    main()