File: metatag.py

package info (click to toggle)
python-skbio 0.6.3-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 11,924 kB
  • sloc: python: 67,527; ansic: 672; makefile: 225
file content (68 lines) | stat: -rw-r--r-- 2,023 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# ----------------------------------------------------------------------------
# Copyright (c) 2013--, scikit-bio development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE.txt, distributed with this software.
# ----------------------------------------------------------------------------

# Script to insert a meta description tag into each documentation page. The
# content is the first line of the docstring of the corresponding object.

import os
import re
import glob
import html


# -- Configuration -----------------------------------------------------------

# directory of the built HTML pages, relative to this script
rootdir = "build/html"

# maximum number of characters of the description (before HTML escaping)
max_length = 160


# -- Workflow ----------------------------------------------------------------

dd_pattern = re.compile(r'<dd>(.*?)</dd>', flags=re.DOTALL)
p_pattern = re.compile(r'<p>(.*?)</p>', flags=re.DOTALL)
meta_pattern = re.compile(r'(\s*)(<meta\b[^>]*\/?>)')

# inline markup (e.g., <code>, <a>, <span>) nested inside the summary paragraph
tag_pattern = re.compile(r'<[^>]+>')

# description tag in exactly the form written by this script; used to replace
# the tag instead of stacking a duplicate when a page is processed again
desc_pattern = re.compile(r'<meta name="description" content="[^"]*" />')


def _plain_text(fragment):
    """Convert an HTML fragment into single-line plain text.

    Tags are dropped, character references (e.g., ``&amp;``) are decoded and
    runs of whitespace (including line breaks) are collapsed into one space.
    """
    text = html.unescape(tag_pattern.sub("", fragment))
    return " ".join(text.split())


def _truncate(text, limit):
    """Shorten text to at most ``limit`` characters, ending with "..."

    The cut is moved back to the last space, if there is one, so that a word
    is not split in the middle.
    """
    if len(text) <= limit:
        return text
    text = text[:limit - 3]
    if ' ' in text:
        text = text.rsplit(' ', 1)[0]
    return text + "..."


cwd = os.getcwd()
os.chdir(os.path.join(os.path.dirname(__file__), rootdir))

# restore the original working directory even if a file cannot be processed
try:
    for file in glob.glob("**/*.html", recursive=True):
        # read and write explicitly as UTF-8 rather than the locale encoding
        with open(file, "r", encoding="utf-8") as fh:
            content = fh.read()

        # find first paragraph of the first description block (summary)
        dd_match = dd_pattern.search(content)
        if not dd_match:
            continue
        p_match = p_pattern.search(dd_match.group(1))
        if not p_match:
            continue

        # reduce to plain text *before* truncating, such that neither a tag
        # nor a character reference can be cut in half or escaped twice
        summary = _plain_text(p_match.group(1))
        if not summary:
            continue

        # truncate summary to given length
        summary = _truncate(summary, max_length)

        # make summary safe for HTML (also escapes quotes for the attribute)
        summary = html.escape(summary)

        line = f'<meta name="description" content="{summary}" />'

        desc_match = desc_pattern.search(content)
        if desc_match:
            # page was processed before: refresh the existing tag in place
            start, end = desc_match.span()
            updated = content[:start] + line + content[end:]
        else:
            # insert summary right after the first meta tag, reusing the
            # whitespace (line break and indentation) that precedes that tag
            meta_match = meta_pattern.search(content)
            if not meta_match:
                continue
            indent = meta_match.group(1)
            pos = meta_match.end()
            updated = content[:pos] + indent + line + content[pos:]

        # leave the file untouched when there is nothing to change
        if updated == content:
            continue

        with open(file, "w", encoding="utf-8") as fh:
            fh.write(updated)
finally:
    os.chdir(cwd)