File: image_srcsets.py

package info (click to toggle)
beanbag-docutils 2.4-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, trixie
  • size: 472 kB
  • sloc: python: 2,671; makefile: 202; sh: 8
file content (308 lines) | stat: -rw-r--r-- 9,264 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
"""Sphinx extension for srcsets in images.

.. versionadded:: 2.1


This extension adds a ``sources`` option to the standard image directives,
enabling responsive image support via srcsets.

These are specified a bit differently from ``<img srcset="...">`` values.
The descriptor goes first, and a comma between entries is optional (a line
break can be used instead). For example:

.. code-block:: rst

   .. image:: path/to/file.png
      :sources: 2x path/to/file@2x.png
                3x path/to/file@3x.png
                100w path/to/file@100w.png
                200h path/to/file@200h.png

If ``sources`` is not explicitly provided, but files with those standard
``@``-based suffixes exist alongside the referenced main image, they'll
automatically be used to define the srcsets of the image. The ``1x`` entry is
also automatically inserted based on the main image.

If relying on the default of scanning for srcset images, this becomes a
zero-configuration, drop-in solution for all Sphinx documentation codebases.


Setup
=====

To use this, you just need to add the extension in :file:`conf.py`::

    extensions = [
        ...
        'beanbag_docutils.sphinx.ext.image_srcsets',
        ...
    ]
"""

import os
import posixpath
import re
from collections import OrderedDict
from glob import glob
from typing import Dict, List, Optional, TYPE_CHECKING
from urllib.parse import quote as urllib_quote

from docutils import nodes
from docutils.parsers.rst import directives
from docutils.parsers.rst.directives.images import Image
from sphinx.application import Sphinx
from sphinx.util.i18n import search_image_for_language

from beanbag_docutils import VERSION

if TYPE_CHECKING:
    from sphinx.environment import BuildEnvironment


# Matches an all-digit ``width="..."`` attribute (with its leading space).
WIDTH_ATTR_RE = re.compile(r' width="(\d+)"')

# Matches an all-digit ``height="..."`` attribute (with its leading space).
HEIGHT_ATTR_RE = re.compile(r' height="(\d+)"')

# Matches a non-empty ``style="..."`` attribute (with its leading space); the
# ``style`` group captures the attribute's value without the quotes.
STYLE_ATTR_RE = re.compile(r' style="(?P<style>[^"]+)"')


def _get_srcsets(
    env,          # type: BuildEnvironment
    node,         # type: nodes.image
    docname=None  # type: Optional[str]
):  # type: (...) -> Dict[str, str]
    """Return a normalized version of all srcsets for an image node.

    This will convert a ``sources`` option to a dictionary mapping descriptors
    (such as ``2x``, ``100w``, etc.) to image paths. When ``sources`` is set,
    a ``1x`` entry pointing at the node's own ``uri`` is inserted first.

    The result is cached on the node (in its ``_srcsets`` attribute) for
    future lookup. The cached dictionary itself is returned, so callers that
    mutate it are modifying the cache.

    Args:
        env (sphinx.environment.BuildEnvironment):
            The current Sphinx build environment.

        node (docutils.nodes.image):
            The image node to retrieve sources from.

        docname (str, optional):
            The current document name.

            This is required when srcsets are not already cached.

    Returns:
        dict:
        The mapping of descriptors to image paths. This is empty if the node
        has no ``sources`` option.

    Raises:
        ValueError:
            An entry in ``sources`` did not contain both a descriptor and
            a path.
    """
    try:
        return node.attributes['_srcsets']
    except KeyError:
        pass

    assert docname, 'A docname is required when srcsets are not yet cached.'

    norm_srcsets = OrderedDict()
    srcset = node.attributes.get('sources')

    if srcset:
        # The main image always acts as the 1x entry. An explicit "1x" entry
        # in the sources will replace it below.
        norm_srcsets['1x'] = node.attributes['uri']

        for source in re.split(r',|\n+', srcset):
            # Split on any run of whitespace, so that tabs or repeated spaces
            # between the descriptor and the path are all accepted.
            parts = source.split(None, 1)

            if not parts:
                # Nothing but whitespace between two separators.
                continue

            if len(parts) != 2:
                raise ValueError(
                    'Invalid srcset entry %r: expected "<descriptor> <path>".'
                    % source.strip())

            descriptor, url = parts
            norm_srcsets[descriptor] = env.relfn2path(
                search_image_for_language(url.strip(), env),
                docname)[0]

    node.attributes['_srcsets'] = norm_srcsets

    return norm_srcsets


def _visit_image_html(
    self,
    node   # type: nodes.image
):  # type: (...) -> None
    """Process an Image node.

    This will update the HTML of the image node with a ``srcset=`` attribute
    if srcsets are needed.

    When a srcset is added, the node's ``width`` and ``height`` (if set) are
    also written as attributes unless the tag already has all-digit ones, and
    any ``width:``/``height:`` declarations are removed from the tag's
    ``style`` attribute.

    If the default visitor did not end its output with an ``<img>`` tag, the
    output is left untouched.

    Args:
        self (object):
            The translator visiting the node. Its class must provide
            ``visit_image``, and the instance must provide ``body`` and
            ``builder``.

        node (docutils.nodes.image):
            The image node to process.
    """
    # Use the default logic to build the image tag, since it's non-trivial.
    type(self).visit_image(self, node)

    builder = self.builder
    env = builder.env
    images = env.images
    base_images_path = builder.imgpath

    srcsets = _get_srcsets(node=node,
                           env=env)

    if not srcsets:
        return

    last_tag = self.body[-1]

    if not last_tag.startswith('<img '):
        # The default visitor produced something other than an <img> tag.
        # There is nothing to attach a srcset to, so leave it as generated.
        return

    new_attrs: List[str] = [
        'srcset="%s"' % ', '.join(
            '%s %s' % (
                posixpath.join(base_images_path,
                               urllib_quote(images[url][1])),
                descriptor)
            for descriptor, url in srcsets.items()
        ),
    ]

    # Set width/height attributes, unless the tag already has all-digit ones.
    if not WIDTH_ATTR_RE.search(last_tag) and 'width' in node:
        new_attrs.append('width="%s"' % node['width'])

    if not HEIGHT_ATTR_RE.search(last_tag) and 'height' in node:
        new_attrs.append('height="%s"' % node['height'])

    # Remove dimensions from the style="" attribute.
    style_m = STYLE_ATTR_RE.search(last_tag)

    if style_m:
        new_styles: List[str] = [
            style
            for style in style_m.group('style').split(';')
            if (style and
                not style.lstrip().startswith(('width:', 'height:')))
        ]

        if new_styles:
            # Keep the attribute, replacing only its value.
            start, stop = style_m.span('style')
            last_tag = '%s%s%s' % (last_tag[:start],
                                   ';'.join(new_styles),
                                   last_tag[stop:])
        else:
            # Nothing is left, so drop the whole style="" attribute.
            start, stop = style_m.span()
            last_tag = '%s%s' % (last_tag[:start],
                                 last_tag[stop:])

    self.body[-1] = '<img %s %s' % (
        ' '.join(new_attrs),
        last_tag[len('<img '):],
    )


def collect_srcsets(
    app,     # type: Sphinx
    doctree  # type: nodes.document
):  # type: (...) -> None
    """Collect all images referenced by image nodes or scanned in directories.

    This will collect any explicit values defined via our ``sources`` option
    for image directives. If ``sources`` is not specified, but there are files
    in the directory with ``@2x``, ``@3x``, ``@100w``, ``@100h``, etc.
    descriptors, those will be collected instead and associated with the image.

    Every collected image is recorded as a dependency of the current document
    and added to the environment's images.

    Args:
        app (sphinx.application.Sphinx):
            The Sphinx application being run.

        doctree (docutils.nodes.document):
            The document tree being processed.
    """
    # Only this function needs glob escaping, so import it locally rather
    # than relying on a module-level import.
    from glob import escape as glob_escape

    env = app.env
    images = env.images
    docname = env.docname

    if hasattr(doctree, 'findall'):
        # This is the modern way of finding nodes.
        findall = doctree.findall
    else:
        # This is pending deprecation in docutils.
        findall = doctree.traverse

    for node in findall(nodes.image):
        srcsets = _get_srcsets(node=node,
                               env=env,
                               docname=docname)

        if not srcsets:
            # NOTE: This will modify the contents of the cached srcsets.
            uri = node['uri']
            image_path = search_image_for_language(uri, env)
            base_filename, ext = os.path.splitext(image_path)

            # Escape the path pieces so that glob metacharacters in the
            # image's own path ("[", "?", "*") are matched literally.
            candidates = glob('%s@*%s' % (glob_escape(base_filename),
                                          glob_escape(ext)))

            if candidates:
                pattern = re.compile(r'%s@(\d+[xwh])%s'
                                     % (re.escape(base_filename),
                                        re.escape(ext)))

                # Require the whole filename to match, so that a name like
                # "image@2x.png.png" is not mistaken for a 2x variant.
                matches = []

                for candidate in sorted(candidates):
                    m = pattern.fullmatch(candidate)

                    if m:
                        matches.append((m.group(1), candidate))

                if matches:
                    # Only register the main image as 1x when at least one
                    # real variant was found; a lone 1x srcset is pointless.
                    srcsets['1x'] = uri

                    for descriptor, candidate in matches:
                        if descriptor not in srcsets:
                            srcsets[descriptor] = candidate

        for image_path in srcsets.values():
            env.dependencies[docname].add(image_path)
            images.add_file(docname, image_path)


def collect_pages(
    app  # type: Sphinx
):  # type: (...) -> List
    """Register the environment's images with the builder.

    Every entry in ``app.env.images`` is copied into ``app.builder.images``,
    mapping the image's path to its filename. This includes any images that
    were added to the environment for srcsets.

    Args:
        app (sphinx.application.Sphinx):
            The Sphinx application being run.

    Returns:
        list:
        An empty list (indicating no additional HTML pages are collected).
    """
    # Build the full mapping first, so the builder is only updated once all
    # entries have been unpacked successfully.
    collected = {}

    for full_path, entry in app.env.images.items():
        _docnames, filename = entry
        collected[full_path] = filename

    app.builder.images.update(collected)

    return []


def setup(
    app  # type: Sphinx
):  # type: (...) -> Dict
    """Set up the Sphinx extension.

    This registers the ``sources`` option on the image and figure directives,
    and listens for the events needed to collect and bundle images for
    srcsets, and update the resulting HTML to specify them.

    Args:
        app (sphinx.application.Sphinx):
            The Sphinx application being run.

    Returns:
        dict:
        The extension metadata, containing the extension's version.
    """
    # Imported locally, as only this function needs the figure directive.
    from docutils.parsers.rst.directives.images import Figure

    # The figure directive keeps its own copy of the image directive's
    # options, so ``sources`` must be registered on both of them.
    for directive_cls in (Image, Figure):
        directive_cls.option_spec['sources'] = directives.unchanged

    app.add_node(nodes.image,
                 html=(_visit_image_html, None),
                 override=True)

    app.connect('doctree-read', collect_srcsets)
    app.connect('html-collect-pages', collect_pages)

    return {
        'version': VERSION,
    }