File: web.py

package info (click to toggle)
python-motor 3.7.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,572 kB
  • sloc: python: 12,252; javascript: 137; makefile: 74; sh: 8
file content (182 lines) | stat: -rw-r--r-- 7,081 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# Copyright 2011-2014 MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Utilities for using Motor with Tornado web applications."""
import datetime
import email.utils
import mimetypes
import time

import gridfs
import tornado.web

import motor
from motor.motor_gridfs import _hash_gridout

# mypy: disable-error-code="no-untyped-def,no-untyped-call"

# TODO: this class is not a drop-in replacement for StaticFileHandler.
#   StaticFileHandler provides class method make_static_url, which appends
#   a checksum of the static file's contents. Templates thus can do
#   {{ static_url('image.png') }} and get "/static/image.png?v=1234abcdef",
#   which is cached forever. Problem is, it calculates the checksum synchronously.
#   Two options: keep a synchronous GridFS available to get each grid file's
#   checksum synchronously for every static_url call, or find some other idiom.


class GridFSHandler(tornado.web.RequestHandler):
    """A handler that can serve content from GridFS, very similar to
    :class:`tornado.web.StaticFileHandler`.

    .. code-block:: python

        db = motor.MotorClient().my_database
        application = web.Application(
            [
                (r"/static/(.*)", web.GridFSHandler, {"database": db}),
            ]
        )

    By default, requests' If-Modified-Since headers are honored, but no
    specific cache-control timeout is sent to clients. Thus each request for
    a GridFS file requires a quick check of the file's ``uploadDate`` in
    MongoDB. Override :meth:`get_cache_time` in a subclass to customize this.
    """

    def initialize(self, database, root_collection="fs"):
        """Store the handler's configuration from the URL spec.

        :Parameters:
          - `database`: The database passed to ``motor.MotorGridFSBucket``
            when a request is served
          - `root_collection`: The bucket / root collection name passed to
            ``motor.MotorGridFSBucket``; defaults to ``"fs"``
        """
        self.database = database
        self.root_collection = root_collection

    def get_gridfs_file(self, bucket, filename, request):
        """Overridable method to choose a GridFS file to serve at a URL.

        By default, if a URL pattern like ``"/static/(.*)"`` is mapped to this
        ``GridFSHandler``, then the trailing portion of the URL is used as the
        filename, so a request for "/static/image.png" results in a call to
        :meth:`MotorGridFSBucket.open_download_stream_by_name` with "image.png"
        as the ``filename`` argument. To customize the mapping of path to
        GridFS file, override ``get_gridfs_file`` and return a Future
        :class:`~motor.MotorGridOut` from it.

        For example, to retrieve the file by ``_id`` instead of filename::

            class CustomGridFSHandler(motor.web.GridFSHandler):
                def get_gridfs_file(self, bucket, filename, request):
                    # Path is interpreted as _id instead of name.
                    # Return a Future MotorGridOut.
                    return bucket.open_download_stream(file_id=ObjectId(filename))

        :Parameters:
          - `bucket`: A :class:`~motor.motor_tornado.MotorGridFSBucket`
          - `filename`: A string, the matched group of the URL pattern
          - `request`: An :class:`tornado.httputil.HTTPServerRequest`

        .. versionchanged:: 1.0
          **BREAKING CHANGE**: Now takes a
          :class:`~motor.motor_tornado.MotorGridFSBucket`, not a
          ``MotorGridFS``.
          Also takes an additional ``request`` parameter.

        .. versionchanged:: 0.2
           ``get_gridfs_file`` no longer accepts a callback, instead returns
           a Future.
        """
        return bucket.open_download_stream_by_name(filename)

    async def get(self, path, include_body=True):
        """Serve the GridFS file selected by ``path``.

        Sets ``Last-Modified``, ``Etag``, ``Content-Type`` (when known) and
        cache headers, then answers with 304 if the request's
        ``If-Modified-Since`` or ``If-None-Match`` header shows the client's
        copy is current. Otherwise sets ``Content-Length`` and, when
        ``include_body`` is true, streams the file to the client.

        :Parameters:
          - `path`: A string, the matched group of the URL pattern
          - `include_body`: If False, send headers only (used by :meth:`head`)

        Raises :class:`tornado.web.HTTPError` 404 if :meth:`get_gridfs_file`
        raises :class:`gridfs.NoFile`.
        """
        fs = motor.MotorGridFSBucket(self.database, self.root_collection)

        try:
            gridout = await self.get_gridfs_file(fs, path, self.request)
        except gridfs.NoFile:
            raise tornado.web.HTTPError(404) from None

        # If-Modified-Since header is only good to the second.
        modified = gridout.upload_date.replace(microsecond=0)
        self.set_header("Last-Modified", modified)

        # Get the hash for the GridFS file.
        checksum = _hash_gridout(gridout)

        self.set_header("Etag", f'"{checksum}"')

        mime_type = gridout.content_type

        # If content type is not defined, try to check it with mimetypes
        if mime_type is None:
            mime_type, _ = mimetypes.guess_type(path)

        # Starting from here, largely a copy of StaticFileHandler
        if mime_type:
            self.set_header("Content-Type", mime_type)

        cache_time = self.get_cache_time(path, modified, mime_type)

        if cache_time > 0:
            self.set_header(
                "Expires",
                datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
                + datetime.timedelta(seconds=cache_time),
            )
            self.set_header("Cache-Control", "max-age=" + str(cache_time))
        else:
            self.set_header("Cache-Control", "public")

        self.set_extra_headers(path, gridout)

        # Check the If-Modified-Since, and don't send the result if the
        # content has not been modified
        ims_value = self.request.headers.get("If-Modified-Since")
        if ims_value is not None:
            if_since = None

            # parsedate() returns None for a header it cannot parse. The
            # header comes from the client, so a malformed value must be
            # ignored rather than crash the request with a TypeError.
            date_tuple = email.utils.parsedate(ims_value)
            if date_tuple is not None:
                try:
                    # If our MotorClient is tz-aware, assume the naive
                    # ims_value is in its time zone.
                    if_since = datetime.datetime.fromtimestamp(
                        time.mktime(date_tuple)
                    ).replace(tzinfo=modified.tzinfo)
                except (OverflowError, ValueError, OSError):
                    # The date is syntactically valid but not representable
                    # as a timestamp on this platform; treat it as absent.
                    if_since = None

            if if_since is not None and if_since >= modified:
                self.set_status(304)
                return

        # Same for Etag
        etag = self.request.headers.get("If-None-Match")
        if etag is not None and etag.strip('"') == checksum:
            self.set_status(304)
            return

        self.set_header("Content-Length", gridout.length)
        if include_body:
            await gridout.stream_to_handler(self)

        # Needed until fix for Tornado bug 751 is released, see
        # https://github.com/facebook/tornado/issues/751 and
        # https://github.com/facebook/tornado/commit/5491685
        self.finish()

    def head(self, path):
        """Serve a HEAD request by running :meth:`get` without the body."""
        # get() is a coroutine. Return its Future.
        return self.get(path, include_body=False)

    def get_cache_time(self, path, modified, mime_type):
        """Override to customize cache control behavior.

        Return a positive number of seconds to trigger aggressive caching or 0
        to mark resource as cacheable, only. 0 is the default.

        :Parameters:
          - `path`: A string, the matched group of the URL pattern
          - `modified`: The file's upload date, truncated to whole seconds
          - `mime_type`: The file's content type, or None if it is unknown
        """
        return 0

    def set_extra_headers(self, path, gridout):
        """For subclass to add extra headers to the response.

        Called by :meth:`get` after the cache headers are set and before the
        If-Modified-Since / If-None-Match checks. Does nothing by default.

        :Parameters:
          - `path`: A string, the matched group of the URL pattern
          - `gridout`: The file object returned by :meth:`get_gridfs_file`
        """