1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151
|
import datetime
import warnings
from dataclasses import dataclass
from functools import wraps
from django.contrib.sites.shortcuts import get_current_site
from django.core.paginator import EmptyPage, PageNotAnInteger
from django.http import Http404
from django.template.response import TemplateResponse
from django.urls import reverse
from django.utils import timezone
from django.utils.deprecation import RemovedInDjango50Warning
from django.utils.http import http_date
@dataclass
class SitemapIndexItem:
location: str
last_mod: bool = None
# RemovedInDjango50Warning
def __str__(self):
msg = (
"Calling `__str__` on SitemapIndexItem is deprecated, use the `location` "
"attribute instead."
)
warnings.warn(msg, RemovedInDjango50Warning, stacklevel=2)
return self.location
def x_robots_tag(func):
@wraps(func)
def inner(request, *args, **kwargs):
response = func(request, *args, **kwargs)
response.headers["X-Robots-Tag"] = "noindex, noodp, noarchive"
return response
return inner
def _get_latest_lastmod(current_lastmod, new_lastmod):
"""
Returns the latest `lastmod` where `lastmod` can be either a date or a
datetime.
"""
if not isinstance(new_lastmod, datetime.datetime):
new_lastmod = datetime.datetime.combine(new_lastmod, datetime.time.min)
if timezone.is_naive(new_lastmod):
new_lastmod = timezone.make_aware(new_lastmod, datetime.timezone.utc)
return new_lastmod if current_lastmod is None else max(current_lastmod, new_lastmod)
@x_robots_tag
def index(
request,
sitemaps,
template_name="sitemap_index.xml",
content_type="application/xml",
sitemap_url_name="django.contrib.sitemaps.views.sitemap",
):
req_protocol = request.scheme
req_site = get_current_site(request)
sites = [] # all sections' sitemap URLs
all_indexes_lastmod = True
latest_lastmod = None
for section, site in sitemaps.items():
# For each section label, add links of all pages of its sitemap
# (usually generated by the `sitemap` view).
if callable(site):
site = site()
protocol = req_protocol if site.protocol is None else site.protocol
sitemap_url = reverse(sitemap_url_name, kwargs={"section": section})
absolute_url = "%s://%s%s" % (protocol, req_site.domain, sitemap_url)
site_lastmod = site.get_latest_lastmod()
if all_indexes_lastmod:
if site_lastmod is not None:
latest_lastmod = _get_latest_lastmod(latest_lastmod, site_lastmod)
else:
all_indexes_lastmod = False
sites.append(SitemapIndexItem(absolute_url, site_lastmod))
# Add links to all pages of the sitemap.
for page in range(2, site.paginator.num_pages + 1):
sites.append(
SitemapIndexItem("%s?p=%s" % (absolute_url, page), site_lastmod)
)
# If lastmod is defined for all sites, set header so as
# ConditionalGetMiddleware is able to send 304 NOT MODIFIED
if all_indexes_lastmod and latest_lastmod:
headers = {"Last-Modified": http_date(latest_lastmod.timestamp())}
else:
headers = None
return TemplateResponse(
request,
template_name,
{"sitemaps": sites},
content_type=content_type,
headers=headers,
)
@x_robots_tag
def sitemap(
request,
sitemaps,
section=None,
template_name="sitemap.xml",
content_type="application/xml",
):
req_protocol = request.scheme
req_site = get_current_site(request)
if section is not None:
if section not in sitemaps:
raise Http404("No sitemap available for section: %r" % section)
maps = [sitemaps[section]]
else:
maps = sitemaps.values()
page = request.GET.get("p", 1)
lastmod = None
all_sites_lastmod = True
urls = []
for site in maps:
try:
if callable(site):
site = site()
urls.extend(site.get_urls(page=page, site=req_site, protocol=req_protocol))
if all_sites_lastmod:
site_lastmod = getattr(site, "latest_lastmod", None)
if site_lastmod is not None:
lastmod = _get_latest_lastmod(lastmod, site_lastmod)
else:
all_sites_lastmod = False
except EmptyPage:
raise Http404("Page %s empty" % page)
except PageNotAnInteger:
raise Http404("No page '%s'" % page)
# If lastmod is defined for all sites, set header so as
# ConditionalGetMiddleware is able to send 304 NOT MODIFIED
if all_sites_lastmod:
headers = {"Last-Modified": http_date(lastmod.timestamp())} if lastmod else None
else:
headers = None
return TemplateResponse(
request,
template_name,
{"urlset": urls},
content_type=content_type,
headers=headers,
)
|