1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
|
from urllib.parse import urlencode
from urllib.request import urlopen
from django.apps import apps as django_apps
from django.conf import settings
from django.core import paginator
from django.core.exceptions import ImproperlyConfigured
from django.urls import NoReverseMatch, reverse
from django.utils import translation
PING_URL = "https://www.google.com/webmasters/tools/ping"
class SitemapNotFound(Exception):
pass
def ping_google(sitemap_url=None, ping_url=PING_URL, sitemap_uses_https=True):
"""
Alert Google that the sitemap for the current site has been updated.
If sitemap_url is provided, it should be an absolute path to the sitemap
for this site -- e.g., '/sitemap.xml'. If sitemap_url is not provided, this
function will attempt to deduce it by using urls.reverse().
"""
sitemap_full_url = _get_sitemap_full_url(sitemap_url, sitemap_uses_https)
params = urlencode({'sitemap': sitemap_full_url})
urlopen('%s?%s' % (ping_url, params))
def _get_sitemap_full_url(sitemap_url, sitemap_uses_https=True):
if not django_apps.is_installed('django.contrib.sites'):
raise ImproperlyConfigured("ping_google requires django.contrib.sites, which isn't installed.")
if sitemap_url is None:
try:
# First, try to get the "index" sitemap URL.
sitemap_url = reverse('django.contrib.sitemaps.views.index')
except NoReverseMatch:
try:
# Next, try for the "global" sitemap URL.
sitemap_url = reverse('django.contrib.sitemaps.views.sitemap')
except NoReverseMatch:
pass
if sitemap_url is None:
raise SitemapNotFound("You didn't provide a sitemap_url, and the sitemap URL couldn't be auto-detected.")
Site = django_apps.get_model('sites.Site')
current_site = Site.objects.get_current()
scheme = 'https' if sitemap_uses_https else 'http'
return '%s://%s%s' % (scheme, current_site.domain, sitemap_url)
class Sitemap:
# This limit is defined by Google. See the index documentation at
# https://www.sitemaps.org/protocol.html#index.
limit = 50000
# If protocol is None, the URLs in the sitemap will use the protocol
# with which the sitemap was requested.
protocol = None
# Enables generating URLs for all languages.
i18n = False
# Override list of languages to use.
languages = None
# Enables generating alternate/hreflang links.
alternates = False
# Add an alternate/hreflang link with value 'x-default'.
x_default = False
def _get(self, name, item, default=None):
try:
attr = getattr(self, name)
except AttributeError:
return default
if callable(attr):
if self.i18n:
# Split the (item, lang_code) tuples again for the location,
# priority, lastmod and changefreq method calls.
item, lang_code = item
return attr(item)
return attr
def _languages(self):
if self.languages is not None:
return self.languages
return [lang_code for lang_code, _ in settings.LANGUAGES]
def _items(self):
if self.i18n:
# Create (item, lang_code) tuples for all items and languages.
# This is necessary to paginate with all languages already considered.
items = [
(item, lang_code)
for lang_code in self._languages()
for item in self.items()
]
return items
return self.items()
def _location(self, item, force_lang_code=None):
if self.i18n:
obj, lang_code = item
# Activate language from item-tuple or forced one before calling location.
with translation.override(force_lang_code or lang_code):
return self._get('location', item)
return self._get('location', item)
@property
def paginator(self):
return paginator.Paginator(self._items(), self.limit)
def items(self):
return []
def location(self, item):
return item.get_absolute_url()
def get_protocol(self, protocol=None):
# Determine protocol
return self.protocol or protocol or 'http'
def get_domain(self, site=None):
# Determine domain
if site is None:
if django_apps.is_installed('django.contrib.sites'):
Site = django_apps.get_model('sites.Site')
try:
site = Site.objects.get_current()
except Site.DoesNotExist:
pass
if site is None:
raise ImproperlyConfigured(
"To use sitemaps, either enable the sites framework or pass "
"a Site/RequestSite object in your view."
)
return site.domain
def get_urls(self, page=1, site=None, protocol=None):
protocol = self.get_protocol(protocol)
domain = self.get_domain(site)
return self._urls(page, protocol, domain)
def _urls(self, page, protocol, domain):
urls = []
latest_lastmod = None
all_items_lastmod = True # track if all items have a lastmod
paginator_page = self.paginator.page(page)
for item in paginator_page.object_list:
loc = f'{protocol}://{domain}{self._location(item)}'
priority = self._get('priority', item)
lastmod = self._get('lastmod', item)
if all_items_lastmod:
all_items_lastmod = lastmod is not None
if (all_items_lastmod and
(latest_lastmod is None or lastmod > latest_lastmod)):
latest_lastmod = lastmod
url_info = {
'item': item,
'location': loc,
'lastmod': lastmod,
'changefreq': self._get('changefreq', item),
'priority': str(priority if priority is not None else ''),
'alternates': [],
}
if self.i18n and self.alternates:
for lang_code in self._languages():
loc = f'{protocol}://{domain}{self._location(item, lang_code)}'
url_info['alternates'].append({
'location': loc,
'lang_code': lang_code,
})
if self.x_default:
lang_code = settings.LANGUAGE_CODE
loc = f'{protocol}://{domain}{self._location(item, lang_code)}'
loc = loc.replace(f'/{lang_code}/', '/', 1)
url_info['alternates'].append({
'location': loc,
'lang_code': 'x-default',
})
urls.append(url_info)
if all_items_lastmod and latest_lastmod:
self.latest_lastmod = latest_lastmod
return urls
class GenericSitemap(Sitemap):
priority = None
changefreq = None
def __init__(self, info_dict, priority=None, changefreq=None, protocol=None):
self.queryset = info_dict['queryset']
self.date_field = info_dict.get('date_field')
self.priority = self.priority or priority
self.changefreq = self.changefreq or changefreq
self.protocol = self.protocol or protocol
def items(self):
# Make sure to return a clone; we don't want premature evaluation.
return self.queryset.filter()
def lastmod(self, item):
if self.date_field is not None:
return getattr(item, self.date_field)
return None
|