123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151 |
import datetime
import warnings
from dataclasses import dataclass
from functools import wraps
from typing import Optional

from django.contrib.sites.shortcuts import get_current_site
from django.core.paginator import EmptyPage, PageNotAnInteger
from django.http import Http404
from django.template.response import TemplateResponse
from django.urls import reverse
from django.utils import timezone
from django.utils.deprecation import RemovedInDjango50Warning
from django.utils.http import http_date
-
-
@dataclass
class SitemapIndexItem:
    """
    One entry of the sitemap index: the absolute URL of a sitemap (page)
    together with the last-modification value recorded for that sitemap.
    """

    # Absolute URL of the sitemap (page) this entry points at.
    location: str
    # Last-modification value of the sitemap (a date or a datetime), or None
    # when the sitemap does not provide one. `datetime.datetime` is a subclass
    # of `datetime.date`, so this annotation covers both.
    last_mod: Optional[datetime.date] = None

    # RemovedInDjango50Warning
    def __str__(self):
        """
        Return `location`, after emitting a RemovedInDjango50Warning because
        calling `__str__` on this class is deprecated.
        """
        msg = (
            "Calling `__str__` on SitemapIndexItem is deprecated, use the `location` "
            "attribute instead."
        )
        # stacklevel=2 attributes the warning to the caller of `__str__`.
        warnings.warn(msg, RemovedInDjango50Warning, stacklevel=2)
        return self.location
-
-
def x_robots_tag(func):
    """
    Decorate a view so that the response it returns carries an
    ``X-Robots-Tag`` header with the value ``noindex, noodp, noarchive``.
    """

    @wraps(func)
    def inner(request, *args, **kwargs):
        # Let the wrapped view build the response, then stamp the header on it.
        result = func(request, *args, **kwargs)
        result.headers["X-Robots-Tag"] = "noindex, noodp, noarchive"
        return result

    return inner
-
-
def _get_latest_lastmod(current_lastmod, new_lastmod):
    """
    Return the later of `current_lastmod` and `new_lastmod`.

    `new_lastmod` may be a date or a datetime. It is first converted to a
    datetime (a plain date becomes midnight of that day) and, if naive, made
    aware in UTC. When `current_lastmod` is None the converted `new_lastmod`
    is returned as-is.
    """
    candidate = new_lastmod
    if not isinstance(candidate, datetime.datetime):
        # Plain date: promote it to a datetime at 00:00:00 of that day.
        candidate = datetime.datetime.combine(candidate, datetime.time.min)
    if timezone.is_naive(candidate):
        candidate = timezone.make_aware(candidate, datetime.timezone.utc)
    if current_lastmod is None:
        return candidate
    return max(current_lastmod, candidate)
-
-
@x_robots_tag
def index(
    request,
    sitemaps,
    template_name="sitemap_index.xml",
    content_type="application/xml",
    sitemap_url_name="django.contrib.sitemaps.views.sitemap",
):
    """
    Render the sitemap index: one `SitemapIndexItem` per page of every
    section in `sitemaps`.

    `sitemaps` maps a section label to a sitemap object, or to a callable
    that returns one. Each section URL is obtained by reversing
    `sitemap_url_name` with the section label; pages after the first get a
    ``?p=<page>`` suffix. A ``Last-Modified`` header is set only when every
    section reports a latest lastmod.
    """
    req_protocol = request.scheme
    req_site = get_current_site(request)

    entries = []  # index items for every page of every section
    every_section_has_lastmod = True
    newest_lastmod = None
    for section, site in sitemaps.items():
        # Sections may be given as classes/factories; instantiate on demand.
        if callable(site):
            site = site()
        # A sitemap may pin its own protocol; otherwise follow the request.
        scheme = site.protocol if site.protocol is not None else req_protocol
        section_path = reverse(sitemap_url_name, kwargs={"section": section})
        base_url = "%s://%s%s" % (scheme, req_site.domain, section_path)
        section_lastmod = site.get_latest_lastmod()
        if every_section_has_lastmod:
            if section_lastmod is None:
                # One section without lastmod disables the header for good.
                every_section_has_lastmod = False
            else:
                newest_lastmod = _get_latest_lastmod(newest_lastmod, section_lastmod)
        # First page uses the bare URL; further pages carry a `p` parameter.
        entries.append(SitemapIndexItem(base_url, section_lastmod))
        entries.extend(
            SitemapIndexItem("%s?p=%s" % (base_url, page_number), section_lastmod)
            for page_number in range(2, site.paginator.num_pages + 1)
        )

    # If lastmod is defined for all sites, set header so as
    # ConditionalGetMiddleware is able to send 304 NOT MODIFIED
    headers = None
    if every_section_has_lastmod and newest_lastmod:
        headers = {"Last-Modified": http_date(newest_lastmod.timestamp())}
    return TemplateResponse(
        request,
        template_name,
        {"sitemaps": entries},
        content_type=content_type,
        headers=headers,
    )
-
-
@x_robots_tag
def sitemap(
    request,
    sitemaps,
    section=None,
    template_name="sitemap.xml",
    content_type="application/xml",
):
    """
    Render the URL set of a single section, or of every section when
    `section` is None.

    The page is read from the ``p`` query parameter (default 1). Raises
    Http404 when `section` is not a key of `sitemaps`, or when collecting
    the URLs raises EmptyPage or PageNotAnInteger. A ``Last-Modified``
    header is set only when every sitemap involved exposes a non-None
    `latest_lastmod`.
    """
    req_protocol = request.scheme
    req_site = get_current_site(request)

    if section is None:
        maps = sitemaps.values()
    else:
        if section not in sitemaps:
            raise Http404("No sitemap available for section: %r" % section)
        maps = [sitemaps[section]]
    page = request.GET.get("p", 1)

    urls = []
    newest_lastmod = None
    every_site_has_lastmod = True
    for site in maps:
        try:
            # Sections may be given as classes/factories; instantiate on demand.
            if callable(site):
                site = site()
            urls += site.get_urls(page=page, site=req_site, protocol=req_protocol)
            if not every_site_has_lastmod:
                continue
            site_lastmod = getattr(site, "latest_lastmod", None)
            if site_lastmod is None:
                # One sitemap without lastmod disables the header for good.
                every_site_has_lastmod = False
            else:
                newest_lastmod = _get_latest_lastmod(newest_lastmod, site_lastmod)
        except PageNotAnInteger:
            raise Http404("No page '%s'" % page)
        except EmptyPage:
            raise Http404("Page %s empty" % page)

    # If lastmod is defined for all sites, set header so as
    # ConditionalGetMiddleware is able to send 304 NOT MODIFIED
    headers = None
    if every_site_has_lastmod and newest_lastmod:
        headers = {"Last-Modified": http_date(newest_lastmod.timestamp())}
    return TemplateResponse(
        request,
        template_name,
        {"urlset": urls},
        content_type=content_type,
        headers=headers,
    )
|