123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173 |
- import re
- from urllib.parse import urlparse
-
- from django.conf import settings
- from django.core.exceptions import PermissionDenied
- from django.core.mail import mail_managers
- from django.http import HttpResponsePermanentRedirect
- from django.urls import is_valid_path
- from django.utils.deprecation import MiddlewareMixin
- from django.utils.http import escape_leading_slashes
-
-
class CommonMiddleware(MiddlewareMixin):
    """
    Middleware bundling a few site-wide housekeeping tasks:

        - Reject requests whose User-Agent header matches one of the
          patterns in settings.DISALLOWED_USER_AGENTS.

        - Rewrite URLs according to the APPEND_SLASH and PREPEND_WWW
          settings by redirecting to a URL with a trailing slash and/or a
          leading "www.".

            - With APPEND_SLASH enabled, a URL that lacks a trailing slash
              and does not match urlpatterns is retried with a slash
              appended. If the slashed URL matches, an HTTP redirect to it
              is returned; otherwise the original URL is processed as-is.

        - Set a Content-Length header on non-streaming responses that do
          not already carry one.

    Subclasses can change the kind of redirect issued by overriding the
    response_redirect_class attribute.
    """

    response_redirect_class = HttpResponsePermanentRedirect

    def process_request(self, request):
        """
        Raise PermissionDenied for disallowed User-Agents; otherwise return
        a redirect when settings.PREPEND_WWW and/or settings.APPEND_SLASH
        require the URL to be rewritten, or None to continue processing.
        """
        # Refuse requests coming from a denied User-Agent.
        meta = request.META
        if 'HTTP_USER_AGENT' in meta:
            for denied_pattern in settings.DISALLOWED_USER_AGENTS:
                if denied_pattern.search(meta['HTTP_USER_AGENT']):
                    raise PermissionDenied('Forbidden user agent')

        # Work out the "scheme://www.host" prefix if PREPEND_WWW applies.
        host = request.get_host()
        www_prefix = ''
        if settings.PREPEND_WWW and host and not host.startswith('www.'):
            www_prefix = '%s://www.%s' % (request.scheme, host)

        # Work out the path, with a trailing slash appended if needed.
        if self.should_redirect_with_slash(request):
            target_path = self.get_full_path_with_slash(request)
        else:
            target_path = request.get_full_path()

        # Redirect only if either the host or the path has to change.
        if www_prefix or target_path != request.get_full_path():
            return self.response_redirect_class(www_prefix + target_path)

    def should_redirect_with_slash(self, request):
        """
        Tell whether a trailing slash should be appended: settings.APPEND_SLASH
        is enabled, the request path has no trailing slash and is not a valid
        path, and the same path with a slash appended is valid.
        """
        if not settings.APPEND_SLASH or request.path_info.endswith('/'):
            return False
        # A per-request urlconf, when set on the request, takes precedence.
        urlconf = getattr(request, 'urlconf', None)
        if is_valid_path(request.path_info, urlconf):
            # The path already resolves as it stands; nothing to repair.
            return False
        return is_valid_path('%s/' % request.path_info, urlconf)

    def get_full_path_with_slash(self, request):
        """
        Return the request's full path with a trailing slash appended.

        Raise a RuntimeError when settings.DEBUG is True and the request
        method is POST, PUT, or PATCH.
        """
        # escape_leading_slashes() keeps the result from being interpreted
        # as a scheme-relative URL.
        slashed_path = escape_leading_slashes(
            request.get_full_path(force_append_slash=True)
        )
        if settings.DEBUG and request.method in ('POST', 'PUT', 'PATCH'):
            details = {
                'method': request.method,
                'url': request.get_host() + slashed_path,
            }
            raise RuntimeError(
                "You called this URL via %(method)s, but the URL doesn't end "
                "in a slash and you have APPEND_SLASH set. Django can't "
                "redirect to the slash URL while maintaining %(method)s data. "
                "Change your form to point to %(url)s (note the trailing "
                "slash), or set APPEND_SLASH=False in your Django settings." % details
            )
        return slashed_path

    def process_response(self, request, response):
        """
        Replace a 404 response with a redirect to the slash-appended path
        when should_redirect_with_slash() returns True; otherwise return the
        response, adding a Content-Length header if it is missing.
        """
        # A "Not Found" response may be recoverable by appending a slash.
        if response.status_code == 404 and self.should_redirect_with_slash(request):
            return self.response_redirect_class(self.get_full_path_with_slash(request))

        # Streaming responses are skipped; an existing Content-Length header
        # is left untouched.
        if not (response.streaming or response.has_header('Content-Length')):
            response['Content-Length'] = str(len(response.content))

        return response
-
-
class BrokenLinkEmailsMiddleware(MiddlewareMixin):
    """
    Middleware that emails the site managers about 404 responses that look
    like genuine broken links.
    """

    def process_response(self, request, response):
        """Send broken link emails for relevant 404 NOT FOUND responses."""
        # Only 404s are of interest, and nothing is sent while DEBUG is on.
        if response.status_code != 404 or settings.DEBUG:
            return response

        domain = request.get_host()
        path = request.get_full_path()
        referer = request.META.get('HTTP_REFERER', '')

        if self.is_ignorable_request(request, path, domain, referer):
            return response

        ua = request.META.get('HTTP_USER_AGENT', '<none>')
        ip = request.META.get('REMOTE_ADDR', '<none>')
        # Flag links that originate from this same domain in the subject.
        internal_tag = 'INTERNAL ' if self.is_internal_request(domain, referer) else ''
        subject = "Broken %slink on %s" % (internal_tag, domain)
        message = (
            "Referrer: %s\nRequested URL: %s\nUser agent: %s\n"
            "IP address: %s\n" % (referer, path, ua, ip)
        )
        mail_managers(subject, message, fail_silently=True)
        return response

    def is_internal_request(self, domain, referer):
        """
        Return True if the referring URL points at the same domain as the
        current request.
        """
        # The domain is matched literally, so a different subdomain counts
        # as a different domain.
        same_domain_pattern = "^https?://%s/" % re.escape(domain)
        return re.match(same_domain_pattern, referer) is not None

    def is_ignorable_request(self, request, uri, domain, referer):
        """
        Return True if the site managers should *not* be notified about this
        request, either because of project settings or because of one of the
        situations described by the inline comments.
        """
        # No referer at all.
        if not referer:
            return True

        # With APPEND_SLASH enabled, a referer equal to the current URL minus
        # its trailing slash indicates an internal redirect.
        if settings.APPEND_SLASH and uri.endswith('/') and referer == uri[:-1]:
            return True

        # An external referer containing '?' is taken to be a search engine.
        if not self.is_internal_request(domain, referer) and '?' in referer:
            return True

        # A referer equal to the current URL, scheme aside, is assumed to
        # come from a poorly implemented bot.
        referer_parts = urlparse(referer)
        if referer_parts.netloc in ('', domain) and referer_parts.path == uri:
            return True

        # Finally, honour the project's list of ignorable URL patterns.
        for ignorable in settings.IGNORABLE_404_URLS:
            if ignorable.search(uri):
                return True
        return False
|