123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603 |
- import copy
- import re
- import warnings
- from io import BytesIO
- from itertools import chain
- from urllib.parse import quote, urlencode, urljoin, urlsplit
-
- from django.conf import settings
- from django.core import signing
- from django.core.exceptions import (
- DisallowedHost, ImproperlyConfigured, RequestDataTooBig,
- )
- from django.core.files import uploadhandler
- from django.http.multipartparser import MultiPartParser, MultiPartParserError
- from django.utils.datastructures import (
- CaseInsensitiveMapping, ImmutableList, MultiValueDict,
- )
- from django.utils.deprecation import RemovedInDjango30Warning
- from django.utils.encoding import escape_uri_path, iri_to_uri
- from django.utils.functional import cached_property
- from django.utils.http import is_same_domain, limited_parse_qsl
-
# Sentinel used as the default for HttpRequest.get_signed_cookie(): while
# ``default`` is still RAISE_ERROR, a missing or badly signed cookie re-raises
# the underlying exception instead of returning a fallback value.
RAISE_ERROR = object()
# Shape check applied by split_domain_port() to the lowercased host: either a
# run of [a-z0-9.-] characters or a bracketed literal containing hex digits,
# dots and colons, optionally followed by ":<digits>".
host_validation_re = re.compile(r"^([a-z0-9.-]+|\[[a-f0-9]*:[a-f0-9\.:]+\])(:\d+)?$")
-
-
class UnreadablePostError(IOError):
    """Raised when reading from the request's data stream fails with an IOError."""
-
-
class RawPostDataException(Exception):
    """
    Raised when ``HttpRequest.body`` is accessed after the request's data
    stream has already been read from (for example because multipart POST
    data was consumed through POST, FILES, etc.).
    """
-
-
class HttpRequest:
    """
    A basic HTTP request.

    Holds the request data containers (``GET``, ``POST``, ``COOKIES``,
    ``META``, ``FILES``) and provides helpers for host/port/scheme detection,
    absolute URI construction, signed cookies, upload handling, and a
    file-like interface over the request body.

    The file-like methods read from ``self._stream`` and the body/POST logic
    consults ``self._read_started``; neither is assigned in ``__init__``
    here, so a subclass (e.g. WSGIRequest) is expected to provide them.
    """

    # The encoding used in GET/POST dicts. None means use default setting.
    _encoding = None
    # Class-level placeholder; an instance-level list replaces it the first
    # time upload_handlers is read or assigned.
    _upload_handlers = []

    def __init__(self):
        # WARNING: The `WSGIRequest` subclass doesn't call `super`.
        # Any variable assignment made here should also happen in
        # `WSGIRequest.__init__()`.

        self.GET = QueryDict(mutable=True)
        self.POST = QueryDict(mutable=True)
        self.COOKIES = {}
        self.META = {}
        self.FILES = MultiValueDict()

        self.path = ''
        self.path_info = ''
        self.method = None
        self.resolver_match = None
        self.content_type = None
        self.content_params = None

    def __repr__(self):
        """
        Return ``<ClassName: METHOD 'full/path'>``, or just ``<ClassName>``
        when the method is unset or the full path is empty.
        """
        if self.method is None or not self.get_full_path():
            return '<%s>' % self.__class__.__name__
        return '<%s: %s %r>' % (self.__class__.__name__, self.method, self.get_full_path())

    @cached_property
    def headers(self):
        """Return an HttpHeaders mapping built from ``self.META``."""
        return HttpHeaders(self.META)

    def _get_raw_host(self):
        """
        Return the HTTP host using the environment or request headers. Skip
        allowed hosts protection, so may return an insecure host.
        """
        # We try three options, in order of decreasing preference.
        if settings.USE_X_FORWARDED_HOST and (
                'HTTP_X_FORWARDED_HOST' in self.META):
            host = self.META['HTTP_X_FORWARDED_HOST']
        elif 'HTTP_HOST' in self.META:
            host = self.META['HTTP_HOST']
        else:
            # Reconstruct the host using the algorithm from PEP 333.
            host = self.META['SERVER_NAME']
            server_port = self.get_port()
            # Only spell out the port when it differs from the scheme default.
            if server_port != ('443' if self.is_secure() else '80'):
                host = '%s:%s' % (host, server_port)
        return host

    def get_host(self):
        """
        Return the HTTP host using the environment or request headers.

        Raise DisallowedHost if the host is malformed or its domain does not
        match the allowed hosts.
        """
        host = self._get_raw_host()

        # Allow variants of localhost if ALLOWED_HOSTS is empty and DEBUG=True.
        allowed_hosts = settings.ALLOWED_HOSTS
        if settings.DEBUG and not allowed_hosts:
            allowed_hosts = ['localhost', '127.0.0.1', '[::1]']

        # An empty domain means the host failed split_domain_port()'s
        # validation; the port itself is not checked here.
        domain, port = split_domain_port(host)
        if domain and validate_host(domain, allowed_hosts):
            return host
        else:
            msg = "Invalid HTTP_HOST header: %r." % host
            if domain:
                msg += " You may need to add %r to ALLOWED_HOSTS." % domain
            else:
                msg += " The domain name provided is not valid according to RFC 1034/1035."
            raise DisallowedHost(msg)

    def get_port(self):
        """Return the port number for the request as a string."""
        if settings.USE_X_FORWARDED_PORT and 'HTTP_X_FORWARDED_PORT' in self.META:
            port = self.META['HTTP_X_FORWARDED_PORT']
        else:
            port = self.META['SERVER_PORT']
        return str(port)

    def get_full_path(self, force_append_slash=False):
        """
        Return ``self.path`` followed by the query string, if any. When
        ``force_append_slash`` is true, add a '/' after the path unless it
        already ends with one.
        """
        return self._get_full_path(self.path, force_append_slash)

    def get_full_path_info(self, force_append_slash=False):
        """Same as get_full_path(), but based on ``self.path_info``."""
        return self._get_full_path(self.path_info, force_append_slash)

    def _get_full_path(self, path, force_append_slash):
        """
        Build ``<escaped path>[/][?<query string>]`` from ``path`` and the
        ``QUERY_STRING`` entry of ``self.META`` (omitted when empty/missing).
        """
        # RFC 3986 requires query string arguments to be in the ASCII range.
        # Rather than crash if this doesn't happen, we encode defensively.
        return '%s%s%s' % (
            escape_uri_path(path),
            '/' if force_append_slash and not path.endswith('/') else '',
            ('?' + iri_to_uri(self.META.get('QUERY_STRING', ''))) if self.META.get('QUERY_STRING', '') else ''
        )

    def get_signed_cookie(self, key, default=RAISE_ERROR, salt='', max_age=None):
        """
        Attempt to return a signed cookie. If the signature fails or the
        cookie has expired, raise an exception, unless the `default` argument
        is provided, in which case return that value.

        A cookie missing from ``self.COOKIES`` is treated the same way: the
        KeyError propagates unless ``default`` was given. The signer's salt
        is ``key + salt`` and ``max_age`` is passed through to ``unsign()``.
        """
        try:
            cookie_value = self.COOKIES[key]
        except KeyError:
            if default is not RAISE_ERROR:
                return default
            else:
                raise
        try:
            value = signing.get_cookie_signer(salt=key + salt).unsign(
                cookie_value, max_age=max_age)
        except signing.BadSignature:
            if default is not RAISE_ERROR:
                return default
            else:
                raise
        return value

    def get_raw_uri(self):
        """
        Return an absolute URI from variables available in this request. Skip
        allowed hosts protection, so may return insecure URI.
        """
        return '{scheme}://{host}{path}'.format(
            scheme=self.scheme,
            host=self._get_raw_host(),
            path=self.get_full_path(),
        )

    def build_absolute_uri(self, location=None):
        """
        Build an absolute URI from the location and the variables available in
        this request. If no ``location`` is specified, build the absolute URI
        using request.get_full_path(). If the location is absolute, convert it
        to an RFC 3987 compliant URI and return it. If location is relative or
        is scheme-relative (i.e., ``//example.com/``), urljoin() it to a base
        URL constructed from the request variables.
        """
        if location is None:
            # Make it an absolute url (but schemeless and domainless) for the
            # edge case that the path starts with '//'.
            location = '//%s' % self.get_full_path()
        bits = urlsplit(location)
        if not (bits.scheme and bits.netloc):
            # Handle the simple, most common case. If the location is absolute
            # and a scheme or host (netloc) isn't provided, skip an expensive
            # urljoin() as long as no path segments are '.' or '..'.
            if (bits.path.startswith('/') and not bits.scheme and not bits.netloc and
                    '/./' not in bits.path and '/../' not in bits.path):
                # If location starts with '//' but has no netloc, reuse the
                # schema and netloc from the current request. Strip the double
                # slashes and continue as if it wasn't specified.
                if location.startswith('//'):
                    location = location[2:]
                location = self._current_scheme_host + location
            else:
                # Join the constructed URL with the provided location, which
                # allows the provided location to apply query strings to the
                # base path.
                location = urljoin(self._current_scheme_host + self.path, location)
        return iri_to_uri(location)

    @cached_property
    def _current_scheme_host(self):
        """
        Return ``'<scheme>://<host>'`` for this request. Uses get_host(), so
        a disallowed host raises DisallowedHost here.
        """
        return '{}://{}'.format(self.scheme, self.get_host())

    def _get_scheme(self):
        """
        Hook for subclasses like WSGIRequest to implement. Return 'http' by
        default.
        """
        return 'http'

    @property
    def scheme(self):
        """
        Return 'https' or 'http' for this request.

        If settings.SECURE_PROXY_SSL_HEADER is set and the header it names is
        present in ``self.META``, the result is 'https' only when the header
        value equals the configured secure value. Otherwise defer to
        _get_scheme(). Raise ImproperlyConfigured if the setting cannot be
        unpacked into two values.
        """
        if settings.SECURE_PROXY_SSL_HEADER:
            try:
                header, secure_value = settings.SECURE_PROXY_SSL_HEADER
            except ValueError:
                raise ImproperlyConfigured(
                    'The SECURE_PROXY_SSL_HEADER setting must be a tuple containing two values.'
                )
            header_value = self.META.get(header)
            if header_value is not None:
                return 'https' if header_value == secure_value else 'http'
        return self._get_scheme()

    def is_secure(self):
        """Return True if the request's scheme is 'https'."""
        return self.scheme == 'https'

    def is_ajax(self):
        """
        Return True if ``META['HTTP_X_REQUESTED_WITH']`` equals
        'XMLHttpRequest'.
        """
        return self.META.get('HTTP_X_REQUESTED_WITH') == 'XMLHttpRequest'

    @property
    def encoding(self):
        """The encoding used for GET/POST data; None means the default setting."""
        return self._encoding

    @encoding.setter
    def encoding(self, val):
        """
        Set the encoding used for GET/POST accesses. If the GET or POST
        dictionary has already been created, remove and recreate it on the
        next access (so that it is decoded correctly).
        """
        self._encoding = val
        # NOTE(review): "recreate on next access" presumably relies on a
        # subclass exposing GET/POST lazily; this base class assigns GET
        # directly in __init__ -- confirm against the subclass.
        if hasattr(self, 'GET'):
            del self.GET
        if hasattr(self, '_post'):
            del self._post

    def _initialize_handlers(self):
        """
        Populate ``self._upload_handlers`` by calling
        uploadhandler.load_handler() with this request for each entry in
        settings.FILE_UPLOAD_HANDLERS.
        """
        self._upload_handlers = [uploadhandler.load_handler(handler, self)
                                 for handler in settings.FILE_UPLOAD_HANDLERS]

    @property
    def upload_handlers(self):
        """
        The upload handlers for this request, loaded from settings on first
        access if none have been set.
        """
        if not self._upload_handlers:
            # If there are no upload handlers defined, initialize them from settings.
            self._initialize_handlers()
        return self._upload_handlers

    @upload_handlers.setter
    def upload_handlers(self, upload_handlers):
        # ``_files`` only exists once POST/FILES loading has run (see
        # _load_post_and_files / _mark_post_parse_error).
        if hasattr(self, '_files'):
            raise AttributeError("You cannot set the upload handlers after the upload has been processed.")
        self._upload_handlers = upload_handlers

    def parse_file_upload(self, META, post_data):
        """Return a tuple of (POST QueryDict, FILES MultiValueDict)."""
        # Re-assign the handlers wrapped in an ImmutableList carrying the
        # warning text below. The assignment goes through the setter above,
        # so it raises AttributeError if ``_files`` already exists.
        self.upload_handlers = ImmutableList(
            self.upload_handlers,
            warning="You cannot alter upload handlers after the upload has been processed."
        )
        parser = MultiPartParser(META, post_data, self.upload_handlers, self.encoding)
        return parser.parse()

    @property
    def body(self):
        """
        Return the request body, reading it from the stream on first access
        and caching it in ``self._body``.

        Raise RawPostDataException if the stream has already been read from,
        RequestDataTooBig if ``META['CONTENT_LENGTH']`` exceeds
        settings.DATA_UPLOAD_MAX_MEMORY_SIZE (when that setting is not None),
        and UnreadablePostError if reading fails with an IOError.
        """
        if not hasattr(self, '_body'):
            if self._read_started:
                raise RawPostDataException("You cannot access body after reading from request's data stream")

            # Limit the maximum request data size that will be handled in-memory.
            if (settings.DATA_UPLOAD_MAX_MEMORY_SIZE is not None and
                    int(self.META.get('CONTENT_LENGTH') or 0) > settings.DATA_UPLOAD_MAX_MEMORY_SIZE):
                raise RequestDataTooBig('Request body exceeded settings.DATA_UPLOAD_MAX_MEMORY_SIZE.')

            try:
                self._body = self.read()
            except IOError as e:
                raise UnreadablePostError(*e.args) from e
            # Point the stream at the buffered data so that later read()
            # calls operate on a BytesIO holding the same bytes.
            self._stream = BytesIO(self._body)
        return self._body

    def _mark_post_parse_error(self):
        """
        Set ``_post`` and ``_files`` to empty containers so that POST data is
        not parsed again after a failure.
        """
        self._post = QueryDict()
        self._files = MultiValueDict()

    def _load_post_and_files(self):
        """Populate self._post and self._files if the content-type is a form type"""
        # Only POST requests are parsed; any other method gets empty containers.
        if self.method != 'POST':
            self._post, self._files = QueryDict(encoding=self._encoding), MultiValueDict()
            return
        # The stream was read from without the data being kept in ``_body``,
        # so there is nothing left to parse.
        if self._read_started and not hasattr(self, '_body'):
            self._mark_post_parse_error()
            return

        if self.content_type == 'multipart/form-data':
            if hasattr(self, '_body'):
                # Use already read data
                data = BytesIO(self._body)
            else:
                # Let the parser read from this request's file-like interface.
                data = self
            try:
                self._post, self._files = self.parse_file_upload(self.META, data)
            except MultiPartParserError:
                # An error occurred while parsing POST data. Since when
                # formatting the error the request handler might access
                # self.POST, set self._post and self._file to prevent
                # attempts to parse POST data again.
                self._mark_post_parse_error()
                raise
        elif self.content_type == 'application/x-www-form-urlencoded':
            self._post, self._files = QueryDict(self.body, encoding=self._encoding), MultiValueDict()
        else:
            # Not a form content type: leave POST and FILES empty.
            self._post, self._files = QueryDict(encoding=self._encoding), MultiValueDict()

    def close(self):
        """Close every file held in ``self._files``, if that attribute exists."""
        if hasattr(self, '_files'):
            # lists() yields (key, list) pairs; l[1] is the list of files.
            for f in chain.from_iterable(l[1] for l in self._files.lists()):
                f.close()

    # File-like and iterator interface.
    #
    # Expects self._stream to be set to an appropriate source of bytes by
    # a corresponding request subclass (e.g. WSGIRequest).
    # Also when request data has already been read by request.POST or
    # request.body, self._stream points to a BytesIO instance
    # containing that data.

    def read(self, *args, **kwargs):
        """
        Read from ``self._stream``, forwarding all arguments, and record that
        reading has started. An IOError is re-raised as UnreadablePostError.
        """
        self._read_started = True
        try:
            return self._stream.read(*args, **kwargs)
        except IOError as e:
            raise UnreadablePostError(*e.args) from e

    def readline(self, *args, **kwargs):
        """
        Read one line from ``self._stream``, forwarding all arguments, and
        record that reading has started. An IOError is re-raised as
        UnreadablePostError.
        """
        self._read_started = True
        try:
            return self._stream.readline(*args, **kwargs)
        except IOError as e:
            raise UnreadablePostError(*e.args) from e

    def __iter__(self):
        """Iterate over readline() results until it returns ``b''``."""
        return iter(self.readline, b'')

    def xreadlines(self):
        """
        Deprecated: emit RemovedInDjango30Warning, then yield the same lines
        as iterating the request.
        """
        warnings.warn(
            'HttpRequest.xreadlines() is deprecated in favor of iterating the '
            'request.', RemovedInDjango30Warning, stacklevel=2,
        )
        yield from self

    def readlines(self):
        """Return the lines produced by iterating the request, as a list."""
        return list(self)
-
-
class HttpHeaders(CaseInsensitiveMapping):
    """
    Case-insensitive mapping of HTTP header names to values, built from a
    WSGI-style environ dictionary.
    """

    HTTP_PREFIX = 'HTTP_'
    # PEP 333 gives two headers which aren't prepended with HTTP_.
    UNPREFIXED_HEADERS = {'CONTENT_TYPE', 'CONTENT_LENGTH'}

    def __init__(self, environ):
        """Collect every environ entry whose key maps to a header name."""
        candidates = (
            (self.parse_header_name(key), value)
            for key, value in environ.items()
        )
        # Keys that aren't headers (or that reduce to an empty name) are
        # dropped; a repeated name keeps the last value seen.
        super().__init__({name: value for name, value in candidates if name})

    @classmethod
    def parse_header_name(cls, header):
        """
        Convert an environ key to a header name, e.g. ``HTTP_ACCEPT_LANGUAGE``
        to ``Accept-Language``. Return None for keys that carry neither the
        ``HTTP_`` prefix nor one of the unprefixed header names.
        """
        prefix = cls.HTTP_PREFIX
        if header.startswith(prefix):
            bare = header[len(prefix):]
        elif header in cls.UNPREFIXED_HEADERS:
            bare = header
        else:
            return None
        return bare.replace('_', '-').title()
-
-
class QueryDict(MultiValueDict):
    """
    A MultiValueDict specialized for query-string data.

    Suitable for GET or POST data, where the same key may occur more than
    once (for instance the values submitted by a <select multiple> field).

    Instances are immutable unless created with ``mutable=True``; copy()
    always hands back a mutable copy.

    Keys and values given as bytes are decoded to str using the instance's
    encoding (DEFAULT_CHARSET by default).
    """

    # Class-level fallbacks so that unpickled instances have valid values;
    # both are assigned again in __init__.
    _mutable = True
    _encoding = None

    def __init__(self, query_string=None, mutable=False, encoding=None):
        """
        Parse ``query_string`` (str or bytes, may be empty/None) into this
        dict, then apply the requested mutability.
        """
        super().__init__()
        self.encoding = encoding or settings.DEFAULT_CHARSET
        raw = query_string or ''
        parser_options = dict(
            keep_blank_values=True,
            fields_limit=settings.DATA_UPLOAD_MAX_NUMBER_FIELDS,
            encoding=self.encoding,
        )
        if isinstance(raw, bytes):
            # query_string normally contains URL-encoded data, a subset of ASCII.
            try:
                raw = raw.decode(self.encoding)
            except UnicodeDecodeError:
                # ... but some user agents are misbehaving :-(
                raw = raw.decode('iso-8859-1')
        # The instance is still mutable here (class-level default), which is
        # what lets appendlist() run before the final flag is applied.
        for name, item in limited_parse_qsl(raw, **parser_options):
            self.appendlist(name, item)
        self._mutable = mutable

    @classmethod
    def fromkeys(cls, iterable, value='', mutable=False, encoding=None):
        """
        Build a QueryDict whose keys come from ``iterable`` (repeats allowed),
        each associated with ``value``.
        """
        instance = cls('', mutable=True, encoding=encoding)
        for name in iterable:
            instance.appendlist(name, value)
        if not mutable:
            instance._mutable = False
        return instance

    @property
    def encoding(self):
        """The encoding for this dict, defaulting lazily to DEFAULT_CHARSET."""
        current = self._encoding
        if current is None:
            current = self._encoding = settings.DEFAULT_CHARSET
        return current

    @encoding.setter
    def encoding(self, value):
        self._encoding = value

    def _assert_mutable(self):
        """Raise AttributeError if this instance is immutable."""
        if self._mutable:
            return
        raise AttributeError("This QueryDict instance is immutable")

    def __setitem__(self, key, value):
        self._assert_mutable()
        super().__setitem__(
            bytes_to_text(key, self.encoding),
            bytes_to_text(value, self.encoding),
        )

    def __delitem__(self, key):
        self._assert_mutable()
        super().__delitem__(key)

    def __copy__(self):
        """Return a mutable shallow copy sharing this dict's encoding."""
        duplicate = self.__class__('', mutable=True, encoding=self.encoding)
        for name, values in self.lists():
            duplicate.setlist(name, values)
        return duplicate

    def __deepcopy__(self, memo):
        """Return a mutable deep copy sharing this dict's encoding."""
        duplicate = self.__class__('', mutable=True, encoding=self.encoding)
        # Register the copy before descending so self-references resolve to it.
        memo[id(self)] = duplicate
        for name, values in self.lists():
            duplicate.setlist(copy.deepcopy(name, memo), copy.deepcopy(values, memo))
        return duplicate

    def setlist(self, key, list_):
        self._assert_mutable()
        text_key = bytes_to_text(key, self.encoding)
        text_values = [bytes_to_text(item, self.encoding) for item in list_]
        super().setlist(text_key, text_values)

    def setlistdefault(self, key, default_list=None):
        self._assert_mutable()
        return super().setlistdefault(key, default_list)

    def appendlist(self, key, value):
        self._assert_mutable()
        super().appendlist(
            bytes_to_text(key, self.encoding),
            bytes_to_text(value, self.encoding),
        )

    def pop(self, key, *args):
        self._assert_mutable()
        return super().pop(key, *args)

    def popitem(self):
        self._assert_mutable()
        return super().popitem()

    def clear(self):
        self._assert_mutable()
        super().clear()

    def setdefault(self, key, default=None):
        self._assert_mutable()
        return super().setdefault(
            bytes_to_text(key, self.encoding),
            bytes_to_text(default, self.encoding),
        )

    def copy(self):
        """Return a mutable copy of this object."""
        return self.__deepcopy__({})

    def urlencode(self, safe=None):
        """
        Return an encoded string of all query string arguments.

        `safe` specifies characters which don't require quoting, for example::

            >>> q = QueryDict(mutable=True)
            >>> q['next'] = '/a&b/'
            >>> q.urlencode()
            'next=%2Fa%26b%2F'
            >>> q.urlencode(safe='/')
            'next=/a%26b/'
        """
        if safe:
            safe = safe.encode(self.encoding)

            def render(name, item):
                return '%s=%s' % ((quote(name, safe), quote(item, safe)))
        else:
            def render(name, item):
                return urlencode({name: item})

        # Keys and values are passed to the encoder as bytes in this dict's
        # encoding; values are stringified first.
        pairs = []
        for name, values in self.lists():
            pairs.extend(
                render(name.encode(self.encoding), str(item).encode(self.encoding))
                for item in values
            )
        return '&'.join(pairs)
-
-
- # It's neither necessary nor appropriate to use
- # django.utils.encoding.force_text for parsing URLs and form inputs. Thus,
- # this slightly more restricted function, used by QueryDict.
def bytes_to_text(s, encoding):
    """
    Decode ``s`` to str with ``encoding`` when it is a bytes object.

    Byte sequences that are invalid in that encoding are replaced with the
    Unicode replacement character (U+FFFD) rather than raising.

    Anything that is not bytes is returned unchanged.
    """
    if not isinstance(s, bytes):
        return s
    return str(s, encoding, 'replace')
-
-
def split_domain_port(host):
    """
    Split ``host`` into a ``(domain, port)`` tuple.

    The host is lowercased first. If it does not match
    ``host_validation_re``, both elements are empty strings. A missing port
    is returned as an empty string, and one trailing dot is dropped from the
    domain.
    """
    lowered = host.lower()
    if host_validation_re.match(lowered) is None:
        return '', ''

    # A closing bracket in last position means a bracketed IPv6 literal
    # without a port.
    if lowered[-1] == ']':
        return lowered, ''

    head, colon, tail = lowered.rpartition(':')
    if colon:
        domain, port = head, tail
    else:
        # No colon at all: rpartition leaves the whole string in ``tail``.
        domain, port = tail, ''
    # Remove a trailing dot (if present) from the domain.
    if domain.endswith('.'):
        domain = domain[:-1]
    return domain, port
-
-
def validate_host(host, allowed_hosts):
    """
    Check ``host`` against the ``allowed_hosts`` patterns.

    The pattern ``*`` accepts any host. Every other pattern is compared with
    ``is_same_domain()``: a pattern starting with a period matches that
    domain and all of its subdomains (``.example.com`` matches
    ``example.com`` and ``foo.example.com``), and anything else must match
    exactly.

    Note: the host is assumed to be lowercased already, with any port
    stripped off.

    Return ``True`` as soon as one pattern matches, ``False`` if none does.
    """
    for pattern in allowed_hosts:
        if pattern == '*' or is_same_domain(host, pattern):
            return True
    return False
|