Development of an internal social media platform with personalised dashboards for students
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

request.py 21KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583
  1. import copy
  2. import re
  3. import warnings
  4. from io import BytesIO
  5. from itertools import chain
  6. from urllib.parse import quote, urlencode, urljoin, urlsplit
  7. from django.conf import settings
  8. from django.core import signing
  9. from django.core.exceptions import (
  10. DisallowedHost, ImproperlyConfigured, RequestDataTooBig,
  11. )
  12. from django.core.files import uploadhandler
  13. from django.http.multipartparser import MultiPartParser, MultiPartParserError
  14. from django.utils.datastructures import ImmutableList, MultiValueDict
  15. from django.utils.deprecation import RemovedInDjango30Warning
  16. from django.utils.encoding import escape_uri_path, iri_to_uri
  17. from django.utils.functional import cached_property
  18. from django.utils.http import is_same_domain, limited_parse_qsl
# Unique sentinel used as the default value of `default` in
# HttpRequest.get_signed_cookie(); when `default` is still this object, a
# missing cookie or a bad signature is re-raised instead of being swallowed.
RAISE_ERROR = object()
# Matches a host made of lowercase letters, digits, dots and dashes, or a
# bracketed literal built from hex digits, colons and dots, optionally
# followed by ":<digits>" for the port.
host_validation_re = re.compile(r"^([a-z0-9.-]+|\[[a-f0-9]*:[a-f0-9\.:]+\])(:\d+)?$")
  21. class UnreadablePostError(IOError):
  22. pass
  23. class RawPostDataException(Exception):
  24. """
  25. You cannot access raw_post_data from a request that has
  26. multipart/* POST data if it has been accessed via POST,
  27. FILES, etc..
  28. """
  29. pass
  30. class HttpRequest:
  31. """A basic HTTP request."""
  32. # The encoding used in GET/POST dicts. None means use default setting.
  33. _encoding = None
  34. _upload_handlers = []
  35. def __init__(self):
  36. # WARNING: The `WSGIRequest` subclass doesn't call `super`.
  37. # Any variable assignment made here should also happen in
  38. # `WSGIRequest.__init__()`.
  39. self.GET = QueryDict(mutable=True)
  40. self.POST = QueryDict(mutable=True)
  41. self.COOKIES = {}
  42. self.META = {}
  43. self.FILES = MultiValueDict()
  44. self.path = ''
  45. self.path_info = ''
  46. self.method = None
  47. self.resolver_match = None
  48. self._post_parse_error = False
  49. self.content_type = None
  50. self.content_params = None
  51. def __repr__(self):
  52. if self.method is None or not self.get_full_path():
  53. return '<%s>' % self.__class__.__name__
  54. return '<%s: %s %r>' % (self.__class__.__name__, self.method, self.get_full_path())
  55. def _get_raw_host(self):
  56. """
  57. Return the HTTP host using the environment or request headers. Skip
  58. allowed hosts protection, so may return an insecure host.
  59. """
  60. # We try three options, in order of decreasing preference.
  61. if settings.USE_X_FORWARDED_HOST and (
  62. 'HTTP_X_FORWARDED_HOST' in self.META):
  63. host = self.META['HTTP_X_FORWARDED_HOST']
  64. elif 'HTTP_HOST' in self.META:
  65. host = self.META['HTTP_HOST']
  66. else:
  67. # Reconstruct the host using the algorithm from PEP 333.
  68. host = self.META['SERVER_NAME']
  69. server_port = self.get_port()
  70. if server_port != ('443' if self.is_secure() else '80'):
  71. host = '%s:%s' % (host, server_port)
  72. return host
  73. def get_host(self):
  74. """Return the HTTP host using the environment or request headers."""
  75. host = self._get_raw_host()
  76. # Allow variants of localhost if ALLOWED_HOSTS is empty and DEBUG=True.
  77. allowed_hosts = settings.ALLOWED_HOSTS
  78. if settings.DEBUG and not allowed_hosts:
  79. allowed_hosts = ['localhost', '127.0.0.1', '[::1]']
  80. domain, port = split_domain_port(host)
  81. if domain and validate_host(domain, allowed_hosts):
  82. return host
  83. else:
  84. msg = "Invalid HTTP_HOST header: %r." % host
  85. if domain:
  86. msg += " You may need to add %r to ALLOWED_HOSTS." % domain
  87. else:
  88. msg += " The domain name provided is not valid according to RFC 1034/1035."
  89. raise DisallowedHost(msg)
  90. def get_port(self):
  91. """Return the port number for the request as a string."""
  92. if settings.USE_X_FORWARDED_PORT and 'HTTP_X_FORWARDED_PORT' in self.META:
  93. port = self.META['HTTP_X_FORWARDED_PORT']
  94. else:
  95. port = self.META['SERVER_PORT']
  96. return str(port)
  97. def get_full_path(self, force_append_slash=False):
  98. return self._get_full_path(self.path, force_append_slash)
  99. def get_full_path_info(self, force_append_slash=False):
  100. return self._get_full_path(self.path_info, force_append_slash)
  101. def _get_full_path(self, path, force_append_slash):
  102. # RFC 3986 requires query string arguments to be in the ASCII range.
  103. # Rather than crash if this doesn't happen, we encode defensively.
  104. return '%s%s%s' % (
  105. escape_uri_path(path),
  106. '/' if force_append_slash and not path.endswith('/') else '',
  107. ('?' + iri_to_uri(self.META.get('QUERY_STRING', ''))) if self.META.get('QUERY_STRING', '') else ''
  108. )
  109. def get_signed_cookie(self, key, default=RAISE_ERROR, salt='', max_age=None):
  110. """
  111. Attempt to return a signed cookie. If the signature fails or the
  112. cookie has expired, raise an exception, unless the `default` argument
  113. is provided, in which case return that value.
  114. """
  115. try:
  116. cookie_value = self.COOKIES[key]
  117. except KeyError:
  118. if default is not RAISE_ERROR:
  119. return default
  120. else:
  121. raise
  122. try:
  123. value = signing.get_cookie_signer(salt=key + salt).unsign(
  124. cookie_value, max_age=max_age)
  125. except signing.BadSignature:
  126. if default is not RAISE_ERROR:
  127. return default
  128. else:
  129. raise
  130. return value
  131. def get_raw_uri(self):
  132. """
  133. Return an absolute URI from variables available in this request. Skip
  134. allowed hosts protection, so may return insecure URI.
  135. """
  136. return '{scheme}://{host}{path}'.format(
  137. scheme=self.scheme,
  138. host=self._get_raw_host(),
  139. path=self.get_full_path(),
  140. )
  141. def build_absolute_uri(self, location=None):
  142. """
  143. Build an absolute URI from the location and the variables available in
  144. this request. If no ``location`` is specified, build the absolute URI
  145. using request.get_full_path(). If the location is absolute, convert it
  146. to an RFC 3987 compliant URI and return it. If location is relative or
  147. is scheme-relative (i.e., ``//example.com/``), urljoin() it to a base
  148. URL constructed from the request variables.
  149. """
  150. if location is None:
  151. # Make it an absolute url (but schemeless and domainless) for the
  152. # edge case that the path starts with '//'.
  153. location = '//%s' % self.get_full_path()
  154. bits = urlsplit(location)
  155. if not (bits.scheme and bits.netloc):
  156. # Handle the simple, most common case. If the location is absolute
  157. # and a scheme or host (netloc) isn't provided, skip an expensive
  158. # urljoin() as long as no path segments are '.' or '..'.
  159. if (bits.path.startswith('/') and not bits.scheme and not bits.netloc and
  160. '/./' not in bits.path and '/../' not in bits.path):
  161. # If location starts with '//' but has no netloc, reuse the
  162. # schema and netloc from the current request. Strip the double
  163. # slashes and continue as if it wasn't specified.
  164. if location.startswith('//'):
  165. location = location[2:]
  166. location = self._current_scheme_host + location
  167. else:
  168. # Join the constructed URL with the provided location, which
  169. # allows the provided location to apply query strings to the
  170. # base path.
  171. location = urljoin(self._current_scheme_host + self.path, location)
  172. return iri_to_uri(location)
  173. @cached_property
  174. def _current_scheme_host(self):
  175. return '{}://{}'.format(self.scheme, self.get_host())
  176. def _get_scheme(self):
  177. """
  178. Hook for subclasses like WSGIRequest to implement. Return 'http' by
  179. default.
  180. """
  181. return 'http'
  182. @property
  183. def scheme(self):
  184. if settings.SECURE_PROXY_SSL_HEADER:
  185. try:
  186. header, value = settings.SECURE_PROXY_SSL_HEADER
  187. except ValueError:
  188. raise ImproperlyConfigured(
  189. 'The SECURE_PROXY_SSL_HEADER setting must be a tuple containing two values.'
  190. )
  191. if self.META.get(header) == value:
  192. return 'https'
  193. return self._get_scheme()
  194. def is_secure(self):
  195. return self.scheme == 'https'
  196. def is_ajax(self):
  197. return self.META.get('HTTP_X_REQUESTED_WITH') == 'XMLHttpRequest'
  198. @property
  199. def encoding(self):
  200. return self._encoding
  201. @encoding.setter
  202. def encoding(self, val):
  203. """
  204. Set the encoding used for GET/POST accesses. If the GET or POST
  205. dictionary has already been created, remove and recreate it on the
  206. next access (so that it is decoded correctly).
  207. """
  208. self._encoding = val
  209. if hasattr(self, 'GET'):
  210. del self.GET
  211. if hasattr(self, '_post'):
  212. del self._post
  213. def _initialize_handlers(self):
  214. self._upload_handlers = [uploadhandler.load_handler(handler, self)
  215. for handler in settings.FILE_UPLOAD_HANDLERS]
  216. @property
  217. def upload_handlers(self):
  218. if not self._upload_handlers:
  219. # If there are no upload handlers defined, initialize them from settings.
  220. self._initialize_handlers()
  221. return self._upload_handlers
  222. @upload_handlers.setter
  223. def upload_handlers(self, upload_handlers):
  224. if hasattr(self, '_files'):
  225. raise AttributeError("You cannot set the upload handlers after the upload has been processed.")
  226. self._upload_handlers = upload_handlers
  227. def parse_file_upload(self, META, post_data):
  228. """Return a tuple of (POST QueryDict, FILES MultiValueDict)."""
  229. self.upload_handlers = ImmutableList(
  230. self.upload_handlers,
  231. warning="You cannot alter upload handlers after the upload has been processed."
  232. )
  233. parser = MultiPartParser(META, post_data, self.upload_handlers, self.encoding)
  234. return parser.parse()
  235. @property
  236. def body(self):
  237. if not hasattr(self, '_body'):
  238. if self._read_started:
  239. raise RawPostDataException("You cannot access body after reading from request's data stream")
  240. # Limit the maximum request data size that will be handled in-memory.
  241. if (settings.DATA_UPLOAD_MAX_MEMORY_SIZE is not None and
  242. int(self.META.get('CONTENT_LENGTH') or 0) > settings.DATA_UPLOAD_MAX_MEMORY_SIZE):
  243. raise RequestDataTooBig('Request body exceeded settings.DATA_UPLOAD_MAX_MEMORY_SIZE.')
  244. try:
  245. self._body = self.read()
  246. except IOError as e:
  247. raise UnreadablePostError(*e.args) from e
  248. self._stream = BytesIO(self._body)
  249. return self._body
  250. def _mark_post_parse_error(self):
  251. self._post = QueryDict()
  252. self._files = MultiValueDict()
  253. self._post_parse_error = True
  254. def _load_post_and_files(self):
  255. """Populate self._post and self._files if the content-type is a form type"""
  256. if self.method != 'POST':
  257. self._post, self._files = QueryDict(encoding=self._encoding), MultiValueDict()
  258. return
  259. if self._read_started and not hasattr(self, '_body'):
  260. self._mark_post_parse_error()
  261. return
  262. if self.content_type == 'multipart/form-data':
  263. if hasattr(self, '_body'):
  264. # Use already read data
  265. data = BytesIO(self._body)
  266. else:
  267. data = self
  268. try:
  269. self._post, self._files = self.parse_file_upload(self.META, data)
  270. except MultiPartParserError:
  271. # An error occurred while parsing POST data. Since when
  272. # formatting the error the request handler might access
  273. # self.POST, set self._post and self._file to prevent
  274. # attempts to parse POST data again.
  275. # Mark that an error occurred. This allows self.__repr__ to
  276. # be explicit about it instead of simply representing an
  277. # empty POST
  278. self._mark_post_parse_error()
  279. raise
  280. elif self.content_type == 'application/x-www-form-urlencoded':
  281. self._post, self._files = QueryDict(self.body, encoding=self._encoding), MultiValueDict()
  282. else:
  283. self._post, self._files = QueryDict(encoding=self._encoding), MultiValueDict()
  284. def close(self):
  285. if hasattr(self, '_files'):
  286. for f in chain.from_iterable(l[1] for l in self._files.lists()):
  287. f.close()
  288. # File-like and iterator interface.
  289. #
  290. # Expects self._stream to be set to an appropriate source of bytes by
  291. # a corresponding request subclass (e.g. WSGIRequest).
  292. # Also when request data has already been read by request.POST or
  293. # request.body, self._stream points to a BytesIO instance
  294. # containing that data.
  295. def read(self, *args, **kwargs):
  296. self._read_started = True
  297. try:
  298. return self._stream.read(*args, **kwargs)
  299. except IOError as e:
  300. raise UnreadablePostError(*e.args) from e
  301. def readline(self, *args, **kwargs):
  302. self._read_started = True
  303. try:
  304. return self._stream.readline(*args, **kwargs)
  305. except IOError as e:
  306. raise UnreadablePostError(*e.args) from e
  307. def __iter__(self):
  308. while True:
  309. buf = self.readline()
  310. if not buf:
  311. break
  312. yield buf
  313. def xreadlines(self):
  314. warnings.warn(
  315. 'HttpRequest.xreadlines() is deprecated in favor of iterating the '
  316. 'request.', RemovedInDjango30Warning, stacklevel=2,
  317. )
  318. yield from self
  319. def readlines(self):
  320. return list(self)
  321. class QueryDict(MultiValueDict):
  322. """
  323. A specialized MultiValueDict which represents a query string.
  324. A QueryDict can be used to represent GET or POST data. It subclasses
  325. MultiValueDict since keys in such data can be repeated, for instance
  326. in the data from a form with a <select multiple> field.
  327. By default QueryDicts are immutable, though the copy() method
  328. will always return a mutable copy.
  329. Both keys and values set on this class are converted from the given encoding
  330. (DEFAULT_CHARSET by default) to str.
  331. """
  332. # These are both reset in __init__, but is specified here at the class
  333. # level so that unpickling will have valid values
  334. _mutable = True
  335. _encoding = None
  336. def __init__(self, query_string=None, mutable=False, encoding=None):
  337. super().__init__()
  338. self.encoding = encoding or settings.DEFAULT_CHARSET
  339. query_string = query_string or ''
  340. parse_qsl_kwargs = {
  341. 'keep_blank_values': True,
  342. 'fields_limit': settings.DATA_UPLOAD_MAX_NUMBER_FIELDS,
  343. 'encoding': self.encoding,
  344. }
  345. if isinstance(query_string, bytes):
  346. # query_string normally contains URL-encoded data, a subset of ASCII.
  347. try:
  348. query_string = query_string.decode(self.encoding)
  349. except UnicodeDecodeError:
  350. # ... but some user agents are misbehaving :-(
  351. query_string = query_string.decode('iso-8859-1')
  352. for key, value in limited_parse_qsl(query_string, **parse_qsl_kwargs):
  353. self.appendlist(key, value)
  354. self._mutable = mutable
  355. @classmethod
  356. def fromkeys(cls, iterable, value='', mutable=False, encoding=None):
  357. """
  358. Return a new QueryDict with keys (may be repeated) from an iterable and
  359. values from value.
  360. """
  361. q = cls('', mutable=True, encoding=encoding)
  362. for key in iterable:
  363. q.appendlist(key, value)
  364. if not mutable:
  365. q._mutable = False
  366. return q
  367. @property
  368. def encoding(self):
  369. if self._encoding is None:
  370. self._encoding = settings.DEFAULT_CHARSET
  371. return self._encoding
  372. @encoding.setter
  373. def encoding(self, value):
  374. self._encoding = value
  375. def _assert_mutable(self):
  376. if not self._mutable:
  377. raise AttributeError("This QueryDict instance is immutable")
  378. def __setitem__(self, key, value):
  379. self._assert_mutable()
  380. key = bytes_to_text(key, self.encoding)
  381. value = bytes_to_text(value, self.encoding)
  382. super().__setitem__(key, value)
  383. def __delitem__(self, key):
  384. self._assert_mutable()
  385. super().__delitem__(key)
  386. def __copy__(self):
  387. result = self.__class__('', mutable=True, encoding=self.encoding)
  388. for key, value in self.lists():
  389. result.setlist(key, value)
  390. return result
  391. def __deepcopy__(self, memo):
  392. result = self.__class__('', mutable=True, encoding=self.encoding)
  393. memo[id(self)] = result
  394. for key, value in self.lists():
  395. result.setlist(copy.deepcopy(key, memo), copy.deepcopy(value, memo))
  396. return result
  397. def setlist(self, key, list_):
  398. self._assert_mutable()
  399. key = bytes_to_text(key, self.encoding)
  400. list_ = [bytes_to_text(elt, self.encoding) for elt in list_]
  401. super().setlist(key, list_)
  402. def setlistdefault(self, key, default_list=None):
  403. self._assert_mutable()
  404. return super().setlistdefault(key, default_list)
  405. def appendlist(self, key, value):
  406. self._assert_mutable()
  407. key = bytes_to_text(key, self.encoding)
  408. value = bytes_to_text(value, self.encoding)
  409. super().appendlist(key, value)
  410. def pop(self, key, *args):
  411. self._assert_mutable()
  412. return super().pop(key, *args)
  413. def popitem(self):
  414. self._assert_mutable()
  415. return super().popitem()
  416. def clear(self):
  417. self._assert_mutable()
  418. super().clear()
  419. def setdefault(self, key, default=None):
  420. self._assert_mutable()
  421. key = bytes_to_text(key, self.encoding)
  422. default = bytes_to_text(default, self.encoding)
  423. return super().setdefault(key, default)
  424. def copy(self):
  425. """Return a mutable copy of this object."""
  426. return self.__deepcopy__({})
  427. def urlencode(self, safe=None):
  428. """
  429. Return an encoded string of all query string arguments.
  430. `safe` specifies characters which don't require quoting, for example::
  431. >>> q = QueryDict(mutable=True)
  432. >>> q['next'] = '/a&b/'
  433. >>> q.urlencode()
  434. 'next=%2Fa%26b%2F'
  435. >>> q.urlencode(safe='/')
  436. 'next=/a%26b/'
  437. """
  438. output = []
  439. if safe:
  440. safe = safe.encode(self.encoding)
  441. def encode(k, v):
  442. return '%s=%s' % ((quote(k, safe), quote(v, safe)))
  443. else:
  444. def encode(k, v):
  445. return urlencode({k: v})
  446. for k, list_ in self.lists():
  447. output.extend(
  448. encode(k.encode(self.encoding), str(v).encode(self.encoding))
  449. for v in list_
  450. )
  451. return '&'.join(output)
  452. # It's neither necessary nor appropriate to use
  453. # django.utils.encoding.force_text for parsing URLs and form inputs. Thus,
  454. # this slightly more restricted function, used by QueryDict.
  455. def bytes_to_text(s, encoding):
  456. """
  457. Convert bytes objects to strings, using the given encoding. Illegally
  458. encoded input characters are replaced with Unicode "unknown" codepoint
  459. (\ufffd).
  460. Return any non-bytes objects without change.
  461. """
  462. if isinstance(s, bytes):
  463. return str(s, encoding, 'replace')
  464. else:
  465. return s
  466. def split_domain_port(host):
  467. """
  468. Return a (domain, port) tuple from a given host.
  469. Returned domain is lower-cased. If the host is invalid, the domain will be
  470. empty.
  471. """
  472. host = host.lower()
  473. if not host_validation_re.match(host):
  474. return '', ''
  475. if host[-1] == ']':
  476. # It's an IPv6 address without a port.
  477. return host, ''
  478. bits = host.rsplit(':', 1)
  479. domain, port = bits if len(bits) == 2 else (bits[0], '')
  480. # Remove a trailing dot (if present) from the domain.
  481. domain = domain[:-1] if domain.endswith('.') else domain
  482. return domain, port
  483. def validate_host(host, allowed_hosts):
  484. """
  485. Validate the given host for this site.
  486. Check that the host looks valid and matches a host or host pattern in the
  487. given list of ``allowed_hosts``. Any pattern beginning with a period
  488. matches a domain and all its subdomains (e.g. ``.example.com`` matches
  489. ``example.com`` and any subdomain), ``*`` matches anything, and anything
  490. else must match exactly.
  491. Note: This function assumes that the given host is lower-cased and has
  492. already had the port, if any, stripped off.
  493. Return ``True`` for a valid host, ``False`` otherwise.
  494. """
  495. return any(pattern == '*' or is_same_domain(host, pattern) for pattern in allowed_hosts)