Funktionierender Prototyp des Serious Games zur Vermittlung von Wissen zu Software-Engineering-Arbeitsmodellen.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

http.py 6.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201
  1. from __future__ import annotations
  2. import asyncio
  3. import re
  4. from typing import Tuple
  5. from ..datastructures import Headers
  6. from ..exceptions import SecurityError
# Names exported by ``from <module> import *``.
__all__ = ["read_request", "read_response"]

# Upper bound on the number of header lines read_headers() accepts before
# raising SecurityError.
MAX_HEADERS = 128

# Upper bound, in bytes, on the length of a single line as returned by the
# stream (i.e. measured before CRLF is stripped); read_line() raises
# SecurityError beyond this.
MAX_LINE = 8192
  10. def d(value: bytes) -> str:
  11. """
  12. Decode a bytestring for interpolating into an error message.
  13. """
  14. return value.decode(errors="backslashreplace")
# See https://www.rfc-editor.org/rfc/rfc7230.html#appendix-B.

# Regex for validating header names.
# Callers in this module apply it with fullmatch(), so the whole name must
# consist of one or more of the listed characters.
_token_re = re.compile(rb"[-!#$%&\'*+.^_`|~0-9a-zA-Z]+")

# Regex for validating header values.
# We don't attempt to support obsolete line folding.
# Include HTAB (\x09), SP (\x20), VCHAR (\x21-\x7e), obs-text (\x80-\xff).
# The ABNF is complicated because it attempts to express that optional
# whitespace is ignored. We strip whitespace and don't revalidate that.
# See also https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4189
# Note that the pattern also matches the empty bytestring, and that it is
# reused by read_response() to validate the reason phrase.
_value_re = re.compile(rb"[\x09\x20-\x7e\x80-\xff]*")
  25. async def read_request(stream: asyncio.StreamReader) -> Tuple[str, Headers]:
  26. """
  27. Read an HTTP/1.1 GET request and return ``(path, headers)``.
  28. ``path`` isn't URL-decoded or validated in any way.
  29. ``path`` and ``headers`` are expected to contain only ASCII characters.
  30. Other characters are represented with surrogate escapes.
  31. :func:`read_request` doesn't attempt to read the request body because
  32. WebSocket handshake requests don't have one. If the request contains a
  33. body, it may be read from ``stream`` after this coroutine returns.
  34. Args:
  35. stream: Input to read the request from.
  36. Raises:
  37. EOFError: If the connection is closed without a full HTTP request.
  38. SecurityError: If the request exceeds a security limit.
  39. ValueError: If the request isn't well formatted.
  40. """
  41. # https://www.rfc-editor.org/rfc/rfc7230.html#section-3.1.1
  42. # Parsing is simple because fixed values are expected for method and
  43. # version and because path isn't checked. Since WebSocket software tends
  44. # to implement HTTP/1.1 strictly, there's little need for lenient parsing.
  45. try:
  46. request_line = await read_line(stream)
  47. except EOFError as exc:
  48. raise EOFError("connection closed while reading HTTP request line") from exc
  49. try:
  50. method, raw_path, version = request_line.split(b" ", 2)
  51. except ValueError: # not enough values to unpack (expected 3, got 1-2)
  52. raise ValueError(f"invalid HTTP request line: {d(request_line)}") from None
  53. if method != b"GET":
  54. raise ValueError(f"unsupported HTTP method: {d(method)}")
  55. if version != b"HTTP/1.1":
  56. raise ValueError(f"unsupported HTTP version: {d(version)}")
  57. path = raw_path.decode("ascii", "surrogateescape")
  58. headers = await read_headers(stream)
  59. return path, headers
  60. async def read_response(stream: asyncio.StreamReader) -> Tuple[int, str, Headers]:
  61. """
  62. Read an HTTP/1.1 response and return ``(status_code, reason, headers)``.
  63. ``reason`` and ``headers`` are expected to contain only ASCII characters.
  64. Other characters are represented with surrogate escapes.
  65. :func:`read_request` doesn't attempt to read the response body because
  66. WebSocket handshake responses don't have one. If the response contains a
  67. body, it may be read from ``stream`` after this coroutine returns.
  68. Args:
  69. stream: Input to read the response from.
  70. Raises:
  71. EOFError: If the connection is closed without a full HTTP response.
  72. SecurityError: If the response exceeds a security limit.
  73. ValueError: If the response isn't well formatted.
  74. """
  75. # https://www.rfc-editor.org/rfc/rfc7230.html#section-3.1.2
  76. # As in read_request, parsing is simple because a fixed value is expected
  77. # for version, status_code is a 3-digit number, and reason can be ignored.
  78. try:
  79. status_line = await read_line(stream)
  80. except EOFError as exc:
  81. raise EOFError("connection closed while reading HTTP status line") from exc
  82. try:
  83. version, raw_status_code, raw_reason = status_line.split(b" ", 2)
  84. except ValueError: # not enough values to unpack (expected 3, got 1-2)
  85. raise ValueError(f"invalid HTTP status line: {d(status_line)}") from None
  86. if version != b"HTTP/1.1":
  87. raise ValueError(f"unsupported HTTP version: {d(version)}")
  88. try:
  89. status_code = int(raw_status_code)
  90. except ValueError: # invalid literal for int() with base 10
  91. raise ValueError(f"invalid HTTP status code: {d(raw_status_code)}") from None
  92. if not 100 <= status_code < 1000:
  93. raise ValueError(f"unsupported HTTP status code: {d(raw_status_code)}")
  94. if not _value_re.fullmatch(raw_reason):
  95. raise ValueError(f"invalid HTTP reason phrase: {d(raw_reason)}")
  96. reason = raw_reason.decode()
  97. headers = await read_headers(stream)
  98. return status_code, reason, headers
  99. async def read_headers(stream: asyncio.StreamReader) -> Headers:
  100. """
  101. Read HTTP headers from ``stream``.
  102. Non-ASCII characters are represented with surrogate escapes.
  103. """
  104. # https://www.rfc-editor.org/rfc/rfc7230.html#section-3.2
  105. # We don't attempt to support obsolete line folding.
  106. headers = Headers()
  107. for _ in range(MAX_HEADERS + 1):
  108. try:
  109. line = await read_line(stream)
  110. except EOFError as exc:
  111. raise EOFError("connection closed while reading HTTP headers") from exc
  112. if line == b"":
  113. break
  114. try:
  115. raw_name, raw_value = line.split(b":", 1)
  116. except ValueError: # not enough values to unpack (expected 2, got 1)
  117. raise ValueError(f"invalid HTTP header line: {d(line)}") from None
  118. if not _token_re.fullmatch(raw_name):
  119. raise ValueError(f"invalid HTTP header name: {d(raw_name)}")
  120. raw_value = raw_value.strip(b" \t")
  121. if not _value_re.fullmatch(raw_value):
  122. raise ValueError(f"invalid HTTP header value: {d(raw_value)}")
  123. name = raw_name.decode("ascii") # guaranteed to be ASCII at this point
  124. value = raw_value.decode("ascii", "surrogateescape")
  125. headers[name] = value
  126. else:
  127. raise SecurityError("too many HTTP headers")
  128. return headers
  129. async def read_line(stream: asyncio.StreamReader) -> bytes:
  130. """
  131. Read a single line from ``stream``.
  132. CRLF is stripped from the return value.
  133. """
  134. # Security: this is bounded by the StreamReader's limit (default = 32 KiB).
  135. line = await stream.readline()
  136. # Security: this guarantees header values are small (hard-coded = 8 KiB)
  137. if len(line) > MAX_LINE:
  138. raise SecurityError("line too long")
  139. # Not mandatory but safe - https://www.rfc-editor.org/rfc/rfc7230.html#section-3.5
  140. if not line.endswith(b"\r\n"):
  141. raise EOFError("line without CRLF")
  142. return line[:-2]