Funktionierender Prototyp des Serious Games zur Vermittlung von Wissen zu Software-Engineering-Arbeitsmodellen.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

http11.py 12KB

1 year ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364
  1. from __future__ import annotations
  2. import dataclasses
  3. import re
  4. import warnings
  5. from typing import Callable, Generator, Optional
  6. from . import datastructures, exceptions
  7. # Maximum total size of headers is around 128 * 8 KiB = 1 MiB.
  8. MAX_HEADERS = 128
  9. # Limit request line and header lines. 8KiB is the most common default
  10. # configuration of popular HTTP servers.
  11. MAX_LINE = 8192
  12. # Support for HTTP response bodies is intended to read an error message
  13. # returned by a server. It isn't designed to perform large file transfers.
  14. MAX_BODY = 2**20 # 1 MiB
  15. def d(value: bytes) -> str:
  16. """
  17. Decode a bytestring for interpolating into an error message.
  18. """
  19. return value.decode(errors="backslashreplace")
  20. # See https://www.rfc-editor.org/rfc/rfc7230.html#appendix-B.
  21. # Regex for validating header names.
  22. _token_re = re.compile(rb"[-!#$%&\'*+.^_`|~0-9a-zA-Z]+")
  23. # Regex for validating header values.
  24. # We don't attempt to support obsolete line folding.
  25. # Include HTAB (\x09), SP (\x20), VCHAR (\x21-\x7e), obs-text (\x80-\xff).
  26. # The ABNF is complicated because it attempts to express that optional
  27. # whitespace is ignored. We strip whitespace and don't revalidate that.
  28. # See also https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4189
  29. _value_re = re.compile(rb"[\x09\x20-\x7e\x80-\xff]*")
  30. @dataclasses.dataclass
  31. class Request:
  32. """
  33. WebSocket handshake request.
  34. Attributes:
  35. path: Request path, including optional query.
  36. headers: Request headers.
  37. """
  38. path: str
  39. headers: datastructures.Headers
  40. # body isn't useful is the context of this library.
  41. _exception: Optional[Exception] = None
  42. @property
  43. def exception(self) -> Optional[Exception]: # pragma: no cover
  44. warnings.warn(
  45. "Request.exception is deprecated; "
  46. "use ServerProtocol.handshake_exc instead",
  47. DeprecationWarning,
  48. )
  49. return self._exception
  50. @classmethod
  51. def parse(
  52. cls,
  53. read_line: Callable[[int], Generator[None, None, bytes]],
  54. ) -> Generator[None, None, Request]:
  55. """
  56. Parse a WebSocket handshake request.
  57. This is a generator-based coroutine.
  58. The request path isn't URL-decoded or validated in any way.
  59. The request path and headers are expected to contain only ASCII
  60. characters. Other characters are represented with surrogate escapes.
  61. :meth:`parse` doesn't attempt to read the request body because
  62. WebSocket handshake requests don't have one. If the request contains a
  63. body, it may be read from the data stream after :meth:`parse` returns.
  64. Args:
  65. read_line: generator-based coroutine that reads a LF-terminated
  66. line or raises an exception if there isn't enough data
  67. Raises:
  68. EOFError: if the connection is closed without a full HTTP request.
  69. SecurityError: if the request exceeds a security limit.
  70. ValueError: if the request isn't well formatted.
  71. """
  72. # https://www.rfc-editor.org/rfc/rfc7230.html#section-3.1.1
  73. # Parsing is simple because fixed values are expected for method and
  74. # version and because path isn't checked. Since WebSocket software tends
  75. # to implement HTTP/1.1 strictly, there's little need for lenient parsing.
  76. try:
  77. request_line = yield from parse_line(read_line)
  78. except EOFError as exc:
  79. raise EOFError("connection closed while reading HTTP request line") from exc
  80. try:
  81. method, raw_path, version = request_line.split(b" ", 2)
  82. except ValueError: # not enough values to unpack (expected 3, got 1-2)
  83. raise ValueError(f"invalid HTTP request line: {d(request_line)}") from None
  84. if method != b"GET":
  85. raise ValueError(f"unsupported HTTP method: {d(method)}")
  86. if version != b"HTTP/1.1":
  87. raise ValueError(f"unsupported HTTP version: {d(version)}")
  88. path = raw_path.decode("ascii", "surrogateescape")
  89. headers = yield from parse_headers(read_line)
  90. # https://www.rfc-editor.org/rfc/rfc7230.html#section-3.3.3
  91. if "Transfer-Encoding" in headers:
  92. raise NotImplementedError("transfer codings aren't supported")
  93. if "Content-Length" in headers:
  94. raise ValueError("unsupported request body")
  95. return cls(path, headers)
  96. def serialize(self) -> bytes:
  97. """
  98. Serialize a WebSocket handshake request.
  99. """
  100. # Since the request line and headers only contain ASCII characters,
  101. # we can keep this simple.
  102. request = f"GET {self.path} HTTP/1.1\r\n".encode()
  103. request += self.headers.serialize()
  104. return request
  105. @dataclasses.dataclass
  106. class Response:
  107. """
  108. WebSocket handshake response.
  109. Attributes:
  110. status_code: Response code.
  111. reason_phrase: Response reason.
  112. headers: Response headers.
  113. body: Response body, if any.
  114. """
  115. status_code: int
  116. reason_phrase: str
  117. headers: datastructures.Headers
  118. body: Optional[bytes] = None
  119. _exception: Optional[Exception] = None
  120. @property
  121. def exception(self) -> Optional[Exception]: # pragma: no cover
  122. warnings.warn(
  123. "Response.exception is deprecated; "
  124. "use ClientProtocol.handshake_exc instead",
  125. DeprecationWarning,
  126. )
  127. return self._exception
  128. @classmethod
  129. def parse(
  130. cls,
  131. read_line: Callable[[int], Generator[None, None, bytes]],
  132. read_exact: Callable[[int], Generator[None, None, bytes]],
  133. read_to_eof: Callable[[int], Generator[None, None, bytes]],
  134. ) -> Generator[None, None, Response]:
  135. """
  136. Parse a WebSocket handshake response.
  137. This is a generator-based coroutine.
  138. The reason phrase and headers are expected to contain only ASCII
  139. characters. Other characters are represented with surrogate escapes.
  140. Args:
  141. read_line: generator-based coroutine that reads a LF-terminated
  142. line or raises an exception if there isn't enough data.
  143. read_exact: generator-based coroutine that reads the requested
  144. bytes or raises an exception if there isn't enough data.
  145. read_to_eof: generator-based coroutine that reads until the end
  146. of the stream.
  147. Raises:
  148. EOFError: if the connection is closed without a full HTTP response.
  149. SecurityError: if the response exceeds a security limit.
  150. LookupError: if the response isn't well formatted.
  151. ValueError: if the response isn't well formatted.
  152. """
  153. # https://www.rfc-editor.org/rfc/rfc7230.html#section-3.1.2
  154. try:
  155. status_line = yield from parse_line(read_line)
  156. except EOFError as exc:
  157. raise EOFError("connection closed while reading HTTP status line") from exc
  158. try:
  159. version, raw_status_code, raw_reason = status_line.split(b" ", 2)
  160. except ValueError: # not enough values to unpack (expected 3, got 1-2)
  161. raise ValueError(f"invalid HTTP status line: {d(status_line)}") from None
  162. if version != b"HTTP/1.1":
  163. raise ValueError(f"unsupported HTTP version: {d(version)}")
  164. try:
  165. status_code = int(raw_status_code)
  166. except ValueError: # invalid literal for int() with base 10
  167. raise ValueError(
  168. f"invalid HTTP status code: {d(raw_status_code)}"
  169. ) from None
  170. if not 100 <= status_code < 1000:
  171. raise ValueError(f"unsupported HTTP status code: {d(raw_status_code)}")
  172. if not _value_re.fullmatch(raw_reason):
  173. raise ValueError(f"invalid HTTP reason phrase: {d(raw_reason)}")
  174. reason = raw_reason.decode()
  175. headers = yield from parse_headers(read_line)
  176. # https://www.rfc-editor.org/rfc/rfc7230.html#section-3.3.3
  177. if "Transfer-Encoding" in headers:
  178. raise NotImplementedError("transfer codings aren't supported")
  179. # Since websockets only does GET requests (no HEAD, no CONNECT), all
  180. # responses except 1xx, 204, and 304 include a message body.
  181. if 100 <= status_code < 200 or status_code == 204 or status_code == 304:
  182. body = None
  183. else:
  184. content_length: Optional[int]
  185. try:
  186. # MultipleValuesError is sufficiently unlikely that we don't
  187. # attempt to handle it. Instead we document that its parent
  188. # class, LookupError, may be raised.
  189. raw_content_length = headers["Content-Length"]
  190. except KeyError:
  191. content_length = None
  192. else:
  193. content_length = int(raw_content_length)
  194. if content_length is None:
  195. try:
  196. body = yield from read_to_eof(MAX_BODY)
  197. except RuntimeError:
  198. raise exceptions.SecurityError(
  199. f"body too large: over {MAX_BODY} bytes"
  200. )
  201. elif content_length > MAX_BODY:
  202. raise exceptions.SecurityError(
  203. f"body too large: {content_length} bytes"
  204. )
  205. else:
  206. body = yield from read_exact(content_length)
  207. return cls(status_code, reason, headers, body)
  208. def serialize(self) -> bytes:
  209. """
  210. Serialize a WebSocket handshake response.
  211. """
  212. # Since the status line and headers only contain ASCII characters,
  213. # we can keep this simple.
  214. response = f"HTTP/1.1 {self.status_code} {self.reason_phrase}\r\n".encode()
  215. response += self.headers.serialize()
  216. if self.body is not None:
  217. response += self.body
  218. return response
  219. def parse_headers(
  220. read_line: Callable[[int], Generator[None, None, bytes]],
  221. ) -> Generator[None, None, datastructures.Headers]:
  222. """
  223. Parse HTTP headers.
  224. Non-ASCII characters are represented with surrogate escapes.
  225. Args:
  226. read_line: generator-based coroutine that reads a LF-terminated line
  227. or raises an exception if there isn't enough data.
  228. Raises:
  229. EOFError: if the connection is closed without complete headers.
  230. SecurityError: if the request exceeds a security limit.
  231. ValueError: if the request isn't well formatted.
  232. """
  233. # https://www.rfc-editor.org/rfc/rfc7230.html#section-3.2
  234. # We don't attempt to support obsolete line folding.
  235. headers = datastructures.Headers()
  236. for _ in range(MAX_HEADERS + 1):
  237. try:
  238. line = yield from parse_line(read_line)
  239. except EOFError as exc:
  240. raise EOFError("connection closed while reading HTTP headers") from exc
  241. if line == b"":
  242. break
  243. try:
  244. raw_name, raw_value = line.split(b":", 1)
  245. except ValueError: # not enough values to unpack (expected 2, got 1)
  246. raise ValueError(f"invalid HTTP header line: {d(line)}") from None
  247. if not _token_re.fullmatch(raw_name):
  248. raise ValueError(f"invalid HTTP header name: {d(raw_name)}")
  249. raw_value = raw_value.strip(b" \t")
  250. if not _value_re.fullmatch(raw_value):
  251. raise ValueError(f"invalid HTTP header value: {d(raw_value)}")
  252. name = raw_name.decode("ascii") # guaranteed to be ASCII at this point
  253. value = raw_value.decode("ascii", "surrogateescape")
  254. headers[name] = value
  255. else:
  256. raise exceptions.SecurityError("too many HTTP headers")
  257. return headers
  258. def parse_line(
  259. read_line: Callable[[int], Generator[None, None, bytes]],
  260. ) -> Generator[None, None, bytes]:
  261. """
  262. Parse a single line.
  263. CRLF is stripped from the return value.
  264. Args:
  265. read_line: generator-based coroutine that reads a LF-terminated line
  266. or raises an exception if there isn't enough data.
  267. Raises:
  268. EOFError: if the connection is closed without a CRLF.
  269. SecurityError: if the response exceeds a security limit.
  270. """
  271. try:
  272. line = yield from read_line(MAX_LINE)
  273. except RuntimeError:
  274. raise exceptions.SecurityError("line too long")
  275. # Not mandatory but safe - https://www.rfc-editor.org/rfc/rfc7230.html#section-3.5
  276. if not line.endswith(b"\r\n"):
  277. raise EOFError("line without CRLF")
  278. return line[:-2]