Funktionierender Prototyp des Serious Games zur Vermittlung von Wissen zu Software-Engineering-Arbeitsmodellen.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

frames.py 12KB

1 year ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449
  1. from __future__ import annotations
  2. import dataclasses
  3. import enum
  4. import io
  5. import secrets
  6. import struct
  7. from typing import Callable, Generator, Optional, Sequence, Tuple
  8. from . import exceptions, extensions
  9. from .typing import Data
  10. try:
  11. from .speedups import apply_mask
  12. except ImportError:
  13. from .utils import apply_mask
# Names exported by a star-import of this module.
__all__ = [
    "Opcode",
    "OP_CONT",
    "OP_TEXT",
    "OP_BINARY",
    "OP_CLOSE",
    "OP_PING",
    "OP_PONG",
    "DATA_OPCODES",
    "CTRL_OPCODES",
    "Frame",
    "prepare_data",
    "prepare_ctrl",
    "Close",
]
  29. class Opcode(enum.IntEnum):
  30. """Opcode values for WebSocket frames."""
  31. CONT, TEXT, BINARY = 0x00, 0x01, 0x02
  32. CLOSE, PING, PONG = 0x08, 0x09, 0x0A
# Module-level aliases for the members of Opcode.
OP_CONT = Opcode.CONT
OP_TEXT = Opcode.TEXT
OP_BINARY = Opcode.BINARY
OP_CLOSE = Opcode.CLOSE
OP_PING = Opcode.PING
OP_PONG = Opcode.PONG

# Opcodes grouped by frame category (data frames vs. control frames).
DATA_OPCODES = OP_CONT, OP_TEXT, OP_BINARY
CTRL_OPCODES = OP_CLOSE, OP_PING, OP_PONG
# Short human-readable explanation for each known close code.
# Entries tagged "[internal]" are absent from EXTERNAL_CLOSE_CODES.
# See https://www.iana.org/assignments/websocket/websocket.xhtml
CLOSE_CODES = {
    1000: "OK",
    1001: "going away",
    1002: "protocol error",
    1003: "unsupported type",
    # 1004 is reserved
    1005: "no status code [internal]",
    1006: "connection closed abnormally [internal]",
    1007: "invalid data",
    1008: "policy violation",
    1009: "message too big",
    1010: "extension required",
    1011: "unexpected error",
    1012: "service restart",
    1013: "try again later",
    1014: "bad gateway",
    1015: "TLS failure [internal]",
}
# Close codes that are allowed in a close frame.
# Using a set optimizes `code in EXTERNAL_CLOSE_CODES`.
EXTERNAL_CLOSE_CODES = {
    1000,
    1001,
    1002,
    1003,
    1007,
    1008,
    1009,
    1010,
    1011,
    1012,
    1013,
    1014,
}

# NOTE(review): presumably the close codes regarded as a normal (non-error)
# closure; nothing in this section reads it -- confirm against callers.
OK_CLOSE_CODES = {
    1000,
    1001,
    1005,
}

# Tuple of the types accepted as bytes-like payloads, for use with isinstance().
BytesLike = bytes, bytearray, memoryview
  82. @dataclasses.dataclass
  83. class Frame:
  84. """
  85. WebSocket frame.
  86. Attributes:
  87. opcode: Opcode.
  88. data: Payload data.
  89. fin: FIN bit.
  90. rsv1: RSV1 bit.
  91. rsv2: RSV2 bit.
  92. rsv3: RSV3 bit.
  93. Only these fields are needed. The MASK bit, payload length and masking-key
  94. are handled on the fly when parsing and serializing frames.
  95. """
  96. opcode: Opcode
  97. data: bytes
  98. fin: bool = True
  99. rsv1: bool = False
  100. rsv2: bool = False
  101. rsv3: bool = False
  102. def __str__(self) -> str:
  103. """
  104. Return a human-readable representation of a frame.
  105. """
  106. coding = None
  107. length = f"{len(self.data)} byte{'' if len(self.data) == 1 else 's'}"
  108. non_final = "" if self.fin else "continued"
  109. if self.opcode is OP_TEXT:
  110. # Decoding only the beginning and the end is needlessly hard.
  111. # Decode the entire payload then elide later if necessary.
  112. data = repr(self.data.decode())
  113. elif self.opcode is OP_BINARY:
  114. # We'll show at most the first 16 bytes and the last 8 bytes.
  115. # Encode just what we need, plus two dummy bytes to elide later.
  116. binary = self.data
  117. if len(binary) > 25:
  118. binary = b"".join([binary[:16], b"\x00\x00", binary[-8:]])
  119. data = " ".join(f"{byte:02x}" for byte in binary)
  120. elif self.opcode is OP_CLOSE:
  121. data = str(Close.parse(self.data))
  122. elif self.data:
  123. # We don't know if a Continuation frame contains text or binary.
  124. # Ping and Pong frames could contain UTF-8.
  125. # Attempt to decode as UTF-8 and display it as text; fallback to
  126. # binary. If self.data is a memoryview, it has no decode() method,
  127. # which raises AttributeError.
  128. try:
  129. data = repr(self.data.decode())
  130. coding = "text"
  131. except (UnicodeDecodeError, AttributeError):
  132. binary = self.data
  133. if len(binary) > 25:
  134. binary = b"".join([binary[:16], b"\x00\x00", binary[-8:]])
  135. data = " ".join(f"{byte:02x}" for byte in binary)
  136. coding = "binary"
  137. else:
  138. data = "''"
  139. if len(data) > 75:
  140. data = data[:48] + "..." + data[-24:]
  141. metadata = ", ".join(filter(None, [coding, length, non_final]))
  142. return f"{self.opcode.name} {data} [{metadata}]"
  143. @classmethod
  144. def parse(
  145. cls,
  146. read_exact: Callable[[int], Generator[None, None, bytes]],
  147. *,
  148. mask: bool,
  149. max_size: Optional[int] = None,
  150. extensions: Optional[Sequence[extensions.Extension]] = None,
  151. ) -> Generator[None, None, Frame]:
  152. """
  153. Parse a WebSocket frame.
  154. This is a generator-based coroutine.
  155. Args:
  156. read_exact: generator-based coroutine that reads the requested
  157. bytes or raises an exception if there isn't enough data.
  158. mask: whether the frame should be masked i.e. whether the read
  159. happens on the server side.
  160. max_size: maximum payload size in bytes.
  161. extensions: list of extensions, applied in reverse order.
  162. Raises:
  163. EOFError: if the connection is closed without a full WebSocket frame.
  164. UnicodeDecodeError: if the frame contains invalid UTF-8.
  165. PayloadTooBig: if the frame's payload size exceeds ``max_size``.
  166. ProtocolError: if the frame contains incorrect values.
  167. """
  168. # Read the header.
  169. data = yield from read_exact(2)
  170. head1, head2 = struct.unpack("!BB", data)
  171. # While not Pythonic, this is marginally faster than calling bool().
  172. fin = True if head1 & 0b10000000 else False
  173. rsv1 = True if head1 & 0b01000000 else False
  174. rsv2 = True if head1 & 0b00100000 else False
  175. rsv3 = True if head1 & 0b00010000 else False
  176. try:
  177. opcode = Opcode(head1 & 0b00001111)
  178. except ValueError as exc:
  179. raise exceptions.ProtocolError("invalid opcode") from exc
  180. if (True if head2 & 0b10000000 else False) != mask:
  181. raise exceptions.ProtocolError("incorrect masking")
  182. length = head2 & 0b01111111
  183. if length == 126:
  184. data = yield from read_exact(2)
  185. (length,) = struct.unpack("!H", data)
  186. elif length == 127:
  187. data = yield from read_exact(8)
  188. (length,) = struct.unpack("!Q", data)
  189. if max_size is not None and length > max_size:
  190. raise exceptions.PayloadTooBig(
  191. f"over size limit ({length} > {max_size} bytes)"
  192. )
  193. if mask:
  194. mask_bytes = yield from read_exact(4)
  195. # Read the data.
  196. data = yield from read_exact(length)
  197. if mask:
  198. data = apply_mask(data, mask_bytes)
  199. frame = cls(opcode, data, fin, rsv1, rsv2, rsv3)
  200. if extensions is None:
  201. extensions = []
  202. for extension in reversed(extensions):
  203. frame = extension.decode(frame, max_size=max_size)
  204. frame.check()
  205. return frame
  206. def serialize(
  207. self,
  208. *,
  209. mask: bool,
  210. extensions: Optional[Sequence[extensions.Extension]] = None,
  211. ) -> bytes:
  212. """
  213. Serialize a WebSocket frame.
  214. Args:
  215. mask: whether the frame should be masked i.e. whether the write
  216. happens on the client side.
  217. extensions: list of extensions, applied in order.
  218. Raises:
  219. ProtocolError: if the frame contains incorrect values.
  220. """
  221. self.check()
  222. if extensions is None:
  223. extensions = []
  224. for extension in extensions:
  225. self = extension.encode(self)
  226. output = io.BytesIO()
  227. # Prepare the header.
  228. head1 = (
  229. (0b10000000 if self.fin else 0)
  230. | (0b01000000 if self.rsv1 else 0)
  231. | (0b00100000 if self.rsv2 else 0)
  232. | (0b00010000 if self.rsv3 else 0)
  233. | self.opcode
  234. )
  235. head2 = 0b10000000 if mask else 0
  236. length = len(self.data)
  237. if length < 126:
  238. output.write(struct.pack("!BB", head1, head2 | length))
  239. elif length < 65536:
  240. output.write(struct.pack("!BBH", head1, head2 | 126, length))
  241. else:
  242. output.write(struct.pack("!BBQ", head1, head2 | 127, length))
  243. if mask:
  244. mask_bytes = secrets.token_bytes(4)
  245. output.write(mask_bytes)
  246. # Prepare the data.
  247. if mask:
  248. data = apply_mask(self.data, mask_bytes)
  249. else:
  250. data = self.data
  251. output.write(data)
  252. return output.getvalue()
  253. def check(self) -> None:
  254. """
  255. Check that reserved bits and opcode have acceptable values.
  256. Raises:
  257. ProtocolError: if a reserved bit or the opcode is invalid.
  258. """
  259. if self.rsv1 or self.rsv2 or self.rsv3:
  260. raise exceptions.ProtocolError("reserved bits must be 0")
  261. if self.opcode in CTRL_OPCODES:
  262. if len(self.data) > 125:
  263. raise exceptions.ProtocolError("control frame too long")
  264. if not self.fin:
  265. raise exceptions.ProtocolError("fragmented control frame")
  266. def prepare_data(data: Data) -> Tuple[int, bytes]:
  267. """
  268. Convert a string or byte-like object to an opcode and a bytes-like object.
  269. This function is designed for data frames.
  270. If ``data`` is a :class:`str`, return ``OP_TEXT`` and a :class:`bytes`
  271. object encoding ``data`` in UTF-8.
  272. If ``data`` is a bytes-like object, return ``OP_BINARY`` and a bytes-like
  273. object.
  274. Raises:
  275. TypeError: if ``data`` doesn't have a supported type.
  276. """
  277. if isinstance(data, str):
  278. return OP_TEXT, data.encode("utf-8")
  279. elif isinstance(data, BytesLike):
  280. return OP_BINARY, data
  281. else:
  282. raise TypeError("data must be str or bytes-like")
  283. def prepare_ctrl(data: Data) -> bytes:
  284. """
  285. Convert a string or byte-like object to bytes.
  286. This function is designed for ping and pong frames.
  287. If ``data`` is a :class:`str`, return a :class:`bytes` object encoding
  288. ``data`` in UTF-8.
  289. If ``data`` is a bytes-like object, return a :class:`bytes` object.
  290. Raises:
  291. TypeError: if ``data`` doesn't have a supported type.
  292. """
  293. if isinstance(data, str):
  294. return data.encode("utf-8")
  295. elif isinstance(data, BytesLike):
  296. return bytes(data)
  297. else:
  298. raise TypeError("data must be str or bytes-like")
  299. @dataclasses.dataclass
  300. class Close:
  301. """
  302. Code and reason for WebSocket close frames.
  303. Attributes:
  304. code: Close code.
  305. reason: Close reason.
  306. """
  307. code: int
  308. reason: str
  309. def __str__(self) -> str:
  310. """
  311. Return a human-readable representation of a close code and reason.
  312. """
  313. if 3000 <= self.code < 4000:
  314. explanation = "registered"
  315. elif 4000 <= self.code < 5000:
  316. explanation = "private use"
  317. else:
  318. explanation = CLOSE_CODES.get(self.code, "unknown")
  319. result = f"{self.code} ({explanation})"
  320. if self.reason:
  321. result = f"{result} {self.reason}"
  322. return result
  323. @classmethod
  324. def parse(cls, data: bytes) -> Close:
  325. """
  326. Parse the payload of a close frame.
  327. Args:
  328. data: payload of the close frame.
  329. Raises:
  330. ProtocolError: if data is ill-formed.
  331. UnicodeDecodeError: if the reason isn't valid UTF-8.
  332. """
  333. if len(data) >= 2:
  334. (code,) = struct.unpack("!H", data[:2])
  335. reason = data[2:].decode("utf-8")
  336. close = cls(code, reason)
  337. close.check()
  338. return close
  339. elif len(data) == 0:
  340. return cls(1005, "")
  341. else:
  342. raise exceptions.ProtocolError("close frame too short")
  343. def serialize(self) -> bytes:
  344. """
  345. Serialize the payload of a close frame.
  346. """
  347. self.check()
  348. return struct.pack("!H", self.code) + self.reason.encode("utf-8")
  349. def check(self) -> None:
  350. """
  351. Check that the close code has a valid value for a close frame.
  352. Raises:
  353. ProtocolError: if the close code is invalid.
  354. """
  355. if not (self.code in EXTERNAL_CLOSE_CODES or 3000 <= self.code < 5000):
  356. raise exceptions.ProtocolError("invalid status code")