Funktionierender Prototyp des Serious Games zur Vermittlung von Wissen zu Software-Engineering-Arbeitsmodellen.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

hypothesis.py 9.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321
  1. # -*- coding: utf-8 -*-
  2. """
  3. Hypothesis strategies.
  4. """
  5. from __future__ import absolute_import
  6. try:
  7. import hypothesis
  8. del hypothesis
  9. except ImportError:
  10. from typing import Tuple
  11. __all__ = () # type: Tuple[str, ...]
  12. else:
  13. from csv import reader as csv_reader
  14. from os.path import dirname, join
  15. from string import ascii_letters, digits
  16. from sys import maxunicode
  17. from typing import (
  18. Callable,
  19. Iterable,
  20. List,
  21. Optional,
  22. Sequence,
  23. Text,
  24. TypeVar,
  25. cast,
  26. )
  27. from gzip import open as open_gzip
  28. from . import DecodedURL, EncodedURL
  29. from hypothesis import assume
  30. from hypothesis.strategies import (
  31. composite,
  32. integers,
  33. lists,
  34. sampled_from,
  35. text,
  36. )
  37. from idna import IDNAError, check_label, encode as idna_encode
  38. __all__ = (
  39. "decoded_urls",
  40. "encoded_urls",
  41. "hostname_labels",
  42. "hostnames",
  43. "idna_text",
  44. "paths",
  45. "port_numbers",
  46. )
  # Generic type variable used by the DrawCallable alias below.
  T = TypeVar("T")
  # Alias used in the type comments of this module for the ``draw`` argument
  # of the @composite strategy functions: a callable that takes a callable
  # returning T and itself returns T.
  DrawCallable = Callable[[Callable[..., T]], T]

  # Make the name ``unichr`` available on interpreters that do not define it
  # by binding it to ``chr``.
  try:
      unichr
  except NameError:  # Py3
      unichr = chr  # type: Callable[[int], Text]
  def idna_characters():
      # type: () -> Text
      """
      Returns a string containing IDNA characters.

      The characters are every code point that the bundled table
      C{idna-tables-properties.csv.gz} (located next to this module) marks
      as C{PVALID}.  The table is parsed on the first call; the joined string
      is stored in a module global and handed back on later calls.
      """
      global _idnaCharacters

      if _idnaCharacters:
          return _idnaCharacters

      # Data source "IDNA Derived Properties":
      # https://www.iana.org/assignments/idna-tables-6.3.0/
      # idna-tables-6.3.0.xhtml#idna-tables-properties
      tablePath = join(dirname(__file__), "idna-tables-properties.csv.gz")

      characters = []

      with open_gzip(tablePath) as tableFile:
          decodedLines = (raw.decode("utf-8") for raw in tableFile)
          rows = csv_reader(decodedLines, delimiter=",")

          next(rows)  # Skip header row

          for codes, prop, _description in rows:
              if prop != "PVALID":
                  # CONTEXTO or CONTEXTJ are also allowed, but they come
                  # with rules, so we're punting on those here.
                  # See: https://tools.ietf.org/html/rfc5892
                  continue

              # A row holds either a single code point or a "start-end"
              # range; for a single code point the last element of the
              # split is the start itself.
              bounds = codes.split("-", 1)
              low = int(bounds[0], 16)
              high = int(bounds[-1], 16)

              # Code points above maxunicode cannot be represented
              # (happens using Py2 on Windows), so stop the range there.
              high = min(high, maxunicode)

              characters.extend(
                  unichr(codePoint) for codePoint in range(low, high + 1)
              )

      _idnaCharacters = u"".join(characters)
      return _idnaCharacters


  _idnaCharacters = ""  # type: Text
  @composite
  def idna_text(draw, min_size=1, max_size=None):
      # type: (DrawCallable, int, Optional[int]) -> Text
      """
      A strategy which generates IDNA-encodable text.

      @param min_size: The minimum number of characters in the text.
          Must be at least C{1}.
      @param max_size: The maximum number of characters in the text.
          Must be at least C{1} when given.
          Use C{None} for an unbounded size.
      """
      alphabet = idna_characters()

      assert min_size >= 1, "min_size must be at least 1"
      if max_size is not None:
          assert max_size >= 1, "max_size must be at least 1"

      result = cast(
          Text,
          draw(text(min_size=min_size, max_size=max_size, alphabet=alphabet)),
      )

      # FIXME: There should be a more efficient way to ensure we produce
      # valid IDNA text.
      # Text made of individually valid characters can still be rejected by
      # the IDNA encoder; such draws are discarded via assume(False).
      try:
          idna_encode(result)
      except IDNAError:
          assume(False)

      return result
  @composite
  def port_numbers(draw, allow_zero=False):
      # type: (DrawCallable, bool) -> int
      """
      A strategy which generates port numbers.

      Values are integers up to and including C{65535}; the lower bound is
      C{0} or C{1} depending on C{allow_zero}.

      @param allow_zero: Whether to allow port C{0} as a possible value.
      """
      lowest = 0 if allow_zero else 1
      candidates = integers(min_value=lowest, max_value=65535)
      return cast(int, draw(candidates))
  @composite
  def hostname_labels(draw, allow_idn=True):
      # type: (DrawCallable, bool) -> Text
      """
      A strategy which generates host name labels.

      Labels are between 1 and 63 characters when drawn; non-ASCII labels
      are shortened until their punycode form plus the C{xn--} prefix fits in
      63 characters.  Labels rejected by C{check_label} are discarded.

      @param allow_idn: Whether to allow non-ASCII characters as allowed by
          internationalized domain names (IDNs).
      """
      if not allow_idn:
          label = cast(
              Text,
              draw(
                  text(
                      min_size=1,
                      max_size=63,
                      alphabet=Text(ascii_letters + digits + u"-"),
                  )
              ),
          )
      else:
          label = cast(Text, draw(idna_text(min_size=1, max_size=63)))

          isAscii = True
          try:
              label.encode("ascii")
          except UnicodeEncodeError:
              isAscii = False

          if not isAscii:
              # The label doesn't encode to ASCII, so what counts is its
              # length after encoding to punycode and adding the xn-- prefix.
              punycodeBudget = 63 - len("xn--")
              while len(label.encode("punycode")) > punycodeBudget:
                  # Rather than bombing out, just trim from the end until
                  # it is short enough, so hypothesis doesn't have to
                  # generate new data.
                  label = label[:-1]

      # Filter invalid labels.
      # It would be better to reliably avoid generation of bogus labels in
      # the first place, but it's hard...
      try:
          check_label(label)
      except UnicodeError:  # pragma: no cover (not always drawn)
          assume(False)

      return label
  @composite
  def hostnames(draw, allow_leading_digit=True, allow_idn=True):
      # type: (DrawCallable, bool, bool) -> Text
      """
      A strategy which generates host names.

      A host name is a first label followed by one to four further labels,
      joined with dots; trailing labels are dropped while the joined name is
      longer than 252 characters.

      @param allow_leading_digit: Whether to allow a leading digit in host
          names; they were not allowed prior to RFC 1123.
      @param allow_idn: Whether to allow non-ASCII characters as allowed by
          internationalized domain names (IDNs).
      """

      def firstLabelAllowed(candidate):
          # type: (Text) -> bool
          # Only the first label is subject to the leading-digit rule.
          if allow_leading_digit:
              return True
          return candidate[0] not in digits

      # Draw first label, filtering out labels with leading digits if needed
      firstLabel = cast(
          Text,
          draw(hostname_labels(allow_idn=allow_idn).filter(firstLabelAllowed)),
      )

      # Draw remaining labels
      remainingLabels = cast(
          List[Text],
          draw(
              lists(
                  hostname_labels(allow_idn=allow_idn),
                  min_size=1,
                  max_size=4,
              )
          ),
      )

      labels = [firstLabel]
      labels.extend(remainingLabels)

      # Trim off labels until the total host name length fits in 252
      # characters.  This avoids having to filter the data.
      hostname = u".".join(labels)
      while len(hostname) > 252:
          labels.pop()
          hostname = u".".join(labels)

      return hostname
  def path_characters():
      # type: () -> str
      """
      Returns a string containing valid URL path characters.

      The string holds every code point from C{0} through C{sys.maxunicode}
      (inclusive) except the reserved characters C{#}, C{/} and C{?} and any
      code point that cannot be encoded as UTF-8.  It is built on the first
      call and cached in a module global for later calls.
      """
      global _path_characters

      if _path_characters is None:

          def chars():
              # type: () -> Iterable[Text]
              # maxunicode is itself a valid code point, so the range must
              # include it (hence the + 1).
              for i in range(maxunicode + 1):
                  c = unichr(i)

                  # Exclude reserved characters
                  if c in "#/?":
                      continue

                  # Exclude anything not UTF-8 compatible
                  try:
                      c.encode("utf-8")
                  except UnicodeEncodeError:
                      continue

                  yield c

          _path_characters = "".join(chars())

      return _path_characters


  _path_characters = None  # type: Optional[str]
  @composite
  def paths(draw):
      # type: (DrawCallable) -> Sequence[Text]
      """
      A strategy which generates lists of path segments.

      Each list has at most ten entries; every entry is non-empty text drawn
      from the alphabet returned by L{path_characters}.
      """
      segment = text(min_size=1, alphabet=path_characters())
      segments = lists(segment, max_size=10)
      return cast(List[Text], draw(segments))
  @composite
  def encoded_urls(draw):
      # type: (DrawCallable) -> EncodedURL
      """
      A strategy which generates L{EncodedURL}s.

      Each URL gets a drawn port, host name and path, and an C{http} or
      C{https} scheme.  A drawn port of C{0} is passed on as C{None}.

      Call the L{EncodedURL.to_uri} method on each URL to get an HTTP
      protocol-friendly URI.
      """
      drawnPort = cast(int, draw(port_numbers(allow_zero=True)))
      host = cast(Text, draw(hostnames()))
      path = cast(Sequence[Text], draw(paths()))
      scheme = cast(Text, draw(sampled_from((u"http", u"https"))))

      # Port 0 stands in for "no explicit port".
      port = None if drawnPort == 0 else drawnPort  # type: Optional[int]

      return EncodedURL(scheme=scheme, host=host, port=port, path=path)
  @composite
  def decoded_urls(draw):
      # type: (DrawCallable) -> DecodedURL
      """
      A strategy which generates L{DecodedURL}s.

      Each value is a L{DecodedURL} constructed from a URL drawn from
      L{encoded_urls}.

      Call the L{EncodedURL.to_uri} method on each URL to get an HTTP
      protocol-friendly URI.
      """
      encoded = draw(encoded_urls())
      return DecodedURL(encoded)