Funktionierender Prototyp des Serious Games zur Vermittlung von Wissen zu Software-Engineering-Arbeitsmodellen.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

test_url.py 53KB

1 year ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495
  1. # -*- coding: utf-8 -*-
  2. # Copyright (c) Twisted Matrix Laboratories.
  3. # See LICENSE for details.
  4. from __future__ import unicode_literals
  5. import sys
  6. import socket
  7. from typing import Any, Iterable, Optional, Text, Tuple, cast
  8. from .common import HyperlinkTestCase
  9. from .. import URL, URLParseError
  10. from .._url import inet_pton, SCHEME_PORT_MAP
  # True only when the running interpreter's major version is 2.
  PY2 = (sys.version_info[0] == 2)
  # The text type: with unicode_literals in effect, the type of "" is the
  # text type on both Python 2 and Python 3.
  unicode = type("")
  # A fully-populated absolute URL shared by many of the tests below.
  BASIC_URL = "http://www.foo.com/a/nice/path/?zot=23&zut"
  # Base URI and (reference, expected result) pairs taken from RFC 3986
  # section 5.4, "Reference Resolution Examples".
  relativeLinkBaseForRFC3986 = "http://a/b/c/d;p?q"
  relativeLinkTestsForRFC3986 = [
      # "Normal" examples.
      # ('g:h', 'g:h') is deliberately left out: can't click on a
      # scheme-having url without an abs path.
      ("g", "http://a/b/c/g"),
      ("./g", "http://a/b/c/g"),
      ("g/", "http://a/b/c/g/"),
      ("/g", "http://a/g"),
      ("//g", "http://g"),
      ("?y", "http://a/b/c/d;p?y"),
      ("g?y", "http://a/b/c/g?y"),
      ("#s", "http://a/b/c/d;p?q#s"),
      ("g#s", "http://a/b/c/g#s"),
      ("g?y#s", "http://a/b/c/g?y#s"),
      (";x", "http://a/b/c/;x"),
      ("g;x", "http://a/b/c/g;x"),
      ("g;x?y#s", "http://a/b/c/g;x?y#s"),
      ("", "http://a/b/c/d;p?q"),
      (".", "http://a/b/c/"),
      ("./", "http://a/b/c/"),
      ("..", "http://a/b/"),
      ("../", "http://a/b/"),
      ("../g", "http://a/b/g"),
      ("../..", "http://a/"),
      ("../../", "http://a/"),
      ("../../g", "http://a/g"),
      # Abnormal examples.
      # ".." cannot be used to change the authority component of a URI.
      ("../../../g", "http://a/g"),
      ("../../../../g", "http://a/g"),
      # "." and ".." are only kept when they are part of a larger segment,
      # never when they stand alone.
      ("/./g", "http://a/g"),
      ("/../g", "http://a/g"),
      ("g.", "http://a/b/c/g."),
      (".g", "http://a/b/c/.g"),
      ("g..", "http://a/b/c/g.."),
      ("..g", "http://a/b/c/..g"),
      # Unnecessary or nonsensical forms of "." and "..".
      ("./../g", "http://a/b/g"),
      ("./g/.", "http://a/b/c/g/"),
      ("g/./h", "http://a/b/c/g/h"),
      ("g/../h", "http://a/b/c/h"),
      ("g;x=1/./y", "http://a/b/c/g;x=1/y"),
      ("g;x=1/../y", "http://a/b/c/y"),
      # The reference's query and fragment are split off from the path, so
      # dot segments inside them are left untouched.
      ("g?y/./x", "http://a/b/c/g?y/./x"),
      ("g?y/../x", "http://a/b/c/g?y/../x"),
      ("g#s/./x", "http://a/b/c/g#s/./x"),
      ("g#s/../x", "http://a/b/c/g#s/../x"),
  ]
  # URL texts that the round-trip tests parse with URL.from_text and expect
  # to get back unchanged from to_text.
  ROUNDTRIP_TESTS = (
      "http://localhost",
      "http://localhost/",
      "http://127.0.0.1/",
      "http://[::127.0.0.1]/",
      "http://[::1]/",
      "http://localhost/foo",
      "http://localhost/foo/",
      "http://localhost/foo!!bar/",
      "http://localhost/foo%20bar/",
      "http://localhost/foo%2Fbar/",
      "http://localhost/foo?n",
      "http://localhost/foo?n=v",
      "http://localhost/foo?n=/a/b",
      "http://example.com/foo!@$bar?b!@z=123",
      "http://localhost/asd?a=asd%20sdf/345",
      "http://(%2525)/(%2525)?(%2525)&(%2525)=(%2525)#(%2525)",
      "http://(%C3%A9)/(%C3%A9)?(%C3%A9)&(%C3%A9)=(%C3%A9)#(%C3%A9)",
      "?sslrootcert=/Users/glyph/Downloads/rds-ca-2015-root.pem&sslmode=verify",
      # Borrowed from the tests of boltons.urlutils.
      "http://googlewebsite.com/e-shops.aspx",
      "http://example.com:8080/search?q=123&business=Nothing%20Special",
      "http://hatnote.com:9000/?arg=1&arg=2&arg=3",
      "https://xn--bcher-kva.ch",
      "http://xn--ggbla1c4e.xn--ngbc5azd/",
      "http://tools.ietf.org/html/rfc3986#section-3.4",
      # 'http://wiki:pedia@hatnote.com' is currently disabled.
      "ftp://ftp.rfc-editor.org/in-notes/tar/RFCs0001-0500.tar.gz",
      "http://[1080:0:0:0:8:800:200C:417A]/index.html",
      "ssh://192.0.2.16:2222/",
      "https://[::101.45.75.219]:80/?hi=bye",
      "ldap://[::192.9.5.5]/dc=example,dc=com??sub?(sn=Jensen)",
      "mailto:me@example.com?to=me@example.com&body=hi%20http://wikipedia.org",
      "news:alt.rec.motorcycle",
      "tel:+1-800-867-5309",
      "urn:oasis:member:A00024:x",
      # One long magnet link, split across adjacent literals for line length.
      (
          "magnet:?xt=urn:btih:1a42b9e04e122b97a5254e3df77ab3c4b7da725f&dn=Puppy%"
          "20Linux%20precise-5.7.1.iso&tr=udp://tracker.openbittorrent.com:80&"
          "tr=udp://tracker.publicbt.com:80&tr=udp://tracker.istole.it:6969&"
          "tr=udp://tracker.ccc.de:80&tr=udp://open.demonii.com:1337"
      ),
      # Percent-encoded delimiters in the fields that allow percent-encoding.
      "https://%3A@example.com/",  # colon in username
      "https://%40@example.com/",  # at sign in username
      "https://%2f@example.com/",  # slash in username
      "https://a:%3a@example.com/",  # colon in password
      "https://a:%40@example.com/",  # at sign in password
      "https://a:%2f@example.com/",  # slash in password
      "https://a:%3f@example.com/",  # question mark in password
      "https://example.com/%2F/",  # slash in path
      "https://example.com/%3F/",  # question mark in path
      "https://example.com/%23/",  # hash in path
      "https://example.com/?%23=b",  # hash in query param name
      "https://example.com/?%3D=b",  # equals in query param name
      "https://example.com/?%26=b",  # ampersand in query param name
      "https://example.com/?a=%23",  # hash in query param value
      "https://example.com/?a=%26",  # ampersand in query param value
      "https://example.com/?a=%3D",  # equals in query param value
      "https://example.com/?foo+bar=baz",  # plus in query param name
      "https://example.com/?foo=bar+baz",  # plus in query param value
      # Double-encoded percent sign in every percent-encodable position.
      "http://(%2525):(%2525)@example.com/(%2525)/?(%2525)=(%2525)#(%2525)",
      # Colon in the first segment of a schemeless relative URL.
      "first_seg_rel_path__colon%3Anotok/second_seg__colon%3Aok",
  )
  132. class TestURL(HyperlinkTestCase):
  133. """
  134. Tests for L{URL}.
  135. """
  def assertUnicoded(self, u):
      # type: (URL) -> None
      """
      The given L{URL}'s components should be L{unicode}.

      The scheme and host may alternatively be C{None}, and so may the value
      of any query pair.  Every path segment, every query key, and the
      fragment must be exactly of type L{unicode}.

      @param u: The L{URL} to test.
      """
      self.assertTrue(
          isinstance(u.scheme, unicode) or u.scheme is None, repr(u)
      )
      self.assertTrue(isinstance(u.host, unicode) or u.host is None, repr(u))
      for seg in u.path:
          self.assertEqual(type(seg), unicode, repr(u))
      for (k, v) in u.query:
          # Check the key of this pair.  Checking a path segment here would
          # leave query keys unverified and would hit an unbound name when
          # the path is empty.
          self.assertEqual(type(k), unicode, repr(u))
          self.assertTrue(v is None or isinstance(v, unicode), repr(u))
      self.assertEqual(type(u.fragment), unicode, repr(u))
  def assertURL(
      self,
      u,  # type: URL
      scheme,  # type: Text
      host,  # type: Text
      path,  # type: Iterable[Text]
      query,  # type: Iterable[Tuple[Text, Optional[Text]]]
      fragment,  # type: Text
      port,  # type: Optional[int]
      userinfo="",  # type: Text
  ):
      # type: (...) -> None
      """
      The given L{URL} should have the given components.

      C{path} and C{query} are converted to tuples before comparison, so any
      iterable may be passed for them.

      @param u: The actual L{URL} to examine.
      @param scheme: The expected scheme.
      @param host: The expected host.
      @param path: The expected path.
      @param query: The expected query.
      @param fragment: The expected fragment.
      @param port: The expected port.
      @param userinfo: The expected userinfo.
      """
      actual = (
          u.scheme,
          u.host,
          u.path,
          u.query,
          u.fragment,
          u.port,
          u.userinfo,
      )
      expected = (
          scheme,
          host,
          tuple(path),
          tuple(query),
          fragment,
          port,
          # Compare against the caller's expectation; using u.userinfo here
          # would compare the URL with itself and never fail.
          userinfo,
      )
      self.assertEqual(actual, expected)
  def test_initDefaults(self):
      # type: () -> None
      """
      A L{URL} built with only some of its arguments fills in appropriate
      defaults for the rest.
      """
      argument_sets = (
          ("http", ""),
          ("http", "", [], []),
          ("http", "", [], [], ""),
      )
      for arguments in argument_sets:
          # Build each URL right before checking it, one at a time.
          url = URL(*arguments)
          self.assertUnicoded(url)
          self.assertURL(url, "http", "", [], [], "", 80, "")
  def test_init(self):
      # type: () -> None
      """
      L{URL} should accept L{unicode} parameters, both ASCII and non-ASCII.
      """
      ascii_url = URL("s", "h", ["p"], [("k", "v"), ("k", None)], "f")
      self.assertUnicoded(ascii_url)
      self.assertURL(
          ascii_url, "s", "h", ["p"], [("k", "v"), ("k", None)], "f", None
      )

      non_ascii_url = URL(
          "http", "\xe0", ["\xe9"], [("\u03bb", "\u03c0")], "\u22a5"
      )
      self.assertURL(
          non_ascii_url,
          "http",
          "\xe0",
          ["\xe9"],
          [("\u03bb", "\u03c0")],
          "\u22a5",
          80,
      )
  def test_initPercent(self):
      # type: () -> None
      """
      Percent characters passed to L{URL} are accepted and stored verbatim,
      without being interpreted as escapes.
      """
      url = URL("s", "%68", ["%70"], [("%6B", "%76"), ("%6B", None)], "%66")
      self.assertUnicoded(url)
      self.assertURL(
          url,
          "s",
          "%68",
          ["%70"],
          [("%6B", "%76"), ("%6B", None)],
          "%66",
          None,
      )
  def test_repr(self):
      # type: () -> None
      """
      L{URL.__repr__} shows the canonical text of the URL wrapped in a
      L{URL.from_text} call, which keeps it C{eval}-able yet easy to read.
      """
      url = URL(
          scheme="http",
          host="foo",
          path=["bar"],
          query=[("baz", None), ("k", "v")],
          fragment="frob",
      )
      shown = repr(url)
      wanted = "URL.from_text(%s)" % (repr("http://foo/bar?baz&k=v#frob"),)
      self.assertEqual(shown, wanted)
  def test_from_text(self):
      # type: () -> None
      """
      Parsing C{BASIC_URL} with L{URL.from_text} and serializing it again
      gives back the same text.
      """
      self.assertEqual(BASIC_URL, URL.from_text(BASIC_URL).to_text())
  def test_roundtrip(self):
      # type: () -> None
      """
      L{URL.to_text} should invert L{URL.from_text} for every entry of
      C{ROUNDTRIP_TESTS}.
      """
      for original_text in ROUNDTRIP_TESTS:
          parsed = URL.from_text(original_text)
          # with_password=True is passed so the userinfo entries keep their
          # password in the serialized text.
          self.assertEqual(original_text, parsed.to_text(with_password=True))
  def test_roundtrip_double_iri(self):
      # type: () -> None
      """
      Applying L{URL.to_iri} twice gives the same result as applying it
      once, both as L{URL} objects and as serialized text, for every entry
      of C{ROUNDTRIP_TESTS}.
      """
      for test in ROUNDTRIP_TESTS:
          url = URL.from_text(test)
          iri = url.to_iri()
          double_iri = iri.to_iri()
          # Use the TestCase assertions rather than bare asserts: they are
          # not stripped under -O and report both operands on failure.
          self.assertEqual(iri, double_iri)

          iri_text = iri.to_text(with_password=True)
          double_iri_text = double_iri.to_text(with_password=True)
          self.assertEqual(iri_text, double_iri_text)
  def test_equality(self):
      # type: () -> None
      """
      L{URL}s parsed by L{URL.from_text} from the same string compare equal
      (C{==}); those parsed from different strings compare unequal (C{!=}).
      """
      reference = URL.from_text(BASIC_URL)
      self.assertEqual(reference, URL.from_text(BASIC_URL))

      unrelated = URL.from_text(
          "ftp://www.anotherinvaliddomain.com/foo/bar/baz/?zot=21&zut"
      )
      self.assertNotEqual(reference, unrelated)
  def test_fragmentEquality(self):
      # type: () -> None
      """
      A L{URL} whose fragment is the empty string is equal to one whose
      fragment was never specified.
      """
      self.assertEqual(URL(fragment=""), URL())

      with_empty_fragment = URL.from_text("http://localhost/#")
      without_fragment = URL.from_text("http://localhost/")
      self.assertEqual(with_empty_fragment, without_fragment)
  def test_child(self):
      # type: () -> None
      """
      L{URL.child} adds one more path segment and leaves the query and
      fragment alone; a slash inside the new segment is percent-encoded.
      """
      base = URL.from_text(BASIC_URL)
      cases = (
          ("gong", "http://www.foo.com/a/nice/path/gong?zot=23&zut"),
          ("gong/", "http://www.foo.com/a/nice/path/gong%2F?zot=23&zut"),
          (
              "gong/double",
              "http://www.foo.com/a/nice/path/gong%2Fdouble?zot=23&zut",
          ),
          (
              "gong/double/",
              "http://www.foo.com/a/nice/path/gong%2Fdouble%2F?zot=23&zut",
          ),
      )
      for segment, wanted in cases:
          self.assertEqual(wanted, base.child(segment).to_text())
  def test_multiChild(self):
      # type: () -> None
      """
      Several segments passed to L{URL.child} as C{*args} are appended one
      after another.
      """
      base = URL.from_text("http://example.com/a/b")
      extended = base.child("c", "d", "e")
      self.assertEqual(extended.to_text(), "http://example.com/a/b/c/d/e")
  def test_childInitRoot(self):
      # type: () -> None
      """
      Calling L{URL.child} on a L{URL} that has no path yields a rooted
      L{URL} with exactly one path segment.
      """
      pathless = URL(host="www.foo.com")
      child = pathless.child("c")
      self.assertTrue(child.rooted)
      self.assertEqual("http://www.foo.com/c", child.to_text())
  def test_emptyChild(self):
      # type: () -> None
      """
      L{URL.child} called with no segments gives back a L{URL} equal to the
      one it was called on.
      """
      original = URL(host="www.foo.com")
      unchanged = original.child()
      self.assertEqual(unchanged, original)
  def test_sibling(self):
      # type: () -> None
      """
      L{URL.sibling} swaps out the final path segment while keeping the
      query and fragment as they were.
      """
      with_trailing_slash = URL.from_text(BASIC_URL)
      self.assertEqual(
          "http://www.foo.com/a/nice/path/sister?zot=23&zut",
          with_trailing_slash.sibling("sister").to_text(),
      )

      # Without a trailing '/', the last real segment ("path") is the one
      # that gets replaced.
      without_trailing_slash = URL.from_text(
          "http://www.foo.com/a/nice/path?zot=23&zut"
      )
      self.assertEqual(
          "http://www.foo.com/a/nice/sister?zot=23&zut",
          without_trailing_slash.sibling("sister").to_text(),
      )
  def test_click(self):
      # type: () -> None
      """
      L{URL.click} treats its argument as a relative URI-reference, resolves
      it against C{self} as the absolute base URI, and returns the resulting
      new L{URL}.
      """
      base = URL.from_text(BASIC_URL)

      simple_cases = (
          # The null reference is valid and resolves to the base itself.
          ("", "http://www.foo.com/a/nice/path/?zot=23&zut"),
          # A simple relative path drops the query.
          ("click", "http://www.foo.com/a/nice/path/click"),
          # An absolute path replaces both the path and the query.
          ("/click", "http://www.foo.com/click"),
          # A bare query replaces only the query.
          ("?burp", "http://www.foo.com/a/nice/path/?burp"),
      )
      for reference, wanted in simple_cases:
          self.assertEqual(wanted, base.click(reference).to_text())

      # Clicking from one full URL to another must not produce a '//'
      # between the authority and the path.
      full_click_text = base.click("http://www.foo.com/foobar").to_text()
      self.assertTrue("//foobar" not in full_click_text)

      # When the base has no query and the clicked reference has one, the
      # query must be carried over properly.
      no_query = URL.from_text("http://www.foo.com/me/noquery")
      self.assertEqual(
          "http://www.foo.com/me/17?spam=158",
          no_query.click("/me/17?spam=158").to_text(),
      )

      # A clicked link with no path removes everything from the path onward.
      with_query = URL.from_text("http://localhost/foo?abc=def")
      self.assertEqual(
          with_query.click("http://www.python.org").to_text(),
          "http://www.python.org",
      )

      # https://twistedmatrix.com/trac/ticket/8184
      dotted = URL.from_text("http://hatnote.com/a/b/../c/./d/e/..")
      collapsed = "http://hatnote.com/a/c/d/"
      self.assertEqual(dotted.click("").to_text(), collapsed)
      # Calling click() with no argument behaves like the empty string above.
      self.assertEqual(dotted.click().to_text(), collapsed)

      # click() also accepts a URL instance rather than text.
      # NOTE(review): fromText looks like a camelCase alias of from_text;
      # it is kept exactly as written -- confirm against the URL class.
      directory = URL.fromText("http://localhost/foo/?abc=def")
      relative = URL.from_text("bar")
      resolved = directory.click(relative)
      self.assertEqual(resolved.to_text(), "http://localhost/foo/bar")
  def test_clickRFC3986(self):
      # type: () -> None
      """
      L{URL.click} resolves every RFC 3986 example reference listed in
      C{relativeLinkTestsForRFC3986} to its expected result.
      """
      rfc_base = URL.from_text(relativeLinkBaseForRFC3986)
      for reference, wanted in relativeLinkTestsForRFC3986:
          resolved_text = rfc_base.click(reference).to_text()
          self.assertEqual(resolved_text, wanted)
  def test_clickSchemeRelPath(self):
      # type: () -> None
      """
      L{URL.click} refuses a reference that has a scheme together with a
      relative path, raising L{NotImplementedError}.
      """
      rfc_base = URL.from_text(relativeLinkBaseForRFC3986)
      for reference in ("g:h", "http:h"):
          self.assertRaises(NotImplementedError, rfc_base.click, reference)
  def test_cloneUnchanged(self):
      # type: () -> None
      """
      L{URL.replace} leaves the values it is given untouched: replacing
      every component with itself, or replacing nothing at all, yields an
      equal L{URL}.
      """
      original = URL.from_text("https://x:1/y?z=1#A")
      rebuilt = original.replace(
          original.scheme,
          original.host,
          original.path,
          original.query,
          original.fragment,
          original.port,
      )
      self.assertEqual(rebuilt, original)
      self.assertEqual(original.replace(), original)
  def test_clickCollapse(self):
      # type: () -> None
      """
      L{URL.click} collapses C{.} and C{..} segments as described in RFC 3986
      section 5.2.4.
      """
      # Each row is (base URL text, clicked reference, expected result text).
      rows = (
          ("http://localhost/", ".", "http://localhost/"),
          ("http://localhost/", "..", "http://localhost/"),
          ("http://localhost/a/b/c", ".", "http://localhost/a/b/"),
          ("http://localhost/a/b/c", "..", "http://localhost/a/"),
          ("http://localhost/a/b/c", "./d/e", "http://localhost/a/b/d/e"),
          ("http://localhost/a/b/c", "../d/e", "http://localhost/a/d/e"),
          ("http://localhost/a/b/c", "/./d/e", "http://localhost/d/e"),
          ("http://localhost/a/b/c", "/../d/e", "http://localhost/d/e"),
          ("http://localhost/a/b/c/", "../../d/e/", "http://localhost/a/d/e/"),
          ("http://localhost/a/./c", "../d/e", "http://localhost/d/e"),
          ("http://localhost/a/./c/", "../d/e", "http://localhost/a/d/e"),
          (
              "http://localhost/a/b/c/d",
              "./e/../f/../g",
              "http://localhost/a/b/c/g",
          ),
          ("http://localhost/a/b/c", "d//e", "http://localhost/a/b/d//e"),
      )
      for base_text, reference, wanted in rows:
          got = URL.from_text(base_text).click(reference).to_text()
          failure_note = (
              "{start}.click({click}) => {actual} not {expected}".format(
                  start=base_text,
                  click=repr(reference),
                  actual=got,
                  expected=wanted,
              )
          )
          self.assertEqual(got, wanted, failure_note)
  def test_queryAdd(self):
      # type: () -> None
      """
      L{URL.add} appends query parameters, keeping existing ones and the
      order in which parameters were added.
      """
      no_query = URL.from_text("http://www.foo.com/a/nice/path/")
      self.assertEqual(
          "http://www.foo.com/a/nice/path/?foo=bar",
          no_query.add("foo", "bar").to_text(),
      )

      host_only = URL(host="www.foo.com")
      self.assertEqual(
          "http://www.foo.com/?foo=bar",
          host_only.add("foo", "bar").to_text(),
      )

      base = URL.from_text(BASIC_URL)
      # A name with no value.
      self.assertEqual(
          "http://www.foo.com/a/nice/path/?zot=23&zut&burp",
          base.add("burp").to_text(),
      )
      # A name with a value.
      self.assertEqual(
          "http://www.foo.com/a/nice/path/?zot=23&zut&burp=xxx",
          base.add("burp", "xxx").to_text(),
      )
      # Two additions appear in the order they were made...
      self.assertEqual(
          "http://www.foo.com/a/nice/path/?zot=23&zut&burp=xxx&zing",
          base.add("burp", "xxx").add("zing").to_text(),
      )
      # ...so reversing the calls reverses the output.
      self.assertEqual(
          "http://www.foo.com/a/nice/path/?zot=23&zut&zing&burp=xxx",
          base.add("zing").add("burp", "xxx").to_text(),
      )
      # Adding an existing name keeps both values for that name.
      self.assertEqual(
          "http://www.foo.com/a/nice/path/?zot=23&zut&burp=xxx&zot=32",
          base.add("burp", "xxx").add("zot", "32").to_text(),
      )
  def test_querySet(self):
      # type: () -> None
      """
      L{URL.set} replaces the query parameters that carry a given name.
      """
      base = URL.from_text(BASIC_URL)

      replaced_value = base.set("zot", "32")
      self.assertEqual(
          "http://www.foo.com/a/nice/path/?zot=32&zut",
          replaced_value.to_text(),
      )

      # A valueless name can take a value, and a valued name can lose it.
      swapped = base.set("zot").set("zut", "itworked")
      self.assertEqual(
          "http://www.foo.com/a/nice/path/?zot&zut=itworked",
          swapped.to_text(),
      )

      # When a name occurs twice, setting it leaves one value in place of
      # both.
      deduplicated = base.add("zot", "xxx").set("zot", "32")
      self.assertEqual(
          "http://www.foo.com/a/nice/path/?zot=32&zut",
          deduplicated.to_text(),
      )
  def test_queryRemove(self):
      # type: () -> None
      """
      L{URL.remove} drops occurrences of a query parameter, optionally
      narrowed by C{value} and capped by C{limit}.
      """
      url = URL.from_text("https://example.com/a/b/?foo=1&bar=2&foo=3")

      # By name only: every "foo" goes away.
      self.assertEqual(
          url.remove("foo"),
          URL.from_text("https://example.com/a/b/?bar=2"),
      )
      # By name and value: only the matching pair goes away.
      self.assertEqual(
          url.remove(name="foo", value="1"),
          URL.from_text("https://example.com/a/b/?bar=2&foo=3"),
      )
      # With limit=1: only the first "foo" goes away.
      self.assertEqual(
          url.remove(name="foo", limit=1),
          URL.from_text("https://example.com/a/b/?bar=2&foo=3"),
      )
      # With limit=0: nothing is removed.
      self.assertEqual(
          url.remove(name="foo", value="1", limit=0),
          URL.from_text("https://example.com/a/b/?foo=1&bar=2&foo=3"),
      )
  def test_parseEqualSignInParamValue(self):
      # type: () -> None
      """
      Only the first C{=}-sign of a query parameter separates name from
      value; any later ones simply belong to the value.
      """
      empty_name = URL.from_text("http://localhost/?=x=x=x")
      self.assertEqual(empty_name.get(""), ["x=x=x"])
      self.assertEqual(empty_name.to_text(), "http://localhost/?=x=x=x")

      two_params = URL.from_text("http://localhost/?foo=x=x=x&bar=y")
      self.assertEqual(two_params.query, (("foo", "x=x=x"), ("bar", "y")))
      self.assertEqual(
          two_params.to_text(), "http://localhost/?foo=x=x=x&bar=y"
      )

      encoded_equals = URL.from_text(
          "https://example.com/?argument=3&argument=4&operator=%3D"
      )
      as_iri = encoded_equals.to_iri()
      self.assertEqual(as_iri.get("operator"), ["="])
      # Converting back to a URI must not escape the equals sign needlessly.
      self.assertEqual(as_iri.to_uri().get("operator"), ["="])
  def test_empty(self):
      # type: () -> None
      """
      A L{URL} built with no arguments serializes to the empty string.
      """
      blank = URL()
      self.assertEqual(blank.to_text(), "")
  def test_justQueryText(self):
      # type: () -> None
      """
      A L{URL} that has nothing but a query serializes to just the query
      text.
      """
      query_only = URL(query=[("hello", "world")])
      self.assertEqual(query_only.to_text(), "?hello=world")
  def test_identicalEqual(self):
      # type: () -> None
      """
      A L{URL} is equal to itself.
      """
      same = URL.from_text("http://localhost/")
      self.assertEqual(same, same)
  def test_similarEqual(self):
      # type: () -> None
      """
      Two separately parsed L{URL}s with equivalent components compare equal.
      """
      text = "http://u@localhost:8080/p/a/t/h?q=p#f"
      first = URL.from_text(text)
      second = URL.from_text(text)
      self.assertEqual(first, second)
  def test_differentNotEqual(self):
      # type: () -> None
      """
      L{URL}s for different resources are not equal (not C{==}) and are also
      unequal (C{!=}).
      """
      first = URL.from_text("http://localhost/a")
      second = URL.from_text("http://localhost/b")
      # Exercise the == operator directly, then the assertNotEqual helper.
      self.assertFalse(first == second, "%r != %r" % (first, second))
      self.assertNotEqual(first, second)
  def test_otherTypesNotEqual(self):
      # type: () -> None
      """
      A L{URL} never compares equal (C{==}) to a value of another type.
      """
      url = URL.from_text("http://localhost/")
      # Exercise the == operator directly first...
      self.assertFalse(url == 42, "URL must not equal a number.")
      self.assertFalse(url == object(), "URL must not equal an object.")
      # ...then the assertNotEqual helper.
      self.assertNotEqual(url, 42)
      self.assertNotEqual(url, object())
  def test_identicalNotUnequal(self):
      # type: () -> None
      """
      Comparing a L{URL} with itself using C{!=} is false.
      """
      same = URL.from_text("http://u@localhost:8080/p/a/t/h?q=p#f")
      self.assertFalse(same != same, "%r == itself" % same)
  def test_similarNotUnequal(self):
      # type: () -> None
      """
      Two separately parsed L{URL}s with the same structure are not unequal
      (C{!=}) to each other.
      """
      text = "http://u@localhost:8080/p/a/t/h?q=p#f"
      first = URL.from_text(text)
      second = URL.from_text(text)
      self.assertFalse(first != second, "%r == %r" % (first, second))
  def test_differentUnequal(self):
      # type: () -> None
      """
      L{URL}s whose structure differs are unequal (C{!=}) to each other.
      """
      first = URL.from_text("http://localhost/a")
      second = URL.from_text("http://localhost/b")
      self.assertTrue(first != second, "%r == %r" % (first, second))
  def test_otherTypesUnequal(self):
      # type: () -> None
      """
      A L{URL} is always unequal (C{!=}) to a value of another type.
      """
      url = URL.from_text("http://localhost/")
      differs_from_number = url != 42
      self.assertTrue(differs_from_number, "URL must differ from a number.")
      differs_from_object = url != object()
      self.assertTrue(
          differs_from_object, "URL must be differ from an object."
      )
  def test_asURI(self):
      # type: () -> None
      """
      L{URL.asURI} returns a new L{URL} in which any non-ASCII text has been
      encoded into pure US-ASCII, leaving the original untouched.
      """
      iri_text = (
          "http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/"
          "\N{LATIN SMALL LETTER E}\N{COMBINING ACUTE ACCENT}"
          "?\N{LATIN SMALL LETTER A}\N{COMBINING ACUTE ACCENT}="
          "\N{LATIN SMALL LETTER I}\N{COMBINING ACUTE ACCENT}"
          "#\N{LATIN SMALL LETTER U}\N{COMBINING ACUTE ACCENT}"
      )
      iri = URL.from_text(iri_text)
      uri = iri.asURI()

      # The source IRI still holds its original, unencoded text.
      self.assertEqual(iri.host, "\N{LATIN SMALL LETTER E WITH ACUTE}.com")
      self.assertEqual(
          iri.path[0], "\N{LATIN SMALL LETTER E}\N{COMBINING ACUTE ACCENT}"
      )
      self.assertEqual(iri.to_text(), iri_text)

      wanted_uri_text = "http://xn--9ca.com/%C3%A9?%C3%A1=%C3%AD#%C3%BA"
      got_uri_text = uri.to_text()
      self.assertEqual(
          got_uri_text,
          wanted_uri_text,
          "%r != %r" % (got_uri_text, wanted_uri_text),
      )
  def test_asIRI(self):
      # type: () -> None
      """
      L{URL.asIRI} returns a new L{URL} with percent-encoded text decoded,
      which reads better for humans, leaving the original untouched.
      """
      uri_text = "http://xn--9ca.com/%C3%A9?%C3%A1=%C3%AD#%C3%BA"
      uri = URL.from_text(uri_text)
      iri = uri.asIRI()

      # The source URI still holds its original, encoded text.
      self.assertEqual(uri.host, "xn--9ca.com")
      self.assertEqual(uri.path[0], "%C3%A9")
      self.assertEqual(uri.to_text(), uri_text)

      wanted_iri_text = (
          "http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/"
          "\N{LATIN SMALL LETTER E WITH ACUTE}"
          "?\N{LATIN SMALL LETTER A WITH ACUTE}="
          "\N{LATIN SMALL LETTER I WITH ACUTE}"
          "#\N{LATIN SMALL LETTER U WITH ACUTE}"
      )
      got_iri_text = iri.to_text()
      self.assertEqual(
          got_iri_text,
          wanted_iri_text,
          "%r != %r" % (got_iri_text, wanted_iri_text),
      )
  def test_badUTF8AsIRI(self):
      # type: () -> None
      """
      Percent-encoded bytes that are not valid UTF-8 stay percent-encoded
      when a URI is converted with L{URL.asIRI}; valid sequences alongside
      them are still decoded.
      """
      urlWithBinary = "http://xn--9ca.com/%00%FF/%C3%A9"
      actualIRI = URL.from_text(urlWithBinary).asIRI().to_text()

      e_acute = "\N{LATIN SMALL LETTER E WITH ACUTE}"
      # Only the "%00%FF" segment is expected to remain encoded.
      expectedIRI = "http://" + e_acute + ".com/" + "%00%FF/" + e_acute

      failure = "%r != %r" % (actualIRI, expectedIRI)
      self.assertEqual(actualIRI, expectedIRI, failure)
  def test_alreadyIRIAsIRI(self):
      # type: () -> None
      """
      Calling L{URL.asIRI} on a L{URL} that already consists of non-ASCII
      text yields the same non-ASCII text.
      """
      pieces = [
          "http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/",
          "\N{LATIN SMALL LETTER E}\N{COMBINING ACUTE ACCENT}",
          "?\N{LATIN SMALL LETTER A}\N{COMBINING ACUTE ACCENT}=",
          "\N{LATIN SMALL LETTER I}\N{COMBINING ACUTE ACCENT}",
          "#\N{LATIN SMALL LETTER U}\N{COMBINING ACUTE ACCENT}",
      ]
      unicodey = "".join(pieces)
      converted = URL.from_text(unicodey).asIRI()
      self.assertEqual(converted.to_text(), unicodey)
  def test_alreadyURIAsURI(self):
      # type: () -> None
      """
      Calling L{URL.asURI} on a L{URL} that is already fully encoded leaves
      the encoded text unchanged.
      """
      expectedURI = "http://xn--9ca.com/%C3%A9?%C3%A1=%C3%AD#%C3%BA"
      reencoded = URL.from_text(expectedURI).asURI()
      self.assertEqual(reencoded.to_text(), expectedURI)
  def test_userinfo(self):
      # type: () -> None
      """
      L{URL.from_text} parses the C{userinfo} portion of the URI separately
      from the host and port, and the password is left out of the text form
      unless it is explicitly requested.
      """
      source = "http://someuser:somepassword@example.com/some-segment@ignore"
      url = URL.from_text(source)

      # authority() takes a flag selecting whether the password is shown.
      with_password = url.authority(True)
      without_password = url.authority(False)
      self.assertEqual(with_password, "someuser:somepassword@example.com")
      self.assertEqual(without_password, "someuser:@example.com")

      self.assertEqual(url.userinfo, "someuser:somepassword")
      self.assertEqual(url.user, "someuser")

      rendered = url.to_text()
      self.assertEqual(
          rendered, "http://someuser:@example.com/some-segment@ignore"
      )

      # Replacing the userinfo with a bare user name drops the colon too.
      user_only = url.replace(userinfo="someuser")
      self.assertEqual(
          user_only.to_text(),
          "http://someuser@example.com/some-segment@ignore",
      )
  def test_portText(self):
      # type: () -> None
      """
      A custom port number in the text given to L{URL.from_text} is exposed
      as an integer and is preserved by L{URL.to_text}.
      """
      text = "http://www.example.com:8080/"
      parsed = URL.from_text(text)
      self.assertEqual(parsed.port, 8080)
      self.assertEqual(parsed.to_text(), text)
  def test_mailto(self):
      # type: () -> None
      """
      L{URL} is mostly used for HTTP, but other schemes should work too:
      a C{mailto:} URL for an email address round-trips cleanly through
      L{URL.from_text} and L{URL.to_text}.
      """
      address = "mailto:user@example.com"
      round_tripped = URL.from_text(address).to_text()
      self.assertEqual(round_tripped, address)
  def test_httpWithoutHost(self):
      # type: () -> None
      """
      An HTTP URL that has a path but no hostname round-trips cleanly and
      is parsed without a netloc.
      """
      text = "http:relative-path"
      parsed = URL.from_text(text)
      self.assertEqual(parsed.host, "")
      self.assertEqual(parsed.path, ("relative-path",))
      self.assertEqual(parsed.uses_netloc, False)
      self.assertEqual(parsed.to_text(), text)
  def test_queryIterable(self):
      # type: () -> None
      """
      The C{query} argument of L{URL} is converted into an N-tuple of
      2-tuples, whether it is given as a list of pairs or as a dictionary.
      """
      expected = (("alpha", "beta"),)
      from_pairs = URL(query=[("alpha", "beta")])
      self.assertEqual(from_pairs.query, expected)
      from_mapping = URL(query={"alpha": "beta"})
      self.assertEqual(from_mapping.query, expected)
  def test_pathIterable(self):
      # type: () -> None
      """
      The C{path} argument of L{URL} is converted into a tuple.
      """
      segments = ["hello", "world"]
      built = URL(path=segments)
      self.assertEqual(built.path, ("hello", "world"))
  def test_invalidArguments(self):
      # type: () -> None
      """
      Passing an argument of the wrong type to any of the constructor
      arguments of L{URL} raises a descriptive L{TypeError}.

      L{URL} typechecks very aggressively so that its constituent parts are
      all properly immutable, and so that bad data is reported at
      construction time rather than in some method call long after the
      code that called the constructor is off the stack.
      """

      class Bogus(object):
          # Only the repr is expected to appear in the error messages; the
          # str deliberately differs from it.
          def __str__(self):
              # type: () -> str
              return "wrong"

          def __repr__(self):
              # type: () -> str
              return "<unexpected>"

      # Name of the text type: "unicode" where bytes is str, else "str".
      textName = "unicode" if bytes is str else "str"

      def expectMessage(caught, expectation, name):
          # type: (Any, Text, Text) -> None
          wanted = "expected {0} for {1}, got {2}".format(
              expectation, name, "<unexpected>"
          )
          self.assertEqual(str(caught.exception), wanted)

      # Plain keyword arguments of the constructor, with the type name each
      # error message is expected to mention.
      simple_params = (
          ("scheme", textName),
          ("host", textName),
          ("fragment", textName),
          ("rooted", "bool"),
          ("userinfo", textName),
          ("port", "int or NoneType"),
      )
      for param, expectation in simple_params:
          with self.assertRaises(TypeError) as caught:
              URL(**{param: Bogus()})  # type: ignore[arg-type]
          expectMessage(caught, expectation, param)

      with self.assertRaises(TypeError) as caught:
          URL(path=[cast(Text, Bogus())])
      expectMessage(caught, textName, "path segment")

      with self.assertRaises(TypeError) as caught:
          URL(query=[("name", cast(Text, Bogus()))])
      expectMessage(
          caught, textName + " or NoneType", "query parameter value"
      )

      with self.assertRaises(TypeError) as caught:
          URL(query=[(cast(Text, Bogus()), "value")])
      expectMessage(caught, textName, "query parameter name")

      # Query items that are not 2-tuples must be rejected as well; no
      # particular message is required for these.
      with self.assertRaises(TypeError):
          URL(query=[cast(Tuple[Text, Text], Bogus())])
      for wrong_length in (("k", "v", "vv"), ("k",)):
          with self.assertRaises(ValueError):
              URL(query=[cast(Tuple[Text, Text], wrong_length)])

      # The same checks apply to methods that take text arguments.
      url = URL.from_text("https://valid.example.com/")
      method_checks = (
          (url.child, "path segment"),
          (url.sibling, "path segment"),
          (url.click, "relative URL"),
      )
      for method, name in method_checks:
          with self.assertRaises(TypeError) as caught:
              method(cast(Text, Bogus()))
          expectMessage(caught, textName, name)
  def test_technicallyTextIsIterableBut(self):
      # type: () -> None
      """
      L{str} (or L{unicode}, as appropriate) is technically iterable, but
      treating C{URL(path="foo")} like C{URL.from_text("f/o/o")} is never
      what the caller wants, so a text C{path} raises L{TypeError}.
      """
      with self.assertRaises(TypeError) as caught:
          URL(path="foo")

      wanted = "expected iterable of text for path, not: {0}".format(
          repr("foo")
      )
      self.assertEqual(str(caught.exception), wanted)
  def test_netloc(self):
      # type: () -> None
      """
      C{uses_netloc} reflects whether a L{URL} is written with the C{://}
      separator, both for directly constructed and for parsed URLs.
      """
      # (constructor arguments, expected uses_netloc, expected text)
      constructed = (
          ({"scheme": "https"}, True, "https://"),
          ({"scheme": "https", "uses_netloc": False}, False, "https:"),
          ({"scheme": "git+https"}, True, "git+https://"),
          ({"scheme": "mailto"}, False, "mailto:"),
          ({"scheme": "ztp"}, None, "ztp:"),
      )
      for kwargs, uses_netloc, text in constructed:
          built = URL(**kwargs)
          self.assertEqual(built.uses_netloc, uses_netloc)
          self.assertEqual(built.to_text(), text)

      # (text to parse, expected uses_netloc)
      parsed = (
          # scheme only: nothing indicates a netloc
          ("https:", False),
          # scheme plus an absolute path: still no netloc
          ("https:/", False),
          # the "//" alone is enough to indicate the :// syntax
          ("https://", True),
          ("ztp://test.com", True),
          ("ztp:test:com", False),
      )
      for text, uses_netloc in parsed:
          self.assertEqual(URL.from_text(text).uses_netloc, uses_netloc)
  def test_ipv6_with_port(self):
      # type: () -> None
      """
      A bracketed IPv6 host followed by a port is split into host and port,
      and the port given here is not the one mapped to the scheme.
      """
      address = "2001:0db8:85a3:0000:0000:8a2e:0370:7334"
      parsed = URL.from_text("https://[" + address + "]:80/")
      assert parsed.host == address
      assert parsed.port == 80
      assert SCHEME_PORT_MAP[parsed.scheme] != parsed.port
  def test_basic(self):
      # type: () -> None
      """
      L{URL.from_text} splits a complete URL into scheme, userinfo, host,
      path, query and fragment, for a registered name, an IPv4 address and
      a bracketed IPv6 address as host.

      Every case is put through the same set of checks so that a parsing
      problem specific to one kind of host cannot go unnoticed.
      """
      # (authority host as written in the text, expected C{host} value)
      hosts = (
          ("example.com", "example.com"),
          ("127.0.0.1", "127.0.0.1"),
          # brackets delimit the IPv6 literal and are not part of the host
          ("[::1]", "::1"),
      )
      for written, expected_host in hosts:
          text = "https://user:pass@" + written + "/path/to/here?k=v#nice"
          url = URL.from_text(text)
          assert url.scheme == "https", text
          assert url.userinfo == "user:pass", text
          assert url.host == expected_host, text
          assert url.path == ("path", "to", "here"), text
          assert url.get("k") == ["v"], text
          assert url.fragment == "nice", text
  def test_invalid_url(self):
      # type: () -> None
      """
      Text with newlines in the fragment is rejected with L{URLParseError}.
      """
      with self.assertRaises(URLParseError):
          URL.from_text("#\n\n")
  def test_invalid_authority_url(self):
      # type: () -> None
      """
      Text with newlines in the authority is rejected with
      L{URLParseError}.
      """
      with self.assertRaises(URLParseError):
          URL.from_text("http://abc:\n\n/#")
  def test_invalid_ipv6(self):
      # type: () -> None
      """
      Malformed IPv6 literals are rejected both by C{inet_pton} and, when
      used as a bracketed host, by L{URL.from_text}.
      """
      malformed = (
          "2001::0234:C1ab::A0:aabc:003F",
          "2001::1::3F",
          ":",
          "::::",
          "::256.0.0.1",
      )
      for address in malformed:
          bracketed = "http://[" + address + "]"
          with self.assertRaises(socket.error):
              inet_pton(socket.AF_INET6, address)
          with self.assertRaises(URLParseError):
              URL.from_text(bracketed)
  def test_invalid_port(self):
      # type: () -> None
      """
      A non-numeric port makes L{URL.from_text} raise; the error is checked
      once as L{URLParseError} and once as L{ValueError}.
      """
      with self.assertRaises(URLParseError):
          URL.from_text("ftp://portmouth:smash")
      with self.assertRaises(ValueError):
          URL.from_text("http://reader.googlewebsite.com:neverforget")
  def test_idna(self):
      # type: () -> None
      """
      Hosts are kept as written when parsed; C{to_uri} yields the
      C{xn--} form of a non-ASCII host and C{to_iri} turns it back.
      """
      unicode_form = URL.from_text("http://bücher.ch")
      self.assertEqual(unicode_form.host, "bücher.ch")
      self.assertEqual(unicode_form.to_text(), "http://bücher.ch")
      as_uri = unicode_form.to_uri()
      self.assertEqual(as_uri.to_text(), "http://xn--bcher-kva.ch")

      ascii_form = URL.from_text("https://xn--bcher-kva.ch")
      self.assertEqual(ascii_form.host, "xn--bcher-kva.ch")
      self.assertEqual(ascii_form.to_text(), "https://xn--bcher-kva.ch")
      as_iri = ascii_form.to_iri()
      self.assertEqual(as_iri.to_text(), "https://bücher.ch")
  def test_netloc_slashes(self):
      # type: () -> None
      """
      The C{//} after the scheme is kept or omitted consistently when URLs
      are parsed, constructed directly, or modified with C{replace}.
      """
      # (text, expected scheme); each text must round-trip unchanged.
      round_trips = (
          # sanity checks with well-known schemes
          ("mailto:mahmoud@hatnote.com", "mailto"),
          ("http://hatnote.com", "http"),
          # unrecognized schemes keep the form they were written in
          ("newscheme:a:b:c", "newscheme"),
          ("newerscheme://a/b/c", "newerscheme"),
          # compound schemes get a reasonable guess
          ("git+ftp://gitstub.biz/glyph/lefkowitz", "git+ftp"),
          ("what+mailto:freerealestate@enotuniq.org", "what+mailto"),
      )
      for text, scheme in round_trips:
          parsed = URL.from_text(text)
          self.assertEqual(parsed.scheme, scheme)
          self.assertEqual(parsed.to_text(), text)

      rooted_unknown = URL(scheme="ztp", path=("x", "y", "z"), rooted=True)
      self.assertEqual(rooted_unknown.to_text(), "ztp:/x/y/z")

      # An explicit uses_netloc=True also works without a host.
      forced_netloc = URL(
          scheme="git+ftp",
          path=("x", "y", "z", ""),
          rooted=True,
          uses_netloc=True,
      )
      # NOTE: the original author marked this case "broken bc urlunsplit".
      self.assertEqual(forced_netloc.to_text(), "git+ftp:///x/y/z/")

      # Unlikely in practice: switching to a scheme that uses no netloc.
      file_url = URL.from_text("file:///path/to/heck")
      as_mailto = file_url.replace(scheme="mailto")
      self.assertEqual(as_mailto.to_text(), "mailto:/path/to/heck")

      # Toggling uses_netloc on an unregistered scheme in both directions.
      url_text = "unregisteredscheme:///a/b/c"
      unregistered = URL.from_text(url_text)
      self.assertEqual(
          unregistered.replace(uses_netloc=False).to_text(),
          "unregisteredscheme:/a/b/c",
      )
      self.assertEqual(
          unregistered.replace(uses_netloc=True).to_text(), url_text
      )
  def test_rooted_to_relative(self):
      # type: () -> None
      """
      On host-relative URLs, the C{rooted} flag can be changed with
      C{replace}; the relative and the rooted URL render differently and
      do not compare equal.
      """
      relative = URL(path=["hello"])
      self.assertEqual(relative.to_text(), "hello")

      rooted = relative.replace(rooted=True)
      self.assertEqual(rooted.to_text(), "/hello")

      self.assertNotEqual(relative, rooted)
  def test_autorooted(self):
      # type: () -> None
      """
      The C{rooted} flag cannot be made to contradict other facts about
      the URL.  A URL with an authority (host) is inherently rooted, since
      URL syntax cannot express otherwise.  A URL whose path starts with an
      empty string also becomes rooted, with that empty string elided,
      because the two cases are syntactically indistinguishable in real
      URL text.
      """
      # A leading empty segment and rooted=True are two spellings of one URL.
      via_path = URL(path=["", "foo"], rooted=False)
      via_flag = URL(path=["foo"], rooted=True)
      self.assertEqual(via_path.rooted, True)
      self.assertEqual(via_flag.rooted, True)
      self.assertEqual(via_path, via_flag)

      # With a host present, rooted=False has no effect.
      unrooted_attempt = URL(host="foo", path=["bar"], rooted=False)
      default_rooting = URL(host="foo", path=["bar"])
      self.assertEqual(unrooted_attempt, default_rooting)
      self.assertEqual(default_rooting.rooted, True)
      self.assertEqual(unrooted_attempt.rooted, True)
  def test_rooted_with_port_but_no_host(self):
      # type: () -> None
      """
      A URL that includes the ``://`` netloc-separator for any reason is
      inherently rooted, whatever the ``rooted`` constructor argument says
      and whether or not it is given.

      The separator may be present because the constructor was called with
      an explicit host or port, or because the URL was parsed from text
      containing a literal ``://``.
      """
      explicit_unrooted = URL(scheme="udp", port=4900, rooted=False)
      implicit = URL(scheme="udp", port=4900)
      explicit_rooted = URL(scheme="udp", port=4900, rooted=True)

      for built in (explicit_unrooted, implicit, explicit_rooted):
          self.assertEqual(built.rooted, True)

      parsed = URL.from_text("udp://:4900")

      # Text forms agree with the parsed URL...
      for built in (explicit_unrooted, implicit):
          self.assertEqual(str(built), str(parsed))
      self.assertEqual(explicit_unrooted.asText(), parsed.asText())

      # ...and so do the URL objects themselves.
      self.assertEqual(explicit_unrooted, parsed)
      self.assertEqual(explicit_unrooted, implicit)
      self.assertEqual(explicit_unrooted, explicit_rooted)
      self.assertEqual(implicit, parsed)
      self.assertEqual(explicit_rooted, parsed)
  def test_wrong_constructor(self):
      # type: () -> None
      """
      Passing a whole URL, or an explicitly bad scheme, as the first
      positional argument of L{URL} raises L{ValueError}.
      """
      bad_first_arguments = (
          # a whole URL is not allowed
          BASIC_URL,
          # nor is an explicitly bad scheme
          "HTTP_____more_like_imHoTTeP",
      )
      for candidate in bad_first_arguments:
          with self.assertRaises(ValueError):
              URL(candidate)
  def test_encoded_userinfo(self):
      # type: () -> None
      """
      Percent-encoded userinfo is decoded by C{to_iri} and encoded again by
      C{to_uri}; C{with_password} controls whether the password appears.
      """
      plain = URL.from_text("http://user:pass@example.com")
      assert plain.userinfo == "user:pass"

      encoded = plain.replace(userinfo="us%20her:pass")
      iri = encoded.to_iri()

      shown = iri.to_text(with_password=True)
      hidden = iri.to_text(with_password=False)
      assert shown == "http://us her:pass@example.com"
      assert hidden == "http://us her:@example.com"

      reencoded = iri.to_uri().to_text(with_password=True)
      assert reencoded == "http://us%20her:pass@example.com"
  def test_hash(self):
      # type: () -> None
      """
      Equal L{URL}s hash alike, so they occupy a single dictionary slot
      regardless of how each one was built.
      """
      parsed = URL.from_text("http://blog.hatnote.com/ask?utm_source=geocity")
      assert hash(parsed) == hash(parsed)  # sanity

      seen = {}
      seen[parsed] = 1

      # The same URL, assembled by adding the query parameter afterwards.
      assembled = URL.from_text("http://blog.hatnote.com/ask")
      assembled = assembled.set("utm_source", "geocity")
      seen[assembled] = 2

      assert len(seen) == 1
      assert list(seen.values()) == [2]

      assert hash(URL()) == hash(URL())  # slightly more sanity
  def test_dir(self):
      # type: () -> None
      """
      C{dir()} of a L{URL} lists more than 15 names and leaves out the
      Twisted-compatibility aliases.
      """
      listing = dir(URL())
      assert len(listing) > 15
      # twisted compat
      for alias in ("fromText", "asText", "asURI", "asIRI"):
          assert alias not in listing
  def test_twisted_compat(self):
      # type: () -> None
      """
      The Twisted-style aliases C{fromText}, C{asText} and C{asURI} work on
      a URL mixing encoded and unencoded non-ASCII text.
      """
      mixed = "http://example.com/a%20té%C3%A9st"
      url = URL.fromText(mixed)
      assert url.asText() == mixed
      fully_encoded = url.asURI().asText()
      assert fully_encoded == "http://example.com/a%20t%C3%A9%C3%A9st"
      # TODO: assert url.asIRI().asText() == u'http://example.com/a%20téést'
  def test_set_ordering(self):
      # type: () -> None
      """
      Pin the current placement of query parameters after C{set} followed
      by C{add}.
      """
      # TODO: the ordering asserted here is the current one, not the ideal.
      base = URL.from_text("http://example.com/?a=b&c")
      updated = base.set("x", "x").add("x", "y")
      assert updated.to_text() == "http://example.com/?a=b&x=x&c&x=y"
      # Would expect:
      # assert url.to_text() == u'http://example.com/?a=b&c&x=x&x=y'
  def test_schemeless_path(self):
      # type: () -> None
      "See issue #4"
      encoded_text = "urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob"

      original = URL.from_text(encoded_text)
      via_text = URL.from_text(original.to_text())
      assert original == via_text  # sanity testing roundtripping

      via_iri = URL.from_text(original.to_iri().to_text())
      assert original == via_iri
      assert via_text == via_iri

      # Colons are fine in any segment after the first one.
      nested = URL.from_text("first-segment/" + encoded_text)
      nested_iri = nested.to_iri()
      assert nested_iri.to_text() == "first-segment/urn:ietf:wg:oauth:2.0:oob"

      nested_uri = URL.from_text(nested_iri.to_text()).to_uri()
      # colons stay decoded bc they're not in the first seg
      assert nested_iri == nested_uri
  def test_emoji_domain(self):
      # type: () -> None
      "See issue #7, affecting only narrow builds (2.6-3.3)"
      decoded = URL.from_text("https://xn--vi8hiv.ws").to_iri()
      # Nothing to compare against: the test passes as long as no
      # ValueError is raised while rendering.
      decoded.to_text()
  def test_delim_in_param(self):
      # type: () -> None
      """
      Per issue #6 and #8: a reserved delimiter inside a host, a path
      segment or a query parameter name makes L{URL} raise L{ValueError}.
      """
      self.assertRaises(ValueError, URL, scheme="http", host="a/c")
      self.assertRaises(ValueError, URL, path=("?",))
      self.assertRaises(ValueError, URL, path=("#",))
      # The trailing comma matters: without it the argument is just the
      # 2-tuple ("&", "test"), not a sequence holding one (name, value)
      # pair, and the delimiter check on the name would not be what fails.
      self.assertRaises(ValueError, URL, query=(("&", "test"),))
  def test_empty_paths_eq(self):
      # type: () -> None
      """
      A URL with an empty path equals the same URL with a lone trailing
      slash, in every combination of the two spellings.
      """
      slashed = "http://example.com/"
      bare = "http://example.com"
      combinations = (
          (slashed, bare),
          (bare, bare),
          (bare, slashed),
          (slashed, slashed),
      )
      for left_text, right_text in combinations:
          left = URL.from_text(left_text)
          right = URL.from_text(right_text)
          assert left == right
  def test_from_text_type(self):
      # type: () -> None
      """
      L{URL.from_text} accepts text and raises L{TypeError} for bytes and
      for arbitrary objects.
      """
      assert URL.from_text("#ok").fragment == "ok"  # sanity
      with self.assertRaises(TypeError):
          URL.from_text(b"bytes://x.y.z")
      with self.assertRaises(TypeError):
          URL.from_text(object())
  def test_from_text_bad_authority(self):
      # type: () -> None
      """
      L{URL.from_text} raises L{URLParseError} for malformed authorities.
      """
      malformed = (
          # unbalanced or doubled IPv6 brackets
          "http://[::1/",
          "http://::1]/",
          "http://[[::1]/",
          "http://[::1]]/",
          # colon with no port after it
          "http://127.0.0.1:",
          # port that is not an integer
          "http://127.0.0.1:hi",
          # extra port colon (makes for an invalid host)
          "http://127.0.0.1::80",
      )
      for text in malformed:
          with self.assertRaises(URLParseError):
              URL.from_text(text)
  def test_normalize(self):
      # type: () -> None
      """
      C{normalize} lower-cases the scheme and host, resolves dot segments,
      decodes needlessly encoded characters, upper-cases percent-encoding
      hex digits and encodes stray percent signs; each part can be
      switched off with its flag.
      """
      raw = URL.from_text("HTTP://Example.com/A%61/./../A%61?B%62=C%63#D%64")
      # Before normalizing, nothing has been decoded or collapsed.
      assert raw.get("Bb") == []
      assert raw.get("B%62") == ["C%63"]
      assert len(raw.path) == 4

      # Most of the expected normalizations, all in one go.
      normalized = raw.normalize()
      assert normalized.scheme == "http"
      assert normalized.host == "example.com"
      assert normalized.path == ("Aa",)
      assert normalized.get("Bb") == ["Cc"]
      assert normalized.fragment == "Dd"
      assert normalized.to_text() == "http://example.com/Aa?Bb=Cc#Dd"

      # With every flag off, normalize changes nothing.
      all_off = dict.fromkeys(
          ("scheme", "host", "path", "query", "fragment"), False
      )
      assert raw.normalize(**all_off) == raw

      # An empty path gains at least one slash.
      with_slash = URL.from_text("http://example.io").normalize()
      assert with_slash.to_text() == "http://example.io/"

      # Percent-encoded delimiters stay encoded; hex digits are upper-cased.
      delimited = URL.from_text("/a%2fb/cd%3f?k%3d=v%23#test").normalize()
      assert delimited.to_text() == "/a%2Fb/cd%3F?k%3D=v%23#test"

      # Invalid percent-encoding is left alone only with percents=False.
      untouched = URL(path=("", "%te%sts")).normalize(percents=False)
      assert untouched.to_text() == "/%te%sts"
      escaped = URL(path=("", "%te%sts")).normalize()
      assert escaped.to_text() == "/%25te%25sts"

      # Stray percent signs in every component.
      percenty = URL(
          scheme="ftp",
          path=["%%%", "%a%b"],
          query=[("%", "%%")],
          fragment="%",
          userinfo="%:%",
      )
      before = percenty.to_text(with_password=True)
      assert before == "ftp://%:%@/%%%/%a%b?%=%%#%"
      after = percenty.normalize().to_text(with_password=True)
      assert after == "ftp://%25:%25@/%25%25%25/%25a%25b?%25=%25%25#%25"
  def test_str(self):
      # type: () -> None
      """
      The text form of a L{URL} equals the text it was parsed from and its
      bytes form is fully percent-encoded; C{str} returns the native string
      type of the running Python version.  See also issue #49.
      """
      text = "http://example.com/á/y%20a%20y/?b=%25"
      encoded = b"http://example.com/%C3%A1/y%20a%20y/?b=%25"
      url = URL.from_text(text)

      assert unicode(url) == text
      assert bytes(url) == encoded

      if not PY2:
          assert isinstance(str(url), unicode)
          assert isinstance(bytes(url), bytes)
      else:
          assert isinstance(str(url), bytes)
          assert isinstance(unicode(url), unicode)
  def test_idna_corners(self):
      # type: () -> None
      """
      Corner cases of host conversion between the IRI and URI forms,
      including a multi-label non-Latin host and a mixed-case ASCII host.
      """
      accented = URL.from_text("http://abé.com/")
      assert accented.to_iri().host == "abé.com"
      assert accented.to_uri().host == "xn--ab-cja.com"

      japanese = URL.from_text("http://ドメイン.テスト.co.jp#test")
      assert japanese.to_iri().host == "ドメイン.テスト.co.jp"
      japanese_uri = japanese.to_uri()
      assert japanese_uri.host == "xn--eckwd4c7c.xn--zckzah.co.jp"
      assert japanese_uri.get_decoded_url().host == "ドメイン.テスト.co.jp"

      # A mixed-case ASCII host comes back lower-cased.
      text = "http://Example.com"
      decoded = URL.from_text(text).to_uri().get_decoded_url()
      assert decoded.host == "example.com"