123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278 |
- # -*- test-case-name: twisted.python.test.test_urlpath -*-
- # Copyright (c) Twisted Matrix Laboratories.
- # See LICENSE for details.
-
- """
- L{URLPath}, a representation of a URL.
- """
-
- from typing import cast
- from urllib.parse import quote as urlquote, unquote as urlunquote, urlunsplit
-
- from hyperlink import URL as _URL
-
# Every byte value from 1 through 127 (all of ASCII except NUL), in order.
# Passed as the ``safe`` argument to urlquote so that only bytes outside this
# set are percent-encoded.
_allascii = bytes(range(1, 128))
-
-
def _rereconstituter(name):
    """
    Attribute declaration to preserve mutability on L{URLPath}.

    @param name: a public attribute name
    @type name: native L{str}

    @return: a descriptor which retrieves the private version of the attribute
        on get; on set it stores the new value under the private name
        (encoding it to L{bytes} first when it is not already L{bytes}) and
        then calls C{_reconstitute} on the instance.
    """
    storageName = "_" + name

    def readPrivate(self):
        return getattr(self, storageName)

    def writePrivate(self, value):
        # Anything that is not already bytes is encoded with the "charmap"
        # codec before it is stored; if encoding fails nothing is assigned.
        if not isinstance(value, bytes):
            value = value.encode("charmap")
        setattr(self, storageName, value)
        # Rebuild the derived state now that one component has changed.
        self._reconstitute()

    return property(readPrivate, writePrivate)
-
-
class URLPath:
    """
    A representation of a URL.

    The five public attributes below are properties: reading one returns the
    stored L{bytes} value, and assigning one stores the new value and rebuilds
    the underlying C{_url} object via L{_reconstitute}.

    @ivar scheme: The scheme of the URL (e.g. 'http').
    @type scheme: L{bytes}

    @ivar netloc: The network location ("host").
    @type netloc: L{bytes}

    @ivar path: The path on the network location.
    @type path: L{bytes}

    @ivar query: The query argument (the portion after ?  in the URL).
    @type query: L{bytes}

    @ivar fragment: The page fragment (the portion after # in the URL).
    @type fragment: L{bytes}
    """

    def __init__(
        self, scheme=b"", netloc=b"localhost", path=b"", query=b"", fragment=b""
    ):
        """
        Create a L{URLPath} from its individual components.

        @param scheme: The scheme; a false value is replaced by C{b"http"}.
        @type scheme: L{bytes}

        @param netloc: The network location.
        @type netloc: L{bytes}

        @param path: The path; a false value is replaced by C{b"/"}.
        @type path: L{bytes}

        @param query: The query string.
        @type query: L{bytes}

        @param fragment: The fragment.
        @type fragment: L{bytes}
        """
        self._scheme = scheme or b"http"
        self._netloc = netloc
        self._path = path or b"/"
        self._query = query
        self._fragment = fragment
        self._reconstitute()

    def _reconstitute(self):
        """
        Reconstitute this L{URLPath} from all its given attributes.

        The five private components are joined into one URL, every byte that
        is not in C{_allascii} is percent-encoded, and the result is parsed
        into C{self._url}.
        """
        urltext = urlquote(
            urlunsplit(
                (self._scheme, self._netloc, self._path, self._query, self._fragment)
            ),
            safe=_allascii,
        )
        # The encode/decode round trip raises if the quoted text is not pure
        # ASCII; otherwise it leaves the text unchanged.
        self._url = _URL.fromText(urltext.encode("ascii").decode("ascii"))

    scheme = _rereconstituter("scheme")
    netloc = _rereconstituter("netloc")
    path = _rereconstituter("path")
    query = _rereconstituter("query")
    fragment = _rereconstituter("fragment")

    @classmethod
    def _fromURL(cls, urlInstance):
        """
        Reconstruct all the public instance variables of this L{URLPath} from
        its underlying L{_URL}.

        C{__init__} is bypassed; every private attribute is derived from
        C{urlInstance} instead.

        @param urlInstance: the object to base this L{URLPath} on.
        @type urlInstance: L{_URL}

        @return: a new L{URLPath}
        """
        self = cls.__new__(cls)
        # An empty path is replaced by a single empty segment.
        self._url = urlInstance.replace(path=urlInstance.path or [""])
        self._scheme = self._url.scheme.encode("ascii")
        self._netloc = self._url.authority().encode("ascii")
        # Render only the path, as a rooted URI, to get its encoded form.
        self._path = (
            _URL(path=self._url.path, rooted=True).asURI().asText().encode("ascii")
        )
        # Render only the query and drop the first character of the result
        # (presumably the leading "?").
        self._query = (_URL(query=self._url.query).asURI().asText().encode("ascii"))[1:]
        self._fragment = self._url.fragment.encode("ascii")
        return self

    def pathList(self, unquote=False, copy=True):
        """
        Split this URL's path into its components.

        @param unquote: whether to remove %-encoding from the returned strings.
            Each C{%XX} escape is replaced by the single byte it denotes.

        @param copy: (ignored, do not use)

        @return: The components of C{self.path}, preceded by an empty
            component.
        @rtype: L{list} of L{bytes}

        @raise UnicodeEncodeError: if a path segment contains non-ASCII text.
        """

        def toBytes(segment):
            # Encoding as ASCII first keeps the long-standing behavior of
            # rejecting segments that contain literal non-ASCII text.
            raw = segment.encode("ascii")
            if not unquote:
                return raw
            # Unquoting through latin-1 turns each %XX escape into the code
            # point XX, which encodes back to exactly the byte XX.  The
            # default (UTF-8 decode followed by an ASCII encode) raised
            # UnicodeEncodeError for any escaped byte >= 0x80.
            return urlunquote(segment, encoding="latin-1").encode("latin-1")

        return [b""] + [toBytes(segment) for segment in self._url.path]

    @classmethod
    def fromString(klass, url):
        """
        Make a L{URLPath} from a L{str}.

        @param url: A L{str} representation of a URL.
        @type url: L{str}

        @return: a new L{URLPath} derived from the given string.
        @rtype: L{URLPath}

        @raise ValueError: if C{url} is not a L{str}.
        """
        if not isinstance(url, str):
            raise ValueError("'url' must be a str")
        return klass._fromURL(_URL.fromText(url))

    @classmethod
    def fromBytes(klass, url):
        """
        Make a L{URLPath} from a L{bytes}.

        Bytes that are not in C{_allascii} are percent-encoded before the URL
        is parsed.

        @param url: A L{bytes} representation of a URL.
        @type url: L{bytes}

        @return: a new L{URLPath} derived from the given L{bytes}.
        @rtype: L{URLPath}

        @raise ValueError: if C{url} is not L{bytes}.

        @since: 15.4
        """
        if not isinstance(url, bytes):
            raise ValueError("'url' must be bytes")
        quoted = urlquote(url, safe=_allascii)
        return klass.fromString(quoted)

    @classmethod
    def fromRequest(klass, request):
        """
        Make a L{URLPath} from a L{twisted.web.http.Request}.

        @param request: A L{twisted.web.http.Request} to make the L{URLPath}
            from.  Its C{prePathURL()} result is passed to L{fromBytes}.

        @return: a new L{URLPath} derived from the given request.
        @rtype: L{URLPath}
        """
        return klass.fromBytes(request.prePathURL())

    def _mod(self, newURL, keepQuery):
        """
        Return a modified copy of C{self} using C{newURL}, keeping the query
        string if C{keepQuery} is C{True}.

        The fragment of C{newURL} is always cleared.

        @param newURL: a L{URL} to derive a new L{URLPath} from
        @type newURL: L{URL}

        @param keepQuery: if C{True}, preserve the query parameters from
            C{self} on the new L{URLPath}; if C{False}, give the new L{URLPath}
            no query parameters.
        @type keepQuery: L{bool}

        @return: a new L{URLPath}
        """
        return self._fromURL(
            newURL.replace(fragment="", query=self._url.query if keepQuery else ())
        )

    def sibling(self, path, keepQuery=False):
        """
        Get the sibling of the current L{URLPath}.  A sibling is a file which
        is in the same directory as the current file.

        @param path: The path of the sibling; must be ASCII.
        @type path: L{bytes}

        @param keepQuery: Whether to keep the query parameters on the returned
            L{URLPath}.
        @type keepQuery: L{bool}

        @return: a new L{URLPath}
        """
        return self._mod(self._url.sibling(path.decode("ascii")), keepQuery)

    def child(self, path, keepQuery=False):
        """
        Get the child of this L{URLPath}.

        @param path: The path of the child; must be ASCII.
        @type path: L{bytes}

        @param keepQuery: Whether to keep the query parameters on the returned
            L{URLPath}.
        @type keepQuery: L{bool}

        @return: a new L{URLPath}
        """
        return self._mod(self._url.child(path.decode("ascii")), keepQuery)

    def parent(self, keepQuery=False):
        """
        Get the parent directory of this L{URLPath}.

        @param keepQuery: Whether to keep the query parameters on the returned
            L{URLPath}.
        @type keepQuery: L{bool}

        @return: a new L{URLPath}
        """
        return self._mod(self._url.click(".."), keepQuery)

    def here(self, keepQuery=False):
        """
        Get the current directory of this L{URLPath}.

        @param keepQuery: Whether to keep the query parameters on the returned
            L{URLPath}.
        @type keepQuery: L{bool}

        @return: a new L{URLPath}
        """
        return self._mod(self._url.click("."), keepQuery)

    def click(self, st):
        """
        Return a path which is the URL where a browser would presumably take
        you if you clicked on a link with an HREF as given.

        Unlike L{sibling}, L{child}, L{parent} and L{here}, the result is
        built directly from the resolved URL, so its query and fragment are
        not reset.

        @param st: A relative URL, to be interpreted relative to C{self} as the
            base URL; must be ASCII.
        @type st: L{bytes}

        @return: a new L{URLPath}
        """
        return self._fromURL(self._url.click(st.decode("ascii")))

    def __str__(self) -> str:
        """
        The L{str} of a L{URLPath} is its URL text.
        """
        return cast(str, self._url.asURI().asText())

    def __repr__(self) -> str:
        """
        The L{repr} of a L{URLPath} is an eval-able expression which will
        construct a similar L{URLPath}.
        """
        return "URLPath(scheme={!r}, netloc={!r}, path={!r}, query={!r}, fragment={!r})".format(
            self.scheme,
            self.netloc,
            self.path,
            self.query,
            self.fragment,
        )
|