Development of an internal social media platform with personalised dashboards for students
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

url.py 6.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. from __future__ import absolute_import
  2. from collections import namedtuple
  3. from ..exceptions import LocationParseError
  4. url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment']
  5. # We only want to normalize urls with an HTTP(S) scheme.
  6. # urllib3 infers URLs without a scheme (None) to be http.
  7. NORMALIZABLE_SCHEMES = ('http', 'https', None)
  8. class Url(namedtuple('Url', url_attrs)):
  9. """
  10. Datastructure for representing an HTTP URL. Used as a return value for
  11. :func:`parse_url`. Both the scheme and host are normalized as they are
  12. both case-insensitive according to RFC 3986.
  13. """
  14. __slots__ = ()
  15. def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None,
  16. query=None, fragment=None):
  17. if path and not path.startswith('/'):
  18. path = '/' + path
  19. if scheme:
  20. scheme = scheme.lower()
  21. if host and scheme in NORMALIZABLE_SCHEMES:
  22. host = host.lower()
  23. return super(Url, cls).__new__(cls, scheme, auth, host, port, path,
  24. query, fragment)
  25. @property
  26. def hostname(self):
  27. """For backwards-compatibility with urlparse. We're nice like that."""
  28. return self.host
  29. @property
  30. def request_uri(self):
  31. """Absolute path including the query string."""
  32. uri = self.path or '/'
  33. if self.query is not None:
  34. uri += '?' + self.query
  35. return uri
  36. @property
  37. def netloc(self):
  38. """Network location including host and port"""
  39. if self.port:
  40. return '%s:%d' % (self.host, self.port)
  41. return self.host
  42. @property
  43. def url(self):
  44. """
  45. Convert self into a url
  46. This function should more or less round-trip with :func:`.parse_url`. The
  47. returned url may not be exactly the same as the url inputted to
  48. :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls
  49. with a blank port will have : removed).
  50. Example: ::
  51. >>> U = parse_url('http://google.com/mail/')
  52. >>> U.url
  53. 'http://google.com/mail/'
  54. >>> Url('http', 'username:password', 'host.com', 80,
  55. ... '/path', 'query', 'fragment').url
  56. 'http://username:password@host.com:80/path?query#fragment'
  57. """
  58. scheme, auth, host, port, path, query, fragment = self
  59. url = ''
  60. # We use "is not None" we want things to happen with empty strings (or 0 port)
  61. if scheme is not None:
  62. url += scheme + '://'
  63. if auth is not None:
  64. url += auth + '@'
  65. if host is not None:
  66. url += host
  67. if port is not None:
  68. url += ':' + str(port)
  69. if path is not None:
  70. url += path
  71. if query is not None:
  72. url += '?' + query
  73. if fragment is not None:
  74. url += '#' + fragment
  75. return url
  76. def __str__(self):
  77. return self.url
  78. def split_first(s, delims):
  79. """
  80. Given a string and an iterable of delimiters, split on the first found
  81. delimiter. Return two split parts and the matched delimiter.
  82. If not found, then the first part is the full input string.
  83. Example::
  84. >>> split_first('foo/bar?baz', '?/=')
  85. ('foo', 'bar?baz', '/')
  86. >>> split_first('foo/bar?baz', '123')
  87. ('foo/bar?baz', '', None)
  88. Scales linearly with number of delims. Not ideal for large number of delims.
  89. """
  90. min_idx = None
  91. min_delim = None
  92. for d in delims:
  93. idx = s.find(d)
  94. if idx < 0:
  95. continue
  96. if min_idx is None or idx < min_idx:
  97. min_idx = idx
  98. min_delim = d
  99. if min_idx is None or min_idx < 0:
  100. return s, '', None
  101. return s[:min_idx], s[min_idx + 1:], min_delim
  102. def parse_url(url):
  103. """
  104. Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
  105. performed to parse incomplete urls. Fields not provided will be None.
  106. Partly backwards-compatible with :mod:`urlparse`.
  107. Example::
  108. >>> parse_url('http://google.com/mail/')
  109. Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
  110. >>> parse_url('google.com:80')
  111. Url(scheme=None, host='google.com', port=80, path=None, ...)
  112. >>> parse_url('/foo?bar')
  113. Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
  114. """
  115. # While this code has overlap with stdlib's urlparse, it is much
  116. # simplified for our needs and less annoying.
  117. # Additionally, this implementations does silly things to be optimal
  118. # on CPython.
  119. if not url:
  120. # Empty
  121. return Url()
  122. scheme = None
  123. auth = None
  124. host = None
  125. port = None
  126. path = None
  127. fragment = None
  128. query = None
  129. # Scheme
  130. if '://' in url:
  131. scheme, url = url.split('://', 1)
  132. # Find the earliest Authority Terminator
  133. # (http://tools.ietf.org/html/rfc3986#section-3.2)
  134. url, path_, delim = split_first(url, ['/', '?', '#'])
  135. if delim:
  136. # Reassemble the path
  137. path = delim + path_
  138. # Auth
  139. if '@' in url:
  140. # Last '@' denotes end of auth part
  141. auth, url = url.rsplit('@', 1)
  142. # IPv6
  143. if url and url[0] == '[':
  144. host, url = url.split(']', 1)
  145. host += ']'
  146. # Port
  147. if ':' in url:
  148. _host, port = url.split(':', 1)
  149. if not host:
  150. host = _host
  151. if port:
  152. # If given, ports must be integers. No whitespace, no plus or
  153. # minus prefixes, no non-integer digits such as ^2 (superscript).
  154. if not port.isdigit():
  155. raise LocationParseError(url)
  156. try:
  157. port = int(port)
  158. except ValueError:
  159. raise LocationParseError(url)
  160. else:
  161. # Blank ports are cool, too. (rfc3986#section-3.2.3)
  162. port = None
  163. elif not host and url:
  164. host = url
  165. if not path:
  166. return Url(scheme, auth, host, port, path, query, fragment)
  167. # Fragment
  168. if '#' in path:
  169. path, fragment = path.split('#', 1)
  170. # Query
  171. if '?' in path:
  172. path, query = path.split('?', 1)
  173. return Url(scheme, auth, host, port, path, query, fragment)
  174. def get_host(url):
  175. """
  176. Deprecated. Use :func:`parse_url` instead.
  177. """
  178. p = parse_url(url)
  179. return p.scheme or 'http', p.hostname, p.port