Funktionierender Prototyp des Serious Games zur Vermittlung von Wissen zu Software-Engineering-Arbeitsmodellen.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

client.py 57KB


  1. # -*- test-case-name: twisted.web.test.test_webclient,twisted.web.test.test_agent -*-
  2. # Copyright (c) Twisted Matrix Laboratories.
  3. # See LICENSE for details.
  4. """
  5. HTTP client.
  6. """
  7. import collections
  8. import os
  9. import warnings
  10. import zlib
  11. from functools import wraps
  12. from typing import Iterable
  13. from urllib.parse import urldefrag, urljoin, urlunparse as _urlunparse
  14. from zope.interface import implementer
  15. from incremental import Version
  16. from twisted.internet import defer, protocol, task
  17. from twisted.internet.abstract import isIPv6Address
  18. from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS
  19. from twisted.internet.interfaces import IOpenSSLContextFactory, IProtocol
  20. from twisted.logger import Logger
  21. from twisted.python.compat import nativeString, networkString
  22. from twisted.python.components import proxyForInterface
  23. from twisted.python.deprecate import (
  24. deprecatedModuleAttribute,
  25. getDeprecationWarningString,
  26. )
  27. from twisted.python.failure import Failure
  28. from twisted.web import error, http
  29. from twisted.web._newclient import _ensureValidMethod, _ensureValidURI
  30. from twisted.web.http_headers import Headers
  31. from twisted.web.iweb import (
  32. UNKNOWN_LENGTH,
  33. IAgent,
  34. IAgentEndpointFactory,
  35. IBodyProducer,
  36. IPolicyForHTTPS,
  37. IResponse,
  38. )
  39. def urlunparse(parts):
  40. result = _urlunparse(tuple(p.decode("charmap") for p in parts))
  41. return result.encode("charmap")
  42. class PartialDownloadError(error.Error):
  43. """
  44. Page was only partially downloaded, we got disconnected in middle.
  45. @ivar response: All of the response body which was downloaded.
  46. """
  47. class URI:
  48. """
  49. A URI object.
  50. @see: U{https://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-21}
  51. """
  52. def __init__(self, scheme, netloc, host, port, path, params, query, fragment):
  53. """
  54. @type scheme: L{bytes}
  55. @param scheme: URI scheme specifier.
  56. @type netloc: L{bytes}
  57. @param netloc: Network location component.
  58. @type host: L{bytes}
  59. @param host: Host name. For IPv6 address literals the brackets are
  60. stripped.
  61. @type port: L{int}
  62. @param port: Port number.
  63. @type path: L{bytes}
  64. @param path: Hierarchical path.
  65. @type params: L{bytes}
  66. @param params: Parameters for last path segment.
  67. @type query: L{bytes}
  68. @param query: Query string.
  69. @type fragment: L{bytes}
  70. @param fragment: Fragment identifier.
  71. """
  72. self.scheme = scheme
  73. self.netloc = netloc
  74. self.host = host.strip(b"[]")
  75. self.port = port
  76. self.path = path
  77. self.params = params
  78. self.query = query
  79. self.fragment = fragment
  80. @classmethod
  81. def fromBytes(cls, uri, defaultPort=None):
  82. """
  83. Parse the given URI into a L{URI}.
  84. @type uri: C{bytes}
  85. @param uri: URI to parse.
  86. @type defaultPort: C{int} or L{None}
  87. @param defaultPort: An alternate value to use as the port if the URI
  88. does not include one.
  89. @rtype: L{URI}
  90. @return: Parsed URI instance.
  91. """
  92. uri = uri.strip()
  93. scheme, netloc, path, params, query, fragment = http.urlparse(uri)
  94. if defaultPort is None:
  95. if scheme == b"https":
  96. defaultPort = 443
  97. else:
  98. defaultPort = 80
  99. if b":" in netloc:
  100. host, port = netloc.rsplit(b":", 1)
  101. try:
  102. port = int(port)
  103. except ValueError:
  104. host, port = netloc, defaultPort
  105. else:
  106. host, port = netloc, defaultPort
  107. return cls(scheme, netloc, host, port, path, params, query, fragment)
  108. def toBytes(self):
  109. """
  110. Assemble the individual parts of the I{URI} into a fully formed I{URI}.
  111. @rtype: C{bytes}
  112. @return: A fully formed I{URI}.
  113. """
  114. return urlunparse(
  115. (
  116. self.scheme,
  117. self.netloc,
  118. self.path,
  119. self.params,
  120. self.query,
  121. self.fragment,
  122. )
  123. )
  124. @property
  125. def originForm(self):
  126. """
  127. The absolute I{URI} path including I{URI} parameters, query string and
  128. fragment identifier.
  129. @see: U{https://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-21#section-5.3}
  130. @return: The absolute path in original form.
  131. @rtype: L{bytes}
  132. """
  133. # The HTTP bis draft says the origin form should not include the
  134. # fragment.
  135. path = urlunparse((b"", b"", self.path, self.params, self.query, b""))
  136. if path == b"":
  137. path = b"/"
  138. return path
  139. def _urljoin(base, url):
  140. """
  141. Construct a full ("absolute") URL by combining a "base URL" with another
  142. URL. Informally, this uses components of the base URL, in particular the
  143. addressing scheme, the network location and (part of) the path, to provide
  144. missing components in the relative URL.
  145. Additionally, the fragment identifier is preserved according to the HTTP
  146. 1.1 bis draft.
  147. @type base: C{bytes}
  148. @param base: Base URL.
  149. @type url: C{bytes}
  150. @param url: URL to combine with C{base}.
  151. @return: An absolute URL resulting from the combination of C{base} and
  152. C{url}.
  153. @see: L{urllib.parse.urljoin()}
  154. @see: U{https://tools.ietf.org/html/draft-ietf-httpbis-p2-semantics-22#section-7.1.2}
  155. """
  156. base, baseFrag = urldefrag(base)
  157. url, urlFrag = urldefrag(urljoin(base, url))
  158. return urljoin(url, b"#" + (urlFrag or baseFrag))
  159. def _makeGetterFactory(url, factoryFactory, contextFactory=None, *args, **kwargs):
  160. """
  161. Create and connect an HTTP page getting factory.
  162. Any additional positional or keyword arguments are used when calling
  163. C{factoryFactory}.
  164. @param factoryFactory: Factory factory that is called with C{url}, C{args}
  165. and C{kwargs} to produce the getter
  166. @param contextFactory: Context factory to use when creating a secure
  167. connection, defaulting to L{None}
  168. @return: The factory created by C{factoryFactory}
  169. """
  170. uri = URI.fromBytes(_ensureValidURI(url.strip()))
  171. factory = factoryFactory(url, *args, **kwargs)
  172. from twisted.internet import reactor
  173. if uri.scheme == b"https":
  174. from twisted.internet import ssl
  175. if contextFactory is None:
  176. contextFactory = ssl.ClientContextFactory()
  177. reactor.connectSSL(nativeString(uri.host), uri.port, factory, contextFactory)
  178. else:
  179. reactor.connectTCP(nativeString(uri.host), uri.port, factory)
  180. return factory
  181. # The code which follows is based on the new HTTP client implementation. It
  182. # should be significantly better than anything above, though it is not yet
  183. # feature equivalent.
  184. from twisted.web._newclient import (
  185. HTTP11ClientProtocol,
  186. PotentialDataLoss,
  187. Request,
  188. RequestGenerationFailed,
  189. RequestNotSent,
  190. RequestTransmissionFailed,
  191. Response,
  192. ResponseDone,
  193. ResponseFailed,
  194. ResponseNeverReceived,
  195. _WrapperException,
  196. )
  197. from twisted.web.error import SchemeNotSupported
  198. try:
  199. from OpenSSL import SSL
  200. except ImportError:
  201. SSL = None # type: ignore[assignment]
  202. else:
  203. from twisted.internet.ssl import (
  204. CertificateOptions,
  205. optionsForClientTLS,
  206. platformTrust,
  207. )
  208. def _requireSSL(decoratee):
  209. """
  210. The decorated method requires pyOpenSSL to be present, or it raises
  211. L{NotImplementedError}.
  212. @param decoratee: A function which requires pyOpenSSL.
  213. @type decoratee: L{callable}
  214. @return: A function which raises L{NotImplementedError} if pyOpenSSL is not
  215. installed; otherwise, if it is installed, simply return C{decoratee}.
  216. @rtype: L{callable}
  217. """
  218. if SSL is None:
  219. @wraps(decoratee)
  220. def raiseNotImplemented(*a, **kw):
  221. """
  222. pyOpenSSL is not available.
  223. @param a: The positional arguments for C{decoratee}.
  224. @param kw: The keyword arguments for C{decoratee}.
  225. @raise NotImplementedError: Always.
  226. """
  227. raise NotImplementedError("SSL support unavailable")
  228. return raiseNotImplemented
  229. return decoratee
  230. class WebClientContextFactory:
  231. """
  232. This class is deprecated. Please simply use L{Agent} as-is, or if you want
  233. to customize something, use L{BrowserLikePolicyForHTTPS}.
  234. A L{WebClientContextFactory} is an HTTPS policy which totally ignores the
  235. hostname and port. It performs basic certificate verification, however the
  236. lack of validation of service identity (e.g. hostname validation) means it
  237. is still vulnerable to man-in-the-middle attacks. Don't use it any more.
  238. """
  239. def _getCertificateOptions(self, hostname, port):
  240. """
  241. Return a L{CertificateOptions}.
  242. @param hostname: ignored
  243. @param port: ignored
  244. @return: A new CertificateOptions instance.
  245. @rtype: L{CertificateOptions}
  246. """
  247. return CertificateOptions(method=SSL.SSLv23_METHOD, trustRoot=platformTrust())
  248. @_requireSSL
  249. def getContext(self, hostname, port):
  250. """
  251. Return an L{OpenSSL.SSL.Context}.
  252. @param hostname: ignored
  253. @param port: ignored
  254. @return: A new SSL context.
  255. @rtype: L{OpenSSL.SSL.Context}
  256. """
  257. return self._getCertificateOptions(hostname, port).getContext()
  258. @implementer(IPolicyForHTTPS)
  259. class BrowserLikePolicyForHTTPS:
  260. """
  261. SSL connection creator for web clients.
  262. """
  263. def __init__(self, trustRoot=None):
  264. self._trustRoot = trustRoot
  265. @_requireSSL
  266. def creatorForNetloc(self, hostname, port):
  267. """
  268. Create a L{client connection creator
  269. <twisted.internet.interfaces.IOpenSSLClientConnectionCreator>} for a
  270. given network location.
  271. @param hostname: The hostname part of the URI.
  272. @type hostname: L{bytes}
  273. @param port: The port part of the URI.
  274. @type port: L{int}
  275. @return: a connection creator with appropriate verification
  276. restrictions set
  277. @rtype: L{client connection creator
  278. <twisted.internet.interfaces.IOpenSSLClientConnectionCreator>}
  279. """
  280. return optionsForClientTLS(hostname.decode("ascii"), trustRoot=self._trustRoot)
# Register WebClientContextFactory as a deprecated attribute of its module
# (deprecated as of Twisted 14.0.0, with BrowserLikePolicyForHTTPS named as
# the replacement).  The message handed to deprecatedModuleAttribute is the
# second "; "-separated segment of the generated deprecation warning string.
deprecatedModuleAttribute(
    Version("Twisted", 14, 0, 0),
    getDeprecationWarningString(
        WebClientContextFactory,
        Version("Twisted", 14, 0, 0),
        replacement=BrowserLikePolicyForHTTPS,
    ).split("; ")[1],
    WebClientContextFactory.__module__,
    WebClientContextFactory.__name__,
)
  291. @implementer(IPolicyForHTTPS)
  292. class HostnameCachingHTTPSPolicy:
  293. """
  294. IPolicyForHTTPS that wraps a L{IPolicyForHTTPS} and caches the created
  295. L{IOpenSSLClientConnectionCreator}.
  296. This policy will cache up to C{cacheSize}
  297. L{client connection creators <twisted.internet.interfaces.
  298. IOpenSSLClientConnectionCreator>} for reuse in subsequent requests to
  299. the same hostname.
  300. @ivar _policyForHTTPS: See C{policyforHTTPS} parameter of L{__init__}.
  301. @ivar _cache: A cache associating hostnames to their
  302. L{client connection creators <twisted.internet.interfaces.
  303. IOpenSSLClientConnectionCreator>}.
  304. @type _cache: L{collections.OrderedDict}
  305. @ivar _cacheSize: See C{cacheSize} parameter of L{__init__}.
  306. @since: Twisted 19.2.0
  307. """
  308. def __init__(self, policyforHTTPS, cacheSize=20):
  309. """
  310. @param policyforHTTPS: The IPolicyForHTTPS to wrap.
  311. @type policyforHTTPS: L{IPolicyForHTTPS}
  312. @param cacheSize: The maximum size of the hostname cache.
  313. @type cacheSize: L{int}
  314. """
  315. self._policyForHTTPS = policyforHTTPS
  316. self._cache = collections.OrderedDict()
  317. self._cacheSize = cacheSize
  318. def creatorForNetloc(self, hostname, port):
  319. """
  320. Create a L{client connection creator
  321. <twisted.internet.interfaces.IOpenSSLClientConnectionCreator>} for a
  322. given network location and cache it for future use.
  323. @param hostname: The hostname part of the URI.
  324. @type hostname: L{bytes}
  325. @param port: The port part of the URI.
  326. @type port: L{int}
  327. @return: a connection creator with appropriate verification
  328. restrictions set
  329. @rtype: L{client connection creator
  330. <twisted.internet.interfaces.IOpenSSLClientConnectionCreator>}
  331. """
  332. host = hostname.decode("ascii")
  333. try:
  334. creator = self._cache.pop(host)
  335. except KeyError:
  336. creator = self._policyForHTTPS.creatorForNetloc(hostname, port)
  337. self._cache[host] = creator
  338. if len(self._cache) > self._cacheSize:
  339. self._cache.popitem(last=False)
  340. return creator
  341. @implementer(IOpenSSLContextFactory)
  342. class _ContextFactoryWithContext:
  343. """
  344. A L{_ContextFactoryWithContext} is like a
  345. L{twisted.internet.ssl.ContextFactory} with a pre-created context.
  346. @ivar _context: A Context.
  347. @type _context: L{OpenSSL.SSL.Context}
  348. """
  349. def __init__(self, context):
  350. """
  351. Initialize a L{_ContextFactoryWithContext} with a context.
  352. @param context: An SSL context.
  353. @type context: L{OpenSSL.SSL.Context}
  354. """
  355. self._context = context
  356. def getContext(self):
  357. """
  358. Return the context created by
  359. L{_DeprecatedToCurrentPolicyForHTTPS._webContextFactory}.
  360. @return: A context.
  361. @rtype: L{OpenSSL.SSL.Context}
  362. """
  363. return self._context
  364. @implementer(IPolicyForHTTPS)
  365. class _DeprecatedToCurrentPolicyForHTTPS:
  366. """
  367. Adapt a web context factory to a normal context factory.
  368. @ivar _webContextFactory: An object providing a getContext method with
  369. C{hostname} and C{port} arguments.
  370. @type _webContextFactory: L{WebClientContextFactory} (or object with a
  371. similar C{getContext} method).
  372. """
  373. def __init__(self, webContextFactory):
  374. """
  375. Wrap a web context factory in an L{IPolicyForHTTPS}.
  376. @param webContextFactory: An object providing a getContext method with
  377. C{hostname} and C{port} arguments.
  378. @type webContextFactory: L{WebClientContextFactory} (or object with a
  379. similar C{getContext} method).
  380. """
  381. self._webContextFactory = webContextFactory
  382. def creatorForNetloc(self, hostname, port):
  383. """
  384. Called the wrapped web context factory's C{getContext} method with a
  385. hostname and port number and return the resulting context object.
  386. @param hostname: The hostname part of the URI.
  387. @type hostname: L{bytes}
  388. @param port: The port part of the URI.
  389. @type port: L{int}
  390. @return: A context factory.
  391. @rtype: L{IOpenSSLContextFactory}
  392. """
  393. context = self._webContextFactory.getContext(hostname, port)
  394. return _ContextFactoryWithContext(context)
  395. @implementer(IBodyProducer)
  396. class FileBodyProducer:
  397. """
  398. L{FileBodyProducer} produces bytes from an input file object incrementally
  399. and writes them to a consumer.
  400. Since file-like objects cannot be read from in an event-driven manner,
  401. L{FileBodyProducer} uses a L{Cooperator} instance to schedule reads from
  402. the file. This process is also paused and resumed based on notifications
  403. from the L{IConsumer} provider being written to.
  404. The file is closed after it has been read, or if the producer is stopped
  405. early.
  406. @ivar _inputFile: Any file-like object, bytes read from which will be
  407. written to a consumer.
  408. @ivar _cooperate: A method like L{Cooperator.cooperate} which is used to
  409. schedule all reads.
  410. @ivar _readSize: The number of bytes to read from C{_inputFile} at a time.
  411. """
  412. def __init__(self, inputFile, cooperator=task, readSize=2 ** 16):
  413. self._inputFile = inputFile
  414. self._cooperate = cooperator.cooperate
  415. self._readSize = readSize
  416. self.length = self._determineLength(inputFile)
  417. def _determineLength(self, fObj):
  418. """
  419. Determine how many bytes can be read out of C{fObj} (assuming it is not
  420. modified from this point on). If the determination cannot be made,
  421. return C{UNKNOWN_LENGTH}.
  422. """
  423. try:
  424. seek = fObj.seek
  425. tell = fObj.tell
  426. except AttributeError:
  427. return UNKNOWN_LENGTH
  428. originalPosition = tell()
  429. seek(0, os.SEEK_END)
  430. end = tell()
  431. seek(originalPosition, os.SEEK_SET)
  432. return end - originalPosition
  433. def stopProducing(self):
  434. """
  435. Permanently stop writing bytes from the file to the consumer by
  436. stopping the underlying L{CooperativeTask}.
  437. """
  438. self._inputFile.close()
  439. try:
  440. self._task.stop()
  441. except task.TaskFinished:
  442. pass
  443. def startProducing(self, consumer):
  444. """
  445. Start a cooperative task which will read bytes from the input file and
  446. write them to C{consumer}. Return a L{Deferred} which fires after all
  447. bytes have been written. If this L{Deferred} is cancelled before it is
  448. fired, stop reading and writing bytes.
  449. @param consumer: Any L{IConsumer} provider
  450. """
  451. self._task = self._cooperate(self._writeloop(consumer))
  452. d = self._task.whenDone()
  453. def maybeStopped(reason):
  454. if reason.check(defer.CancelledError):
  455. self.stopProducing()
  456. elif reason.check(task.TaskStopped):
  457. pass
  458. else:
  459. return reason
  460. # IBodyProducer.startProducing's Deferred isn't supposed to fire if
  461. # stopProducing is called.
  462. return defer.Deferred()
  463. d.addCallbacks(lambda ignored: None, maybeStopped)
  464. return d
  465. def _writeloop(self, consumer):
  466. """
  467. Return an iterator which reads one chunk of bytes from the input file
  468. and writes them to the consumer for each time it is iterated.
  469. """
  470. while True:
  471. bytes = self._inputFile.read(self._readSize)
  472. if not bytes:
  473. self._inputFile.close()
  474. break
  475. consumer.write(bytes)
  476. yield None
  477. def pauseProducing(self):
  478. """
  479. Temporarily suspend copying bytes from the input file to the consumer
  480. by pausing the L{CooperativeTask} which drives that activity.
  481. """
  482. self._task.pause()
  483. def resumeProducing(self):
  484. """
  485. Undo the effects of a previous C{pauseProducing} and resume copying
  486. bytes to the consumer by resuming the L{CooperativeTask} which drives
  487. the write activity.
  488. """
  489. self._task.resume()
  490. class _HTTP11ClientFactory(protocol.Factory):
  491. """
  492. A factory for L{HTTP11ClientProtocol}, used by L{HTTPConnectionPool}.
  493. @ivar _quiescentCallback: The quiescent callback to be passed to protocol
  494. instances, used to return them to the connection pool.
  495. @ivar _metadata: Metadata about the low-level connection details,
  496. used to make the repr more useful.
  497. @since: 11.1
  498. """
  499. def __init__(self, quiescentCallback, metadata):
  500. self._quiescentCallback = quiescentCallback
  501. self._metadata = metadata
  502. def __repr__(self) -> str:
  503. return "_HTTP11ClientFactory({}, {})".format(
  504. self._quiescentCallback, self._metadata
  505. )
  506. def buildProtocol(self, addr):
  507. return HTTP11ClientProtocol(self._quiescentCallback)
  508. class _RetryingHTTP11ClientProtocol:
  509. """
  510. A wrapper for L{HTTP11ClientProtocol} that automatically retries requests.
  511. @ivar _clientProtocol: The underlying L{HTTP11ClientProtocol}.
  512. @ivar _newConnection: A callable that creates a new connection for a
  513. retry.
  514. """
  515. def __init__(self, clientProtocol, newConnection):
  516. self._clientProtocol = clientProtocol
  517. self._newConnection = newConnection
  518. def _shouldRetry(self, method, exception, bodyProducer):
  519. """
  520. Indicate whether request should be retried.
  521. Only returns C{True} if method is idempotent, no response was
  522. received, the reason for the failed request was not due to
  523. user-requested cancellation, and no body was sent. The latter
  524. requirement may be relaxed in the future, and PUT added to approved
  525. method list.
  526. @param method: The method of the request.
  527. @type method: L{bytes}
  528. """
  529. if method not in (b"GET", b"HEAD", b"OPTIONS", b"DELETE", b"TRACE"):
  530. return False
  531. if not isinstance(
  532. exception,
  533. (RequestNotSent, RequestTransmissionFailed, ResponseNeverReceived),
  534. ):
  535. return False
  536. if isinstance(exception, _WrapperException):
  537. for aFailure in exception.reasons:
  538. if aFailure.check(defer.CancelledError):
  539. return False
  540. if bodyProducer is not None:
  541. return False
  542. return True
  543. def request(self, request):
  544. """
  545. Do a request, and retry once (with a new connection) if it fails in
  546. a retryable manner.
  547. @param request: A L{Request} instance that will be requested using the
  548. wrapped protocol.
  549. """
  550. d = self._clientProtocol.request(request)
  551. def failed(reason):
  552. if self._shouldRetry(request.method, reason.value, request.bodyProducer):
  553. return self._newConnection().addCallback(
  554. lambda connection: connection.request(request)
  555. )
  556. else:
  557. return reason
  558. d.addErrback(failed)
  559. return d
  560. class HTTPConnectionPool:
  561. """
  562. A pool of persistent HTTP connections.
  563. Features:
  564. - Cached connections will eventually time out.
  565. - Limits on maximum number of persistent connections.
  566. Connections are stored using keys, which should be chosen such that any
  567. connections stored under a given key can be used interchangeably.
  568. Failed requests done using previously cached connections will be retried
  569. once if they use an idempotent method (e.g. GET), in case the HTTP server
  570. timed them out.
  571. @ivar persistent: Boolean indicating whether connections should be
  572. persistent. Connections are persistent by default.
  573. @ivar maxPersistentPerHost: The maximum number of cached persistent
  574. connections for a C{host:port} destination.
  575. @type maxPersistentPerHost: C{int}
  576. @ivar cachedConnectionTimeout: Number of seconds a cached persistent
  577. connection will stay open before disconnecting.
  578. @ivar retryAutomatically: C{boolean} indicating whether idempotent
  579. requests should be retried once if no response was received.
  580. @ivar _factory: The factory used to connect to the proxy.
  581. @ivar _connections: Map (scheme, host, port) to lists of
  582. L{HTTP11ClientProtocol} instances.
  583. @ivar _timeouts: Map L{HTTP11ClientProtocol} instances to a
  584. C{IDelayedCall} instance of their timeout.
  585. @since: 12.1
  586. """
  587. _factory = _HTTP11ClientFactory
  588. maxPersistentPerHost = 2
  589. cachedConnectionTimeout = 240
  590. retryAutomatically = True
  591. _log = Logger()
  592. def __init__(self, reactor, persistent=True):
  593. self._reactor = reactor
  594. self.persistent = persistent
  595. self._connections = {}
  596. self._timeouts = {}
  597. def getConnection(self, key, endpoint):
  598. """
  599. Supply a connection, newly created or retrieved from the pool, to be
  600. used for one HTTP request.
  601. The connection will remain out of the pool (not available to be
  602. returned from future calls to this method) until one HTTP request has
  603. been completed over it.
  604. Afterwards, if the connection is still open, it will automatically be
  605. added to the pool.
  606. @param key: A unique key identifying connections that can be used
  607. interchangeably.
  608. @param endpoint: An endpoint that can be used to open a new connection
  609. if no cached connection is available.
  610. @return: A C{Deferred} that will fire with a L{HTTP11ClientProtocol}
  611. (or a wrapper) that can be used to send a single HTTP request.
  612. """
  613. # Try to get cached version:
  614. connections = self._connections.get(key)
  615. while connections:
  616. connection = connections.pop(0)
  617. # Cancel timeout:
  618. self._timeouts[connection].cancel()
  619. del self._timeouts[connection]
  620. if connection.state == "QUIESCENT":
  621. if self.retryAutomatically:
  622. newConnection = lambda: self._newConnection(key, endpoint)
  623. connection = _RetryingHTTP11ClientProtocol(
  624. connection, newConnection
  625. )
  626. return defer.succeed(connection)
  627. return self._newConnection(key, endpoint)
  628. def _newConnection(self, key, endpoint):
  629. """
  630. Create a new connection.
  631. This implements the new connection code path for L{getConnection}.
  632. """
  633. def quiescentCallback(protocol):
  634. self._putConnection(key, protocol)
  635. factory = self._factory(quiescentCallback, repr(endpoint))
  636. return endpoint.connect(factory)
  637. def _removeConnection(self, key, connection):
  638. """
  639. Remove a connection from the cache and disconnect it.
  640. """
  641. connection.transport.loseConnection()
  642. self._connections[key].remove(connection)
  643. del self._timeouts[connection]
  644. def _putConnection(self, key, connection):
  645. """
  646. Return a persistent connection to the pool. This will be called by
  647. L{HTTP11ClientProtocol} when the connection becomes quiescent.
  648. """
  649. if connection.state != "QUIESCENT":
  650. # Log with traceback for debugging purposes:
  651. try:
  652. raise RuntimeError(
  653. "BUG: Non-quiescent protocol added to connection pool."
  654. )
  655. except BaseException:
  656. self._log.failure(
  657. "BUG: Non-quiescent protocol added to connection pool."
  658. )
  659. return
  660. connections = self._connections.setdefault(key, [])
  661. if len(connections) == self.maxPersistentPerHost:
  662. dropped = connections.pop(0)
  663. dropped.transport.loseConnection()
  664. self._timeouts[dropped].cancel()
  665. del self._timeouts[dropped]
  666. connections.append(connection)
  667. cid = self._reactor.callLater(
  668. self.cachedConnectionTimeout, self._removeConnection, key, connection
  669. )
  670. self._timeouts[connection] = cid
  671. def closeCachedConnections(self):
  672. """
  673. Close all persistent connections and remove them from the pool.
  674. @return: L{defer.Deferred} that fires when all connections have been
  675. closed.
  676. """
  677. results = []
  678. for protocols in self._connections.values():
  679. for p in protocols:
  680. results.append(p.abort())
  681. self._connections = {}
  682. for dc in self._timeouts.values():
  683. dc.cancel()
  684. self._timeouts = {}
  685. return defer.gatherResults(results).addCallback(lambda ign: None)
  686. class _AgentBase:
  687. """
  688. Base class offering common facilities for L{Agent}-type classes.
  689. @ivar _reactor: The C{IReactorTime} implementation which will be used by
  690. the pool, and perhaps by subclasses as well.
  691. @ivar _pool: The L{HTTPConnectionPool} used to manage HTTP connections.
  692. """
  693. def __init__(self, reactor, pool):
  694. if pool is None:
  695. pool = HTTPConnectionPool(reactor, False)
  696. self._reactor = reactor
  697. self._pool = pool
  698. def _computeHostValue(self, scheme, host, port):
  699. """
  700. Compute the string to use for the value of the I{Host} header, based on
  701. the given scheme, host name, and port number.
  702. """
  703. if isIPv6Address(nativeString(host)):
  704. host = b"[" + host + b"]"
  705. if (scheme, port) in ((b"http", 80), (b"https", 443)):
  706. return host
  707. return b"%b:%d" % (host, port)
  708. def _requestWithEndpoint(
  709. self, key, endpoint, method, parsedURI, headers, bodyProducer, requestPath
  710. ):
  711. """
  712. Issue a new request, given the endpoint and the path sent as part of
  713. the request.
  714. """
  715. if not isinstance(method, bytes):
  716. raise TypeError(f"method={method!r} is {type(method)}, but must be bytes")
  717. method = _ensureValidMethod(method)
  718. # Create minimal headers, if necessary:
  719. if headers is None:
  720. headers = Headers()
  721. if not headers.hasHeader(b"host"):
  722. headers = headers.copy()
  723. headers.addRawHeader(
  724. b"host",
  725. self._computeHostValue(
  726. parsedURI.scheme, parsedURI.host, parsedURI.port
  727. ),
  728. )
  729. d = self._pool.getConnection(key, endpoint)
  730. def cbConnected(proto):
  731. return proto.request(
  732. Request._construct(
  733. method,
  734. requestPath,
  735. headers,
  736. bodyProducer,
  737. persistent=self._pool.persistent,
  738. parsedURI=parsedURI,
  739. )
  740. )
  741. d.addCallback(cbConnected)
  742. return d
  743. @implementer(IAgentEndpointFactory)
  744. class _StandardEndpointFactory:
  745. """
  746. Standard HTTP endpoint destinations - TCP for HTTP, TCP+TLS for HTTPS.
  747. @ivar _policyForHTTPS: A web context factory which will be used to create
  748. SSL context objects for any SSL connections the agent needs to make.
  749. @ivar _connectTimeout: If not L{None}, the timeout passed to
  750. L{HostnameEndpoint} for specifying the connection timeout.
  751. @ivar _bindAddress: If not L{None}, the address passed to
  752. L{HostnameEndpoint} for specifying the local address to bind to.
  753. """
  754. def __init__(self, reactor, contextFactory, connectTimeout, bindAddress):
  755. """
  756. @param reactor: A provider to use to create endpoints.
  757. @type reactor: see L{HostnameEndpoint.__init__} for acceptable reactor
  758. types.
  759. @param contextFactory: A factory for TLS contexts, to control the
  760. verification parameters of OpenSSL.
  761. @type contextFactory: L{IPolicyForHTTPS}.
  762. @param connectTimeout: The amount of time that this L{Agent} will wait
  763. for the peer to accept a connection.
  764. @type connectTimeout: L{float} or L{None}
  765. @param bindAddress: The local address for client sockets to bind to.
  766. @type bindAddress: L{bytes} or L{None}
  767. """
  768. self._reactor = reactor
  769. self._policyForHTTPS = contextFactory
  770. self._connectTimeout = connectTimeout
  771. self._bindAddress = bindAddress
  772. def endpointForURI(self, uri):
  773. """
  774. Connect directly over TCP for C{b'http'} scheme, and TLS for
  775. C{b'https'}.
  776. @param uri: L{URI} to connect to.
  777. @return: Endpoint to connect to.
  778. @rtype: L{IStreamClientEndpoint}
  779. """
  780. kwargs = {}
  781. if self._connectTimeout is not None:
  782. kwargs["timeout"] = self._connectTimeout
  783. kwargs["bindAddress"] = self._bindAddress
  784. try:
  785. host = nativeString(uri.host)
  786. except UnicodeDecodeError:
  787. raise ValueError(
  788. (
  789. "The host of the provided URI ({uri.host!r}) "
  790. "contains non-ASCII octets, it should be ASCII "
  791. "decodable."
  792. ).format(uri=uri)
  793. )
  794. endpoint = HostnameEndpoint(self._reactor, host, uri.port, **kwargs)
  795. if uri.scheme == b"http":
  796. return endpoint
  797. elif uri.scheme == b"https":
  798. connectionCreator = self._policyForHTTPS.creatorForNetloc(
  799. uri.host, uri.port
  800. )
  801. return wrapClientTLS(connectionCreator, endpoint)
  802. else:
  803. raise SchemeNotSupported(f"Unsupported scheme: {uri.scheme!r}")
  804. @implementer(IAgent)
  805. class Agent(_AgentBase):
  806. """
  807. L{Agent} is a very basic HTTP client. It supports I{HTTP} and I{HTTPS}
  808. scheme URIs.
  809. @ivar _pool: An L{HTTPConnectionPool} instance.
  810. @ivar _endpointFactory: The L{IAgentEndpointFactory} which will
  811. be used to create endpoints for outgoing connections.
  812. @since: 9.0
  813. """
  814. def __init__(
  815. self,
  816. reactor,
  817. contextFactory=BrowserLikePolicyForHTTPS(),
  818. connectTimeout=None,
  819. bindAddress=None,
  820. pool=None,
  821. ):
  822. """
  823. Create an L{Agent}.
  824. @param reactor: A reactor for this L{Agent} to place outgoing
  825. connections.
  826. @type reactor: see L{HostnameEndpoint.__init__} for acceptable reactor
  827. types.
  828. @param contextFactory: A factory for TLS contexts, to control the
  829. verification parameters of OpenSSL. The default is to use a
  830. L{BrowserLikePolicyForHTTPS}, so unless you have special
  831. requirements you can leave this as-is.
  832. @type contextFactory: L{IPolicyForHTTPS}.
  833. @param connectTimeout: The amount of time that this L{Agent} will wait
  834. for the peer to accept a connection.
  835. @type connectTimeout: L{float}
  836. @param bindAddress: The local address for client sockets to bind to.
  837. @type bindAddress: L{bytes}
  838. @param pool: An L{HTTPConnectionPool} instance, or L{None}, in which
  839. case a non-persistent L{HTTPConnectionPool} instance will be
  840. created.
  841. @type pool: L{HTTPConnectionPool}
  842. """
  843. if not IPolicyForHTTPS.providedBy(contextFactory):
  844. warnings.warn(
  845. repr(contextFactory)
  846. + " was passed as the HTTPS policy for an Agent, but it does "
  847. "not provide IPolicyForHTTPS. Since Twisted 14.0, you must "
  848. "pass a provider of IPolicyForHTTPS.",
  849. stacklevel=2,
  850. category=DeprecationWarning,
  851. )
  852. contextFactory = _DeprecatedToCurrentPolicyForHTTPS(contextFactory)
  853. endpointFactory = _StandardEndpointFactory(
  854. reactor, contextFactory, connectTimeout, bindAddress
  855. )
  856. self._init(reactor, endpointFactory, pool)
  857. @classmethod
  858. def usingEndpointFactory(cls, reactor, endpointFactory, pool=None):
  859. """
  860. Create a new L{Agent} that will use the endpoint factory to figure
  861. out how to connect to the server.
  862. @param reactor: A reactor for this L{Agent} to place outgoing
  863. connections.
  864. @type reactor: see L{HostnameEndpoint.__init__} for acceptable reactor
  865. types.
  866. @param endpointFactory: Used to construct endpoints which the
  867. HTTP client will connect with.
  868. @type endpointFactory: an L{IAgentEndpointFactory} provider.
  869. @param pool: An L{HTTPConnectionPool} instance, or L{None}, in which
  870. case a non-persistent L{HTTPConnectionPool} instance will be
  871. created.
  872. @type pool: L{HTTPConnectionPool}
  873. @return: A new L{Agent}.
  874. """
  875. agent = cls.__new__(cls)
  876. agent._init(reactor, endpointFactory, pool)
  877. return agent
  878. def _init(self, reactor, endpointFactory, pool):
  879. """
  880. Initialize a new L{Agent}.
  881. @param reactor: A reactor for this L{Agent} to place outgoing
  882. connections.
  883. @type reactor: see L{HostnameEndpoint.__init__} for acceptable reactor
  884. types.
  885. @param endpointFactory: Used to construct endpoints which the
  886. HTTP client will connect with.
  887. @type endpointFactory: an L{IAgentEndpointFactory} provider.
  888. @param pool: An L{HTTPConnectionPool} instance, or L{None}, in which
  889. case a non-persistent L{HTTPConnectionPool} instance will be
  890. created.
  891. @type pool: L{HTTPConnectionPool}
  892. @return: A new L{Agent}.
  893. """
  894. _AgentBase.__init__(self, reactor, pool)
  895. self._endpointFactory = endpointFactory
  896. def _getEndpoint(self, uri):
  897. """
  898. Get an endpoint for the given URI, using C{self._endpointFactory}.
  899. @param uri: The URI of the request.
  900. @type uri: L{URI}
  901. @return: An endpoint which can be used to connect to given address.
  902. """
  903. return self._endpointFactory.endpointForURI(uri)
  904. def request(self, method, uri, headers=None, bodyProducer=None):
  905. """
  906. Issue a request to the server indicated by the given C{uri}.
  907. An existing connection from the connection pool may be used or a new
  908. one may be created.
  909. I{HTTP} and I{HTTPS} schemes are supported in C{uri}.
  910. @see: L{twisted.web.iweb.IAgent.request}
  911. """
  912. uri = _ensureValidURI(uri.strip())
  913. parsedURI = URI.fromBytes(uri)
  914. try:
  915. endpoint = self._getEndpoint(parsedURI)
  916. except SchemeNotSupported:
  917. return defer.fail(Failure())
  918. key = (parsedURI.scheme, parsedURI.host, parsedURI.port)
  919. return self._requestWithEndpoint(
  920. key,
  921. endpoint,
  922. method,
  923. parsedURI,
  924. headers,
  925. bodyProducer,
  926. parsedURI.originForm,
  927. )
  928. @implementer(IAgent)
  929. class ProxyAgent(_AgentBase):
  930. """
  931. An HTTP agent able to cross HTTP proxies.
  932. @ivar _proxyEndpoint: The endpoint used to connect to the proxy.
  933. @since: 11.1
  934. """
  935. def __init__(self, endpoint, reactor=None, pool=None):
  936. if reactor is None:
  937. from twisted.internet import reactor
  938. _AgentBase.__init__(self, reactor, pool)
  939. self._proxyEndpoint = endpoint
  940. def request(self, method, uri, headers=None, bodyProducer=None):
  941. """
  942. Issue a new request via the configured proxy.
  943. """
  944. uri = _ensureValidURI(uri.strip())
  945. # Cache *all* connections under the same key, since we are only
  946. # connecting to a single destination, the proxy:
  947. key = ("http-proxy", self._proxyEndpoint)
  948. # To support proxying HTTPS via CONNECT, we will use key
  949. # ("http-proxy-CONNECT", scheme, host, port), and an endpoint that
  950. # wraps _proxyEndpoint with an additional callback to do the CONNECT.
  951. return self._requestWithEndpoint(
  952. key,
  953. self._proxyEndpoint,
  954. method,
  955. URI.fromBytes(uri),
  956. headers,
  957. bodyProducer,
  958. uri,
  959. )
  960. class _FakeUrllib2Request:
  961. """
  962. A fake C{urllib2.Request} object for C{cookielib} to work with.
  963. @see: U{http://docs.python.org/library/urllib2.html#request-objects}
  964. @type uri: native L{str}
  965. @ivar uri: Request URI.
  966. @type headers: L{twisted.web.http_headers.Headers}
  967. @ivar headers: Request headers.
  968. @type type: native L{str}
  969. @ivar type: The scheme of the URI.
  970. @type host: native L{str}
  971. @ivar host: The host[:port] of the URI.
  972. @since: 11.1
  973. """
  974. def __init__(self, uri):
  975. """
  976. Create a fake Urllib2 request.
  977. @param uri: Request URI.
  978. @type uri: L{bytes}
  979. """
  980. self.uri = nativeString(uri)
  981. self.headers = Headers()
  982. _uri = URI.fromBytes(uri)
  983. self.type = nativeString(_uri.scheme)
  984. self.host = nativeString(_uri.host)
  985. if (_uri.scheme, _uri.port) not in ((b"http", 80), (b"https", 443)):
  986. # If it's not a schema on the regular port, add the port.
  987. self.host += ":" + str(_uri.port)
  988. self.origin_req_host = nativeString(_uri.host)
  989. self.unverifiable = lambda _: False
  990. def has_header(self, header):
  991. return self.headers.hasHeader(networkString(header))
  992. def add_unredirected_header(self, name, value):
  993. self.headers.addRawHeader(networkString(name), networkString(value))
  994. def get_full_url(self):
  995. return self.uri
  996. def get_header(self, name, default=None):
  997. headers = self.headers.getRawHeaders(networkString(name), default)
  998. if headers is not None:
  999. headers = [nativeString(x) for x in headers]
  1000. return headers[0]
  1001. return None
  1002. def get_host(self):
  1003. return self.host
  1004. def get_type(self):
  1005. return self.type
  1006. def is_unverifiable(self):
  1007. # In theory this shouldn't be hardcoded.
  1008. return False
  1009. class _FakeUrllib2Response:
  1010. """
  1011. A fake C{urllib2.Response} object for C{cookielib} to work with.
  1012. @type response: C{twisted.web.iweb.IResponse}
  1013. @ivar response: Underlying Twisted Web response.
  1014. @since: 11.1
  1015. """
  1016. def __init__(self, response):
  1017. self.response = response
  1018. def info(self):
  1019. class _Meta:
  1020. def getheaders(zelf, name):
  1021. # PY2
  1022. headers = self.response.headers.getRawHeaders(name, [])
  1023. return headers
  1024. def get_all(zelf, name, default):
  1025. # PY3
  1026. headers = self.response.headers.getRawHeaders(
  1027. networkString(name), default
  1028. )
  1029. h = [nativeString(x) for x in headers]
  1030. return h
  1031. return _Meta()
  1032. @implementer(IAgent)
  1033. class CookieAgent:
  1034. """
  1035. L{CookieAgent} extends the basic L{Agent} to add RFC-compliant
  1036. handling of HTTP cookies. Cookies are written to and extracted
  1037. from a C{cookielib.CookieJar} instance.
  1038. The same cookie jar instance will be used for any requests through this
  1039. agent, mutating it whenever a I{Set-Cookie} header appears in a response.
  1040. @type _agent: L{twisted.web.client.Agent}
  1041. @ivar _agent: Underlying Twisted Web agent to issue requests through.
  1042. @type cookieJar: C{cookielib.CookieJar}
  1043. @ivar cookieJar: Initialized cookie jar to read cookies from and store
  1044. cookies to.
  1045. @since: 11.1
  1046. """
  1047. def __init__(self, agent, cookieJar):
  1048. self._agent = agent
  1049. self.cookieJar = cookieJar
  1050. def request(self, method, uri, headers=None, bodyProducer=None):
  1051. """
  1052. Issue a new request to the wrapped L{Agent}.
  1053. Send a I{Cookie} header if a cookie for C{uri} is stored in
  1054. L{CookieAgent.cookieJar}. Cookies are automatically extracted and
  1055. stored from requests.
  1056. If a C{'cookie'} header appears in C{headers} it will override the
  1057. automatic cookie header obtained from the cookie jar.
  1058. @see: L{Agent.request}
  1059. """
  1060. if headers is None:
  1061. headers = Headers()
  1062. lastRequest = _FakeUrllib2Request(uri)
  1063. # Setting a cookie header explicitly will disable automatic request
  1064. # cookies.
  1065. if not headers.hasHeader(b"cookie"):
  1066. self.cookieJar.add_cookie_header(lastRequest)
  1067. cookieHeader = lastRequest.get_header("Cookie", None)
  1068. if cookieHeader is not None:
  1069. headers = headers.copy()
  1070. headers.addRawHeader(b"cookie", networkString(cookieHeader))
  1071. d = self._agent.request(method, uri, headers, bodyProducer)
  1072. d.addCallback(self._extractCookies, lastRequest)
  1073. return d
  1074. def _extractCookies(self, response, request):
  1075. """
  1076. Extract response cookies and store them in the cookie jar.
  1077. @type response: L{twisted.web.iweb.IResponse}
  1078. @param response: Twisted Web response.
  1079. @param request: A urllib2 compatible request object.
  1080. """
  1081. resp = _FakeUrllib2Response(response)
  1082. self.cookieJar.extract_cookies(resp, request)
  1083. return response
  1084. class GzipDecoder(proxyForInterface(IResponse)): # type: ignore[misc]
  1085. """
  1086. A wrapper for a L{Response} instance which handles gzip'ed body.
  1087. @ivar original: The original L{Response} object.
  1088. @since: 11.1
  1089. """
  1090. def __init__(self, response):
  1091. self.original = response
  1092. self.length = UNKNOWN_LENGTH
  1093. def deliverBody(self, protocol):
  1094. """
  1095. Override C{deliverBody} to wrap the given C{protocol} with
  1096. L{_GzipProtocol}.
  1097. """
  1098. self.original.deliverBody(_GzipProtocol(protocol, self.original))
  1099. class _GzipProtocol(proxyForInterface(IProtocol)): # type: ignore[misc]
  1100. """
  1101. A L{Protocol} implementation which wraps another one, transparently
  1102. decompressing received data.
  1103. @ivar _zlibDecompress: A zlib decompress object used to decompress the data
  1104. stream.
  1105. @ivar _response: A reference to the original response, in case of errors.
  1106. @since: 11.1
  1107. """
  1108. def __init__(self, protocol, response):
  1109. self.original = protocol
  1110. self._response = response
  1111. self._zlibDecompress = zlib.decompressobj(16 + zlib.MAX_WBITS)
  1112. def dataReceived(self, data):
  1113. """
  1114. Decompress C{data} with the zlib decompressor, forwarding the raw data
  1115. to the original protocol.
  1116. """
  1117. try:
  1118. rawData = self._zlibDecompress.decompress(data)
  1119. except zlib.error:
  1120. raise ResponseFailed([Failure()], self._response)
  1121. if rawData:
  1122. self.original.dataReceived(rawData)
  1123. def connectionLost(self, reason):
  1124. """
  1125. Forward the connection lost event, flushing remaining data from the
  1126. decompressor if any.
  1127. """
  1128. try:
  1129. rawData = self._zlibDecompress.flush()
  1130. except zlib.error:
  1131. raise ResponseFailed([reason, Failure()], self._response)
  1132. if rawData:
  1133. self.original.dataReceived(rawData)
  1134. self.original.connectionLost(reason)
  1135. @implementer(IAgent)
  1136. class ContentDecoderAgent:
  1137. """
  1138. An L{Agent} wrapper to handle encoded content.
  1139. It takes care of declaring the support for content in the
  1140. I{Accept-Encoding} header and automatically decompresses the received data
  1141. if the I{Content-Encoding} header indicates a supported encoding.
  1142. For example::
  1143. agent = ContentDecoderAgent(Agent(reactor),
  1144. [(b'gzip', GzipDecoder)])
  1145. @param agent: The agent to wrap
  1146. @type agent: L{IAgent}
  1147. @param decoders: A sequence of (name, decoder) objects. The name
  1148. declares which encoding the decoder supports. The decoder must accept
  1149. an L{IResponse} and return an L{IResponse} when called. The order
  1150. determines how the decoders are advertised to the server. Names must
  1151. be unique.not be duplicated.
  1152. @type decoders: sequence of (L{bytes}, L{callable}) tuples
  1153. @since: 11.1
  1154. @see: L{GzipDecoder}
  1155. """
  1156. def __init__(self, agent, decoders):
  1157. self._agent = agent
  1158. self._decoders = dict(decoders)
  1159. self._supported = b",".join([decoder[0] for decoder in decoders])
  1160. def request(self, method, uri, headers=None, bodyProducer=None):
  1161. """
  1162. Send a client request which declares supporting compressed content.
  1163. @see: L{Agent.request}.
  1164. """
  1165. if headers is None:
  1166. headers = Headers()
  1167. else:
  1168. headers = headers.copy()
  1169. headers.addRawHeader(b"accept-encoding", self._supported)
  1170. deferred = self._agent.request(method, uri, headers, bodyProducer)
  1171. return deferred.addCallback(self._handleResponse)
  1172. def _handleResponse(self, response):
  1173. """
  1174. Check if the response is encoded, and wrap it to handle decompression.
  1175. """
  1176. contentEncodingHeaders = response.headers.getRawHeaders(b"content-encoding", [])
  1177. contentEncodingHeaders = b",".join(contentEncodingHeaders).split(b",")
  1178. while contentEncodingHeaders:
  1179. name = contentEncodingHeaders.pop().strip()
  1180. decoder = self._decoders.get(name)
  1181. if decoder is not None:
  1182. response = decoder(response)
  1183. else:
  1184. # Add it back
  1185. contentEncodingHeaders.append(name)
  1186. break
  1187. if contentEncodingHeaders:
  1188. response.headers.setRawHeaders(
  1189. b"content-encoding", [b",".join(contentEncodingHeaders)]
  1190. )
  1191. else:
  1192. response.headers.removeHeader(b"content-encoding")
  1193. return response
  1194. _canonicalHeaderName = Headers()._canonicalNameCaps
  1195. _defaultSensitiveHeaders = frozenset(
  1196. [
  1197. b"Authorization",
  1198. b"Cookie",
  1199. b"Cookie2",
  1200. b"Proxy-Authorization",
  1201. b"WWW-Authenticate",
  1202. ]
  1203. )
  1204. @implementer(IAgent)
  1205. class RedirectAgent:
  1206. """
  1207. An L{Agent} wrapper which handles HTTP redirects.
  1208. The implementation is rather strict: 301 and 302 behaves like 307, not
  1209. redirecting automatically on methods different from I{GET} and I{HEAD}.
  1210. See L{BrowserLikeRedirectAgent} for a redirecting Agent that behaves more
  1211. like a web browser.
  1212. @param redirectLimit: The maximum number of times the agent is allowed to
  1213. follow redirects before failing with a L{error.InfiniteRedirection}.
  1214. @param sensitiveHeaderNames: An iterable of C{bytes} enumerating the names
  1215. of headers that must not be transmitted when redirecting to a different
  1216. origins. These will be consulted in addition to the protocol-specified
  1217. set of headers that contain sensitive information.
  1218. @cvar _redirectResponses: A L{list} of HTTP status codes to be redirected
  1219. for I{GET} and I{HEAD} methods.
  1220. @cvar _seeOtherResponses: A L{list} of HTTP status codes to be redirected
  1221. for any method and the method altered to I{GET}.
  1222. @since: 11.1
  1223. """
  1224. _redirectResponses = [
  1225. http.MOVED_PERMANENTLY,
  1226. http.FOUND,
  1227. http.TEMPORARY_REDIRECT,
  1228. http.PERMANENT_REDIRECT,
  1229. ]
  1230. _seeOtherResponses = [http.SEE_OTHER]
  1231. def __init__(
  1232. self,
  1233. agent: IAgent,
  1234. redirectLimit: int = 20,
  1235. sensitiveHeaderNames: Iterable[bytes] = (),
  1236. ):
  1237. self._agent = agent
  1238. self._redirectLimit = redirectLimit
  1239. sensitive = {_canonicalHeaderName(each) for each in sensitiveHeaderNames}
  1240. sensitive.update(_defaultSensitiveHeaders)
  1241. self._sensitiveHeaderNames = sensitive
  1242. def request(self, method, uri, headers=None, bodyProducer=None):
  1243. """
  1244. Send a client request following HTTP redirects.
  1245. @see: L{Agent.request}.
  1246. """
  1247. deferred = self._agent.request(method, uri, headers, bodyProducer)
  1248. return deferred.addCallback(self._handleResponse, method, uri, headers, 0)
  1249. def _resolveLocation(self, requestURI, location):
  1250. """
  1251. Resolve the redirect location against the request I{URI}.
  1252. @type requestURI: C{bytes}
  1253. @param requestURI: The request I{URI}.
  1254. @type location: C{bytes}
  1255. @param location: The redirect location.
  1256. @rtype: C{bytes}
  1257. @return: Final resolved I{URI}.
  1258. """
  1259. return _urljoin(requestURI, location)
  1260. def _handleRedirect(self, response, method, uri, headers, redirectCount):
  1261. """
  1262. Handle a redirect response, checking the number of redirects already
  1263. followed, and extracting the location header fields.
  1264. """
  1265. if redirectCount >= self._redirectLimit:
  1266. err = error.InfiniteRedirection(
  1267. response.code, b"Infinite redirection detected", location=uri
  1268. )
  1269. raise ResponseFailed([Failure(err)], response)
  1270. locationHeaders = response.headers.getRawHeaders(b"location", [])
  1271. if not locationHeaders:
  1272. err = error.RedirectWithNoLocation(
  1273. response.code, b"No location header field", uri
  1274. )
  1275. raise ResponseFailed([Failure(err)], response)
  1276. location = self._resolveLocation(uri, locationHeaders[0])
  1277. if headers:
  1278. parsedURI = URI.fromBytes(uri)
  1279. parsedLocation = URI.fromBytes(location)
  1280. sameOrigin = (
  1281. (parsedURI.scheme == parsedLocation.scheme)
  1282. and (parsedURI.host == parsedLocation.host)
  1283. and (parsedURI.port == parsedLocation.port)
  1284. )
  1285. if not sameOrigin:
  1286. headers = Headers(
  1287. {
  1288. rawName: rawValue
  1289. for rawName, rawValue in headers.getAllRawHeaders()
  1290. if rawName not in self._sensitiveHeaderNames
  1291. }
  1292. )
  1293. deferred = self._agent.request(method, location, headers)
  1294. def _chainResponse(newResponse):
  1295. newResponse.setPreviousResponse(response)
  1296. return newResponse
  1297. deferred.addCallback(_chainResponse)
  1298. return deferred.addCallback(
  1299. self._handleResponse, method, uri, headers, redirectCount + 1
  1300. )
  1301. def _handleResponse(self, response, method, uri, headers, redirectCount):
  1302. """
  1303. Handle the response, making another request if it indicates a redirect.
  1304. """
  1305. if response.code in self._redirectResponses:
  1306. if method not in (b"GET", b"HEAD"):
  1307. err = error.PageRedirect(response.code, location=uri)
  1308. raise ResponseFailed([Failure(err)], response)
  1309. return self._handleRedirect(response, method, uri, headers, redirectCount)
  1310. elif response.code in self._seeOtherResponses:
  1311. return self._handleRedirect(response, b"GET", uri, headers, redirectCount)
  1312. return response
  1313. class BrowserLikeRedirectAgent(RedirectAgent):
  1314. """
  1315. An L{Agent} wrapper which handles HTTP redirects in the same fashion as web
  1316. browsers.
  1317. Unlike L{RedirectAgent}, the implementation is more relaxed: 301 and 302
  1318. behave like 303, redirecting automatically on any method and altering the
  1319. redirect request to a I{GET}.
  1320. @see: L{RedirectAgent}
  1321. @since: 13.1
  1322. """
  1323. _redirectResponses = [http.TEMPORARY_REDIRECT]
  1324. _seeOtherResponses = [
  1325. http.MOVED_PERMANENTLY,
  1326. http.FOUND,
  1327. http.SEE_OTHER,
  1328. http.PERMANENT_REDIRECT,
  1329. ]
  1330. class _ReadBodyProtocol(protocol.Protocol):
  1331. """
  1332. Protocol that collects data sent to it.
  1333. This is a helper for L{IResponse.deliverBody}, which collects the body and
  1334. fires a deferred with it.
  1335. @ivar deferred: See L{__init__}.
  1336. @ivar status: See L{__init__}.
  1337. @ivar message: See L{__init__}.
  1338. @ivar dataBuffer: list of byte-strings received
  1339. @type dataBuffer: L{list} of L{bytes}
  1340. """
  1341. def __init__(self, status, message, deferred):
  1342. """
  1343. @param status: Status of L{IResponse}
  1344. @ivar status: L{int}
  1345. @param message: Message of L{IResponse}
  1346. @type message: L{bytes}
  1347. @param deferred: deferred to fire when response is complete
  1348. @type deferred: L{Deferred} firing with L{bytes}
  1349. """
  1350. self.deferred = deferred
  1351. self.status = status
  1352. self.message = message
  1353. self.dataBuffer = []
  1354. def dataReceived(self, data):
  1355. """
  1356. Accumulate some more bytes from the response.
  1357. """
  1358. self.dataBuffer.append(data)
  1359. def connectionLost(self, reason):
  1360. """
  1361. Deliver the accumulated response bytes to the waiting L{Deferred}, if
  1362. the response body has been completely received without error.
  1363. """
  1364. if reason.check(ResponseDone):
  1365. self.deferred.callback(b"".join(self.dataBuffer))
  1366. elif reason.check(PotentialDataLoss):
  1367. self.deferred.errback(
  1368. PartialDownloadError(
  1369. self.status, self.message, b"".join(self.dataBuffer)
  1370. )
  1371. )
  1372. else:
  1373. self.deferred.errback(reason)
  1374. def readBody(response: IResponse) -> defer.Deferred[bytes]:
  1375. """
  1376. Get the body of an L{IResponse} and return it as a byte string.
  1377. This is a helper function for clients that don't want to incrementally
  1378. receive the body of an HTTP response.
  1379. @param response: The HTTP response for which the body will be read.
  1380. @type response: L{IResponse} provider
  1381. @return: A L{Deferred} which will fire with the body of the response.
  1382. Cancelling it will close the connection to the server immediately.
  1383. """
  1384. def cancel(deferred: defer.Deferred) -> None:
  1385. """
  1386. Cancel a L{readBody} call, close the connection to the HTTP server
  1387. immediately, if it is still open.
  1388. @param deferred: The cancelled L{defer.Deferred}.
  1389. """
  1390. abort = getAbort()
  1391. if abort is not None:
  1392. abort()
  1393. d: defer.Deferred[bytes] = defer.Deferred(cancel)
  1394. protocol = _ReadBodyProtocol(response.code, response.phrase, d)
  1395. def getAbort():
  1396. return getattr(protocol.transport, "abortConnection", None)
  1397. response.deliverBody(protocol)
  1398. if protocol.transport is not None and getAbort() is None:
  1399. warnings.warn(
  1400. "Using readBody with a transport that does not have an "
  1401. "abortConnection method",
  1402. category=DeprecationWarning,
  1403. stacklevel=2,
  1404. )
  1405. return d
# Names exported by a star-import of this module.
# NOTE(review): several entries (for example "HTTPConnectionPool",
# "Response" and "URI") are not defined in this part of the file; presumably
# they are defined or imported earlier in the module -- confirm.
__all__ = [
    "Agent",
    "BrowserLikePolicyForHTTPS",
    "BrowserLikeRedirectAgent",
    "ContentDecoderAgent",
    "CookieAgent",
    "GzipDecoder",
    "HTTPConnectionPool",
    "PartialDownloadError",
    "ProxyAgent",
    "readBody",
    "RedirectAgent",
    "RequestGenerationFailed",
    "RequestTransmissionFailed",
    "Response",
    "ResponseDone",
    "ResponseFailed",
    "ResponseNeverReceived",
    "URI",
]