response.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466
  1. try:
  2. import http.client as httplib
  3. except ImportError:
  4. import httplib
  5. import zlib
  6. import io
  7. from socket import timeout as SocketTimeout
  8. from ._collections import HTTPHeaderDict
  9. from .exceptions import (
  10. ProtocolError, DecodeError, ReadTimeoutError, ResponseNotChunked
  11. )
  12. from .packages.six import string_types as basestring, binary_type, PY3
  13. from .connection import HTTPException, BaseSSLError
  14. from .util.response import is_fp_closed
  15. class DeflateDecoder(object):
  16. def __init__(self):
  17. self._first_try = True
  18. self._data = binary_type()
  19. self._obj = zlib.decompressobj()
  20. def __getattr__(self, name):
  21. return getattr(self._obj, name)
  22. def decompress(self, data):
  23. if not data:
  24. return data
  25. if not self._first_try:
  26. return self._obj.decompress(data)
  27. self._data += data
  28. try:
  29. return self._obj.decompress(data)
  30. except zlib.error:
  31. self._first_try = False
  32. self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
  33. try:
  34. return self.decompress(self._data)
  35. finally:
  36. self._data = None
  37. class GzipDecoder(object):
  38. def __init__(self):
  39. self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
  40. def __getattr__(self, name):
  41. return getattr(self._obj, name)
  42. def decompress(self, data):
  43. if not data:
  44. return data
  45. return self._obj.decompress(data)
  46. def _get_decoder(mode):
  47. if mode == 'gzip':
  48. return GzipDecoder()
  49. return DeflateDecoder()
  50. class HTTPResponse(io.IOBase):
  51. """
  52. HTTP Response container.
  53. Backwards-compatible to httplib's HTTPResponse but the response ``body`` is
  54. loaded and decoded on-demand when the ``data`` property is accessed. This
  55. class is also compatible with the Python standard library's :mod:`io`
  56. module, and can hence be treated as a readable object in the context of that
  57. framework.
  58. Extra parameters for behaviour not present in httplib.HTTPResponse:
  59. :param preload_content:
  60. If True, the response's body will be preloaded during construction.
  61. :param decode_content:
  62. If True, attempts to decode specific content-encoding's based on headers
  63. (like 'gzip' and 'deflate') will be skipped and raw data will be used
  64. instead.
  65. :param original_response:
  66. When this HTTPResponse wrapper is generated from an httplib.HTTPResponse
  67. object, it's convenient to include the original for debug purposes. It's
  68. otherwise unused.
  69. """
  70. CONTENT_DECODERS = ['gzip', 'deflate']
  71. REDIRECT_STATUSES = [301, 302, 303, 307, 308]
  72. def __init__(self, body='', headers=None, status=0, version=0, reason=None,
  73. strict=0, preload_content=True, decode_content=True,
  74. original_response=None, pool=None, connection=None):
  75. if isinstance(headers, HTTPHeaderDict):
  76. self.headers = headers
  77. else:
  78. self.headers = HTTPHeaderDict(headers)
  79. self.status = status
  80. self.version = version
  81. self.reason = reason
  82. self.strict = strict
  83. self.decode_content = decode_content
  84. self._decoder = None
  85. self._body = None
  86. self._fp = None
  87. self._original_response = original_response
  88. self._fp_bytes_read = 0
  89. if body and isinstance(body, (basestring, binary_type)):
  90. self._body = body
  91. self._pool = pool
  92. self._connection = connection
  93. if hasattr(body, 'read'):
  94. self._fp = body
  95. # Are we using the chunked-style of transfer encoding?
  96. self.chunked = False
  97. self.chunk_left = None
  98. tr_enc = self.headers.get('transfer-encoding', '').lower()
  99. # Don't incur the penalty of creating a list and then discarding it
  100. encodings = (enc.strip() for enc in tr_enc.split(","))
  101. if "chunked" in encodings:
  102. self.chunked = True
  103. # We certainly don't want to preload content when the response is chunked.
  104. if not self.chunked and preload_content and not self._body:
  105. self._body = self.read(decode_content=decode_content)
  106. def get_redirect_location(self):
  107. """
  108. Should we redirect and where to?
  109. :returns: Truthy redirect location string if we got a redirect status
  110. code and valid location. ``None`` if redirect status and no
  111. location. ``False`` if not a redirect status code.
  112. """
  113. if self.status in self.REDIRECT_STATUSES:
  114. return self.headers.get('location')
  115. return False
  116. def release_conn(self):
  117. if not self._pool or not self._connection:
  118. return
  119. self._pool._put_conn(self._connection)
  120. self._connection = None
  121. @property
  122. def data(self):
  123. # For backwords-compat with earlier urllib3 0.4 and earlier.
  124. if self._body:
  125. return self._body
  126. if self._fp:
  127. return self.read(cache_content=True)
  128. def tell(self):
  129. """
  130. Obtain the number of bytes pulled over the wire so far. May differ from
  131. the amount of content returned by :meth:``HTTPResponse.read`` if bytes
  132. are encoded on the wire (e.g, compressed).
  133. """
  134. return self._fp_bytes_read
  135. def _init_decoder(self):
  136. """
  137. Set-up the _decoder attribute if necessar.
  138. """
  139. # Note: content-encoding value should be case-insensitive, per RFC 7230
  140. # Section 3.2
  141. content_encoding = self.headers.get('content-encoding', '').lower()
  142. if self._decoder is None and content_encoding in self.CONTENT_DECODERS:
  143. self._decoder = _get_decoder(content_encoding)
  144. def _decode(self, data, decode_content, flush_decoder):
  145. """
  146. Decode the data passed in and potentially flush the decoder.
  147. """
  148. try:
  149. if decode_content and self._decoder:
  150. data = self._decoder.decompress(data)
  151. except (IOError, zlib.error) as e:
  152. content_encoding = self.headers.get('content-encoding', '').lower()
  153. raise DecodeError(
  154. "Received response with content-encoding: %s, but "
  155. "failed to decode it." % content_encoding, e)
  156. if flush_decoder and decode_content and self._decoder:
  157. buf = self._decoder.decompress(binary_type())
  158. data += buf + self._decoder.flush()
  159. return data
  160. def read(self, amt=None, decode_content=None, cache_content=False):
  161. """
  162. Similar to :meth:`httplib.HTTPResponse.read`, but with two additional
  163. parameters: ``decode_content`` and ``cache_content``.
  164. :param amt:
  165. How much of the content to read. If specified, caching is skipped
  166. because it doesn't make sense to cache partial content as the full
  167. response.
  168. :param decode_content:
  169. If True, will attempt to decode the body based on the
  170. 'content-encoding' header.
  171. :param cache_content:
  172. If True, will save the returned data such that the same result is
  173. returned despite of the state of the underlying file object. This
  174. is useful if you want the ``.data`` property to continue working
  175. after having ``.read()`` the file object. (Overridden if ``amt`` is
  176. set.)
  177. """
  178. self._init_decoder()
  179. if decode_content is None:
  180. decode_content = self.decode_content
  181. if self._fp is None:
  182. return
  183. flush_decoder = False
  184. try:
  185. try:
  186. if amt is None:
  187. # cStringIO doesn't like amt=None
  188. data = self._fp.read()
  189. flush_decoder = True
  190. else:
  191. cache_content = False
  192. data = self._fp.read(amt)
  193. if amt != 0 and not data: # Platform-specific: Buggy versions of Python.
  194. # Close the connection when no data is returned
  195. #
  196. # This is redundant to what httplib/http.client _should_
  197. # already do. However, versions of python released before
  198. # December 15, 2012 (http://bugs.python.org/issue16298) do
  199. # not properly close the connection in all cases. There is
  200. # no harm in redundantly calling close.
  201. self._fp.close()
  202. flush_decoder = True
  203. except SocketTimeout:
  204. # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
  205. # there is yet no clean way to get at it from this context.
  206. raise ReadTimeoutError(self._pool, None, 'Read timed out.')
  207. except BaseSSLError as e:
  208. # FIXME: Is there a better way to differentiate between SSLErrors?
  209. if 'read operation timed out' not in str(e): # Defensive:
  210. # This shouldn't happen but just in case we're missing an edge
  211. # case, let's avoid swallowing SSL errors.
  212. raise
  213. raise ReadTimeoutError(self._pool, None, 'Read timed out.')
  214. except HTTPException as e:
  215. # This includes IncompleteRead.
  216. raise ProtocolError('Connection broken: %r' % e, e)
  217. self._fp_bytes_read += len(data)
  218. data = self._decode(data, decode_content, flush_decoder)
  219. if cache_content:
  220. self._body = data
  221. return data
  222. finally:
  223. if self._original_response and self._original_response.isclosed():
  224. self.release_conn()
  225. def stream(self, amt=2**16, decode_content=None):
  226. """
  227. A generator wrapper for the read() method. A call will block until
  228. ``amt`` bytes have been read from the connection or until the
  229. connection is closed.
  230. :param amt:
  231. How much of the content to read. The generator will return up to
  232. much data per iteration, but may return less. This is particularly
  233. likely when using compressed data. However, the empty string will
  234. never be returned.
  235. :param decode_content:
  236. If True, will attempt to decode the body based on the
  237. 'content-encoding' header.
  238. """
  239. if self.chunked:
  240. for line in self.read_chunked(amt, decode_content=decode_content):
  241. yield line
  242. else:
  243. while not is_fp_closed(self._fp):
  244. data = self.read(amt=amt, decode_content=decode_content)
  245. if data:
  246. yield data
  247. @classmethod
  248. def from_httplib(ResponseCls, r, **response_kw):
  249. """
  250. Given an :class:`httplib.HTTPResponse` instance ``r``, return a
  251. corresponding :class:`urllib3.response.HTTPResponse` object.
  252. Remaining parameters are passed to the HTTPResponse constructor, along
  253. with ``original_response=r``.
  254. """
  255. headers = r.msg
  256. if not isinstance(headers, HTTPHeaderDict):
  257. if PY3: # Python 3
  258. headers = HTTPHeaderDict(headers.items())
  259. else: # Python 2
  260. headers = HTTPHeaderDict.from_httplib(headers)
  261. # HTTPResponse objects in Python 3 don't have a .strict attribute
  262. strict = getattr(r, 'strict', 0)
  263. resp = ResponseCls(body=r,
  264. headers=headers,
  265. status=r.status,
  266. version=r.version,
  267. reason=r.reason,
  268. strict=strict,
  269. original_response=r,
  270. **response_kw)
  271. return resp
  272. # Backwards-compatibility methods for httplib.HTTPResponse
  273. def getheaders(self):
  274. return self.headers
  275. def getheader(self, name, default=None):
  276. return self.headers.get(name, default)
  277. # Overrides from io.IOBase
  278. def close(self):
  279. if not self.closed:
  280. self._fp.close()
  281. @property
  282. def closed(self):
  283. if self._fp is None:
  284. return True
  285. elif hasattr(self._fp, 'closed'):
  286. return self._fp.closed
  287. elif hasattr(self._fp, 'isclosed'): # Python 2
  288. return self._fp.isclosed()
  289. else:
  290. return True
  291. def fileno(self):
  292. if self._fp is None:
  293. raise IOError("HTTPResponse has no file to get a fileno from")
  294. elif hasattr(self._fp, "fileno"):
  295. return self._fp.fileno()
  296. else:
  297. raise IOError("The file-like object this HTTPResponse is wrapped "
  298. "around has no file descriptor")
  299. def flush(self):
  300. if self._fp is not None and hasattr(self._fp, 'flush'):
  301. return self._fp.flush()
  302. def readable(self):
  303. # This method is required for `io` module compatibility.
  304. return True
  305. def readinto(self, b):
  306. # This method is required for `io` module compatibility.
  307. temp = self.read(len(b))
  308. if len(temp) == 0:
  309. return 0
  310. else:
  311. b[:len(temp)] = temp
  312. return len(temp)
  313. def _update_chunk_length(self):
  314. # First, we'll figure out length of a chunk and then
  315. # we'll try to read it from socket.
  316. if self.chunk_left is not None:
  317. return
  318. line = self._fp.fp.readline()
  319. line = line.split(b';', 1)[0]
  320. try:
  321. self.chunk_left = int(line, 16)
  322. except ValueError:
  323. # Invalid chunked protocol response, abort.
  324. self.close()
  325. raise httplib.IncompleteRead(line)
  326. def _handle_chunk(self, amt):
  327. returned_chunk = None
  328. if amt is None:
  329. chunk = self._fp._safe_read(self.chunk_left)
  330. returned_chunk = chunk
  331. self._fp._safe_read(2) # Toss the CRLF at the end of the chunk.
  332. self.chunk_left = None
  333. elif amt < self.chunk_left:
  334. value = self._fp._safe_read(amt)
  335. self.chunk_left = self.chunk_left - amt
  336. returned_chunk = value
  337. elif amt == self.chunk_left:
  338. value = self._fp._safe_read(amt)
  339. self._fp._safe_read(2) # Toss the CRLF at the end of the chunk.
  340. self.chunk_left = None
  341. returned_chunk = value
  342. else: # amt > self.chunk_left
  343. returned_chunk = self._fp._safe_read(self.chunk_left)
  344. self._fp._safe_read(2) # Toss the CRLF at the end of the chunk.
  345. self.chunk_left = None
  346. return returned_chunk
  347. def read_chunked(self, amt=None, decode_content=None):
  348. """
  349. Similar to :meth:`HTTPResponse.read`, but with an additional
  350. parameter: ``decode_content``.
  351. :param decode_content:
  352. If True, will attempt to decode the body based on the
  353. 'content-encoding' header.
  354. """
  355. self._init_decoder()
  356. # FIXME: Rewrite this method and make it a class with a better structured logic.
  357. if not self.chunked:
  358. raise ResponseNotChunked("Response is not chunked. "
  359. "Header 'transfer-encoding: chunked' is missing.")
  360. if self._original_response and self._original_response._method.upper() == 'HEAD':
  361. # Don't bother reading the body of a HEAD request.
  362. # FIXME: Can we do this somehow without accessing private httplib _method?
  363. self._original_response.close()
  364. return
  365. while True:
  366. self._update_chunk_length()
  367. if self.chunk_left == 0:
  368. break
  369. chunk = self._handle_chunk(amt)
  370. yield self._decode(chunk, decode_content=decode_content,
  371. flush_decoder=True)
  372. # Chunk content ends with \r\n: discard it.
  373. while True:
  374. line = self._fp.fp.readline()
  375. if not line:
  376. # Some sites may not end with '\r\n'.
  377. break
  378. if line == b'\r\n':
  379. break
  380. # We read everything; close the "file".
  381. if self._original_response:
  382. self._original_response.close()
  383. self.release_conn()