2
0

retryhandler.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355
  1. # Copyright (c) 2012-2013 LiuYC https://github.com/liuyichen/
  2. # Copyright 2012-2014 ksyun.com, Inc. or its affiliates. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License"). You
  5. # may not use this file except in compliance with the License. A copy of
  6. # the License is located at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # or in the "license" file accompanying this file. This file is
  11. # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
  12. # ANY KIND, either express or implied. See the License for the specific
  13. # language governing permissions and limitations under the License.
  14. import random
  15. import functools
  16. import logging
  17. from binascii import crc32
  18. from kscore.vendored.requests import ConnectionError, Timeout
  19. from kscore.vendored.requests.packages.urllib3.exceptions import ClosedPoolError
  20. from kscore.exceptions import ChecksumError, EndpointConnectionError
logger = logging.getLogger(__name__)


# Maps the symbolic error names that a retry policy may list under
# ``socket_errors`` to the exception classes they stand for.
# The only supported name for now is GENERAL_CONNECTION_ERROR, which
# covers the ConnectionError and Timeout exceptions from requests, the
# ClosedPoolError from urllib3 and kscore's own EndpointConnectionError.
# If we're able to get more specific exceptions from requests we can
# update this mapping with more specific exceptions.
EXCEPTION_MAP = {
    'GENERAL_CONNECTION_ERROR': [
        ConnectionError, ClosedPoolError, Timeout,
        EndpointConnectionError
    ],
}
  32. def delay_exponential(base, growth_factor, attempts):
  33. """Calculate time to sleep based on exponential function.
  34. The format is::
  35. base * growth_factor ^ (attempts - 1)
  36. If ``base`` is set to 'rand' then a random number between
  37. 0 and 1 will be used as the base.
  38. Base must be greater than 0, otherwise a ValueError will be
  39. raised.
  40. """
  41. if base == 'rand':
  42. base = random.random()
  43. elif base <= 0:
  44. raise ValueError("The 'base' param must be greater than 0, "
  45. "got: %s" % base)
  46. time_to_sleep = base * (growth_factor ** (attempts - 1))
  47. return time_to_sleep
  48. def create_exponential_delay_function(base, growth_factor):
  49. """Create an exponential delay function based on the attempts.
  50. This is used so that you only have to pass it the attempts
  51. parameter to calculate the delay.
  52. """
  53. return functools.partial(
  54. delay_exponential, base=base, growth_factor=growth_factor)
  55. def create_retry_handler(config, operation_name=None):
  56. checker = create_checker_from_retry_config(
  57. config, operation_name=operation_name)
  58. action = create_retry_action_from_config(
  59. config, operation_name=operation_name)
  60. return RetryHandler(checker=checker, action=action)
  61. def create_retry_action_from_config(config, operation_name=None):
  62. # The spec has the possibility of supporting per policy
  63. # actions, but right now, we assume this comes from the
  64. # default section, which means that delay functions apply
  65. # for every policy in the retry config (per service).
  66. delay_config = config['__default__']['delay']
  67. if delay_config['type'] == 'exponential':
  68. return create_exponential_delay_function(
  69. base=delay_config['base'],
  70. growth_factor=delay_config['growth_factor'])
  71. def create_checker_from_retry_config(config, operation_name=None):
  72. checkers = []
  73. max_attempts = None
  74. retryable_exceptions = []
  75. if '__default__' in config:
  76. policies = config['__default__'].get('policies', [])
  77. max_attempts = config['__default__']['max_attempts']
  78. for key in policies:
  79. current_config = policies[key]
  80. checkers.append(_create_single_checker(current_config))
  81. retry_exception = _extract_retryable_exception(current_config)
  82. if retry_exception is not None:
  83. retryable_exceptions.extend(retry_exception)
  84. if operation_name is not None and config.get(operation_name) is not None:
  85. operation_policies = config[operation_name]['policies']
  86. for key in operation_policies:
  87. checkers.append(_create_single_checker(operation_policies[key]))
  88. retry_exception = _extract_retryable_exception(
  89. operation_policies[key])
  90. if retry_exception is not None:
  91. retryable_exceptions.extend(retry_exception)
  92. if len(checkers) == 1:
  93. # Don't need to use a MultiChecker
  94. return MaxAttemptsDecorator(checkers[0], max_attempts=max_attempts)
  95. else:
  96. multi_checker = MultiChecker(checkers)
  97. return MaxAttemptsDecorator(
  98. multi_checker, max_attempts=max_attempts,
  99. retryable_exceptions=tuple(retryable_exceptions))
  100. def _create_single_checker(config):
  101. if 'response' in config['applies_when']:
  102. return _create_single_response_checker(
  103. config['applies_when']['response'])
  104. elif 'socket_errors' in config['applies_when']:
  105. return ExceptionRaiser()
  106. def _create_single_response_checker(response):
  107. if 'service_error_code' in response:
  108. checker = ServiceErrorCodeChecker(
  109. status_code=response['http_status_code'],
  110. error_code=response['service_error_code'])
  111. elif 'http_status_code' in response:
  112. checker = HTTPStatusCodeChecker(
  113. status_code=response['http_status_code'])
  114. elif 'crc32body' in response:
  115. checker = CRC32Checker(header=response['crc32body'])
  116. else:
  117. # TODO: send a signal.
  118. raise ValueError("Unknown retry policy: %s" % config)
  119. return checker
  120. def _extract_retryable_exception(config):
  121. applies_when = config['applies_when']
  122. if 'crc32body' in applies_when.get('response', {}):
  123. return [ChecksumError]
  124. elif 'socket_errors' in applies_when:
  125. exceptions = []
  126. for name in applies_when['socket_errors']:
  127. exceptions.extend(EXCEPTION_MAP[name])
  128. return exceptions
  129. class RetryHandler(object):
  130. """Retry handler.
  131. The retry handler takes two params, ``checker`` object
  132. and an ``action`` object.
  133. The ``checker`` object must be a callable object and based on a response
  134. and an attempt number, determines whether or not sufficient criteria for
  135. a retry has been met. If this is the case then the ``action`` object
  136. (which also is a callable) determines what needs to happen in the event
  137. of a retry.
  138. """
  139. def __init__(self, checker, action):
  140. self._checker = checker
  141. self._action = action
  142. def __call__(self, attempts, response, caught_exception, **kwargs):
  143. """Handler for a retry.
  144. Intended to be hooked up to an event handler (hence the **kwargs),
  145. this will process retries appropriately.
  146. """
  147. if self._checker(attempts, response, caught_exception):
  148. result = self._action(attempts=attempts)
  149. logger.debug("Retry needed, action of: %s", result)
  150. return result
  151. logger.debug("No retry needed.")
  152. class BaseChecker(object):
  153. """Base class for retry checkers.
  154. Each class is responsible for checking a single criteria that determines
  155. whether or not a retry should not happen.
  156. """
  157. def __call__(self, attempt_number, response, caught_exception):
  158. """Determine if retry criteria matches.
  159. Note that either ``response`` is not None and ``caught_exception`` is
  160. None or ``response`` is None and ``caught_exception`` is not None.
  161. :type attempt_number: int
  162. :param attempt_number: The total number of times we've attempted
  163. to send the request.
  164. :param response: The HTTP response (if one was received).
  165. :type caught_exception: Exception
  166. :param caught_exception: Any exception that was caught while trying to
  167. send the HTTP response.
  168. :return: True, if the retry criteria matches (and therefore a retry
  169. should occur. False if the criteria does not match.
  170. """
  171. # The default implementation allows subclasses to not have to check
  172. # whether or not response is None or not.
  173. if response is not None:
  174. return self._check_response(attempt_number, response)
  175. elif caught_exception is not None:
  176. return self._check_caught_exception(attempt_number, caught_exception)
  177. else:
  178. raise ValueError("Both response and caught_exception are None.")
  179. def _check_response(self, attempt_number, response):
  180. pass
  181. def _check_caught_exception(self, attempt_number, caught_exception):
  182. pass
  183. class MaxAttemptsDecorator(BaseChecker):
  184. """Allow retries up to a maximum number of attempts.
  185. This will pass through calls to the decorated retry checker, provided
  186. that the number of attempts does not exceed max_attempts. It will
  187. also catch any retryable_exceptions passed in. Once max_attempts has
  188. been exceeded, then False will be returned or the retryable_exceptions
  189. that was previously being caught will be raised.
  190. """
  191. def __init__(self, checker, max_attempts, retryable_exceptions=None):
  192. self._checker = checker
  193. self._max_attempts = max_attempts
  194. self._retryable_exceptions = retryable_exceptions
  195. def __call__(self, attempt_number, response, caught_exception):
  196. should_retry = self._should_retry(attempt_number, response,
  197. caught_exception)
  198. if should_retry:
  199. if attempt_number >= self._max_attempts:
  200. logger.debug("Reached the maximum number of retry "
  201. "attempts: %s", attempt_number)
  202. return False
  203. else:
  204. return should_retry
  205. else:
  206. return False
  207. def _should_retry(self, attempt_number, response, caught_exception):
  208. if self._retryable_exceptions and \
  209. attempt_number < self._max_attempts:
  210. try:
  211. return self._checker(attempt_number, response, caught_exception)
  212. except self._retryable_exceptions as e:
  213. logger.debug("retry needed, retryable exception caught: %s",
  214. e, exc_info=True)
  215. return True
  216. else:
  217. # If we've exceeded the max attempts we just let the exception
  218. # propogate if one has occurred.
  219. return self._checker(attempt_number, response, caught_exception)
  220. class HTTPStatusCodeChecker(BaseChecker):
  221. def __init__(self, status_code):
  222. self._status_code = status_code
  223. def _check_response(self, attempt_number, response):
  224. if response[0].status_code == self._status_code:
  225. logger.debug(
  226. "retry needed: retryable HTTP status code received: %s",
  227. self._status_code)
  228. return True
  229. else:
  230. return False
  231. class ServiceErrorCodeChecker(BaseChecker):
  232. def __init__(self, status_code, error_code):
  233. self._status_code = status_code
  234. self._error_code = error_code
  235. def _check_response(self, attempt_number, response):
  236. if response[0].status_code == self._status_code:
  237. actual_error_code = response[1].get('Error', {}).get('Code')
  238. if actual_error_code == self._error_code:
  239. logger.debug(
  240. "retry needed: matching HTTP status and error code seen: "
  241. "%s, %s", self._status_code, self._error_code)
  242. return True
  243. return False
  244. class MultiChecker(BaseChecker):
  245. def __init__(self, checkers):
  246. self._checkers = checkers
  247. def __call__(self, attempt_number, response, caught_exception):
  248. for checker in self._checkers:
  249. checker_response = checker(attempt_number, response,
  250. caught_exception)
  251. if checker_response:
  252. return checker_response
  253. return False
  254. class CRC32Checker(BaseChecker):
  255. def __init__(self, header):
  256. # The header where the expected crc32 is located.
  257. self._header_name = header
  258. def _check_response(self, attempt_number, response):
  259. http_response = response[0]
  260. expected_crc = http_response.headers.get(self._header_name)
  261. if expected_crc is None:
  262. logger.debug("crc32 check skipped, the %s header is not "
  263. "in the http response.", self._header_name)
  264. else:
  265. actual_crc32 = crc32(response[0].content) & 0xffffffff
  266. if not actual_crc32 == int(expected_crc):
  267. logger.debug(
  268. "retry needed: crc32 check failed, expected != actual: "
  269. "%s != %s", int(expected_crc), actual_crc32)
  270. raise ChecksumError(checksum_type='crc32',
  271. expected_checksum=int(expected_crc),
  272. actual_checksum=actual_crc32)
class ExceptionRaiser(BaseChecker):
    """Raise any caught exceptions.

    This class will raise any non None ``caught_exception``.  It does
    not override ``_check_response``, so a call that carries a
    response falls through to the base class default.

    """
    def _check_caught_exception(self, attempt_number, caught_exception):
        # This is implementation specific, but this class is useful by
        # coordinating with the MaxAttemptsDecorator.
        # The MaxAttemptsDecorator has a list of exceptions it should catch
        # and retry, but something needs to come along and actually raise the
        # caught_exception.  That's what this class is being used for.  If
        # the MaxAttemptsDecorator is not interested in retrying the exception
        # then this exception just propagates out past the retry code.
        raise caught_exception