translate.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524
  1. # Copyright (c) 2012-2013 LiuYC https://github.com/liuyichen/
  2. # Copyright 2012-2014 ksyun.com, Inc. or its affiliates. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License"). You
  5. # may not use this file except in compliance with the License. A copy of
  6. # the License is located at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # or in the "license" file accompanying this file. This file is
  11. # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
  12. # ANY KIND, either express or implied. See the License for the specific
  13. # language governing permissions and limitations under the License.
  14. """Translate the raw json files into python specific descriptions."""
  15. import os
  16. import re
  17. from copy import deepcopy
  18. import jmespath
  19. from kscore.compat import OrderedDict, json
  20. from kscore.utils import merge_dicts
  21. from kscore import xform_name
  22. class ModelFiles(object):
  23. """Container object to hold all the various parsed json files.
  24. Includes:
  25. * The json service description.
  26. * The _retry.json file.
  27. * The <service>.extra.json enhancements file.
  28. * The name of the service.
  29. """
  30. def __init__(self, model, retry, enhancements, name=''):
  31. self.model = model
  32. self.retry = retry
  33. self.enhancements = enhancements
  34. self.name = name
  35. def load_model_files(args):
  36. model = json.load(open(args.modelfile),
  37. object_pairs_hook=OrderedDict)
  38. retry = json.load(open(args.retry_file),
  39. object_pairs_hook=OrderedDict)
  40. enhancements = _load_enhancements_file(args.enhancements_file)
  41. service_name = os.path.splitext(os.path.basename(args.modelfile))[0]
  42. return ModelFiles(model, retry, enhancements, name=service_name)
  43. def _load_enhancements_file(file_path):
  44. if not os.path.isfile(file_path):
  45. return {}
  46. else:
  47. return json.load(open(file_path),
  48. object_pairs_hook=OrderedDict)
  49. def translate(model):
  50. new_model = deepcopy(model.model)
  51. new_model.update(model.enhancements.get('extra', {}))
  52. try:
  53. del new_model['pagination']
  54. except KeyError:
  55. pass
  56. handle_op_renames(new_model, model.enhancements)
  57. handle_remove_deprecated_params(new_model, model.enhancements)
  58. handle_remove_deprecated_operations(new_model, model.enhancements)
  59. handle_filter_documentation(new_model, model.enhancements)
  60. handle_rename_params(new_model, model.enhancements)
  61. add_pagination_configs(
  62. new_model,
  63. model.enhancements.get('pagination', {}))
  64. add_waiter_configs(
  65. new_model,
  66. model.enhancements.get('waiters', {}))
  67. # Merge in any per operation overrides defined in the .extras.json file.
  68. merge_dicts(new_model['operations'],
  69. model.enhancements.get('operations', {}))
  70. add_retry_configs(
  71. new_model, model.retry.get('retry', {}),
  72. definitions=model.retry.get('definitions', {}))
  73. return new_model
  74. def handle_op_renames(new_model, enhancements):
  75. # This allows for operations to be renamed. The only
  76. # implemented transformation is removing part of the operation name
  77. # (because that's all we currently need.)
  78. remove = enhancements.get('transformations', {}).get(
  79. 'operation-name', {}).get('remove')
  80. if remove is not None:
  81. # We're going to recreate the dictionary because we want to preserve
  82. # the order. This is the only option we have unless we have our own
  83. # custom OrderedDict.
  84. remove_regex = re.compile(remove)
  85. operations = new_model['operations']
  86. new_operation = OrderedDict()
  87. for key in operations:
  88. new_key = remove_regex.sub('', key)
  89. new_operation[new_key] = operations[key]
  90. new_model['operations'] = new_operation
  91. def handle_remove_deprecated_operations(new_model, enhancements):
  92. # This removes any operation whose documentation string contains
  93. # the specified phrase that marks a deprecated parameter.
  94. keyword = enhancements.get('transformations', {}).get(
  95. 'remove-deprecated-operations', {}).get('deprecated_keyword')
  96. remove = []
  97. if keyword is not None:
  98. operations = new_model['operations']
  99. for op_name in operations:
  100. operation = operations[op_name]
  101. if operation:
  102. docs = operation['documentation']
  103. if docs and docs.find(keyword) >= 0:
  104. remove.append(op_name)
  105. for op in remove:
  106. del new_model['operations'][op]
  107. def handle_remove_deprecated_params(new_model, enhancements):
  108. # This removes any parameter whose documentation string contains
  109. # the specified phrase that marks a deprecated parameter.
  110. keyword = enhancements.get('transformations', {}).get(
  111. 'remove-deprecated-params', {}).get('deprecated_keyword')
  112. if keyword is not None:
  113. operations = new_model['operations']
  114. for op_name in operations:
  115. operation = operations[op_name]
  116. params = operation.get('input', {}).get('members')
  117. if params:
  118. new_params = OrderedDict()
  119. for param_name in params:
  120. param = params[param_name]
  121. docs = param['documentation']
  122. if docs and docs.find(keyword) >= 0:
  123. continue
  124. new_params[param_name] = param
  125. operation['input']['members'] = new_params
  126. def _filter_param_doc(param, replacement, regex):
  127. # Recurse into complex parameters looking for documentation.
  128. doc = param.get('documentation')
  129. if doc:
  130. param['documentation'] = regex.sub(replacement, doc)
  131. if param['type'] == 'structure':
  132. for member_name in param['members']:
  133. member = param['members'][member_name]
  134. _filter_param_doc(member, replacement, regex)
  135. if param['type'] == 'map':
  136. _filter_param_doc(param['keys'], replacement, regex)
  137. _filter_param_doc(param['members'], replacement, regex)
  138. elif param['type'] == 'list':
  139. _filter_param_doc(param['members'], replacement, regex)
  140. def handle_filter_documentation(new_model, enhancements):
  141. # This provides a way to filter undesireable content (e.g. CDATA)
  142. # from documentation strings.
  143. doc_filter = enhancements.get('transformations', {}).get(
  144. 'filter-documentation', {}).get('filter')
  145. if doc_filter is not None:
  146. filter_regex = re.compile(doc_filter.get('regex', ''), re.DOTALL)
  147. replacement = doc_filter.get('replacement')
  148. operations = new_model['operations']
  149. for op_name in operations:
  150. operation = operations[op_name]
  151. doc = operation.get('documentation')
  152. if doc:
  153. new_doc = filter_regex.sub(replacement, doc)
  154. operation['documentation'] = new_doc
  155. params = operation.get('input', {}).get('members')
  156. if params:
  157. for param_name in params:
  158. param = params[param_name]
  159. _filter_param_doc(param, replacement, filter_regex)
  160. def handle_rename_params(new_model, enhancements):
  161. renames = enhancements.get('transformations', {}).get(
  162. 'renames', {})
  163. if not renames:
  164. return
  165. # This is *extremely* specific to kscore's translations, but
  166. # we support a restricted set of argument renames based on a
  167. # jmespath expression.
  168. for expression, new_value in renames.items():
  169. # First we take everything up until the last dot.
  170. parent_expression, key = expression.rsplit('.', 1)
  171. matched = jmespath.search(parent_expression, new_model['operations'])
  172. current = matched[key]
  173. del matched[key]
  174. matched[new_value] = current
  175. def resembles_jmespath_exp(value):
  176. # For now, we'll do a naive check.
  177. if '.' in value or '[' in value:
  178. return True
  179. return False
  180. def add_pagination_configs(new_model, pagination):
  181. # Adding in pagination configs means copying the config to a top level
  182. # 'pagination' key in the new model, and it also means adding the
  183. # pagination config to each individual operation.
  184. # Also, the input_token needs to be transformed to the python specific
  185. # name, so we're adding a py_input_token (e.g. NextToken -> next_token).
  186. if pagination:
  187. new_model['pagination'] = pagination
  188. for name in pagination:
  189. config = pagination[name]
  190. _check_known_pagination_keys(config)
  191. if 'py_input_token' not in config:
  192. _add_py_input_token(config)
  193. _validate_result_key_exists(config)
  194. _validate_referenced_operation_exists(new_model, name)
  195. operation = new_model['operations'][name]
  196. _validate_operation_has_output(operation, name)
  197. _check_input_keys_match(config, operation)
  198. _check_output_keys_match(config, operation,
  199. new_model.get('endpoint_prefix', ''))
  200. operation['pagination'] = config.copy()
  201. def _validate_operation_has_output(operation, name):
  202. if not operation['output']:
  203. raise ValueError("Trying to add pagination config for an "
  204. "operation with no output members: %s" % name)
  205. def _validate_referenced_operation_exists(new_model, name):
  206. if name not in new_model['operations']:
  207. raise ValueError("Trying to add pagination config for non "
  208. "existent operation: %s" % name)
  209. def _validate_result_key_exists(config):
  210. # result_key must be defined.
  211. if 'result_key' not in config:
  212. raise ValueError("Required key 'result_key' is missing from "
  213. "from pagination config: %s" % config)
  214. def _add_py_input_token(config):
  215. input_token = config['input_token']
  216. if isinstance(input_token, list):
  217. py_input_token = []
  218. for token in input_token:
  219. py_input_token.append(xform_name(token))
  220. config['py_input_token'] = py_input_token
  221. else:
  222. config['py_input_token'] = xform_name(input_token)
  223. def add_waiter_configs(new_model, waiters):
  224. if waiters:
  225. denormalized = denormalize_waiters(waiters)
  226. # Before adding it to the new model, we need to verify the
  227. # final denormalized model.
  228. for value in denormalized.values():
  229. if value['operation'] not in new_model['operations']:
  230. raise ValueError()
  231. new_model['waiters'] = denormalized
  232. def denormalize_waiters(waiters):
  233. # The waiter configuration is normalized to avoid duplication.
  234. # You can inherit defaults, and extend from other definitions.
  235. # We're going to denormalize this so that the implementation for
  236. # consuming waiters is simple.
  237. default = waiters.get('__default__', {})
  238. new_waiters = {}
  239. for key, value in waiters.items():
  240. if key.startswith('__'):
  241. # Keys that start with '__' are considered abstract/internal
  242. # and are only used for inheritance. Because we're going
  243. # to denormalize the configs and perform all the lookups
  244. # during this translation process, the abstract/internal
  245. # configs don't need to make it into the final translated
  246. # config so we can just skip these.
  247. continue
  248. new_waiters[key] = denormalize_single_waiter(value, default, waiters)
  249. return new_waiters
  250. def denormalize_single_waiter(value, default, waiters):
  251. """Denormalize a single waiter config.
  252. :param value: The dictionary of a single waiter config, e.g.
  253. the ``InstanceRunning`` or ``TableExists`` config. This
  254. is the config we're going to denormalize.
  255. :param default: The ``__default__`` (if any) configuration.
  256. This is needed to resolve the lookup process.
  257. :param waiters: The full configuration of the waiters.
  258. This is needed if we need to look up at parent class that the
  259. current config extends.
  260. :return: The denormalized config.
  261. :rtype: dict
  262. """
  263. # First we need to resolve all the keys based on the inheritance
  264. # hierarchy. The lookup process is:
  265. # The most bottom/leaf class is ``value``. From there we need
  266. # to look up anything it inherits from (denoted via the ``extends``
  267. # key). We need to perform this process recursively until we hit
  268. # a config that has no ``extends`` key.
  269. # And finally if we haven't found our value yet, we check in the
  270. # ``__default__`` key.
  271. # So the first thing we need to do is build the lookup chain that
  272. # starts with ``value`` and ends with ``__default__``.
  273. lookup_chain = [value]
  274. current = value
  275. while True:
  276. if 'extends' not in current:
  277. break
  278. current = waiters[current.get('extends')]
  279. lookup_chain.append(current)
  280. lookup_chain.append(default)
  281. new_waiter = {}
  282. # Now that we have this lookup chain we can build the entire set
  283. # of values by starting at the most parent class and walking down
  284. # to the children. At each step the child is merged onto the parent's
  285. # config items. This is the desired behavior as a child's values
  286. # overrides its parents. This is what the ``reversed(...)`` call
  287. # is for.
  288. for element in reversed(lookup_chain):
  289. new_waiter.update(element)
  290. # We don't care about 'extends' so we can safely remove that key.
  291. new_waiter.pop('extends', {})
  292. # Now we need to resolve the success/failure values. We
  293. # want to completely remove the acceptor types.
  294. # The logic here is that if there is no success/failure_* variable
  295. # defined, it inherits this value from the matching acceptor_* variable.
  296. new_waiter['success_type'] = new_waiter.get(
  297. 'success_type', new_waiter.get('acceptor_type'))
  298. new_waiter['success_path'] = new_waiter.get(
  299. 'success_path', new_waiter.get('acceptor_path'))
  300. new_waiter['success_value'] = new_waiter.get(
  301. 'success_value', new_waiter.get('acceptor_value'))
  302. new_waiter['failure_type'] = new_waiter.get(
  303. 'failure_type', new_waiter.get('acceptor_type'))
  304. new_waiter['failure_path'] = new_waiter.get(
  305. 'failure_path', new_waiter.get('acceptor_path'))
  306. new_waiter['failure_value'] = new_waiter.get(
  307. 'failure_value', new_waiter.get('acceptor_value'))
  308. # We can remove acceptor_* vars because they're only used for lookups
  309. # and we've already performed this step in the lines above.
  310. new_waiter.pop('acceptor_type', '')
  311. new_waiter.pop('acceptor_path', '')
  312. new_waiter.pop('acceptor_value', '')
  313. # Remove any keys with a None value.
  314. for key in list(new_waiter.keys()):
  315. if new_waiter[key] is None:
  316. del new_waiter[key]
  317. # Check required keys.
  318. for required in ['operation', 'success_type']:
  319. if required not in new_waiter:
  320. raise ValueError('Missing required waiter configuration '
  321. 'value "%s": %s' % (required, new_waiter))
  322. if new_waiter.get(required) is None:
  323. raise ValueError('Required waiter configuration '
  324. 'value cannot be None "%s": %s' %
  325. (required, new_waiter))
  326. # Finally, success/failure values can be a scalar or a list. We're going
  327. # to just always make them a list.
  328. if 'success_value' in new_waiter and not \
  329. isinstance(new_waiter['success_value'], list):
  330. new_waiter['success_value'] = [new_waiter['success_value']]
  331. if 'failure_value' in new_waiter and not \
  332. isinstance(new_waiter['failure_value'], list):
  333. new_waiter['failure_value'] = [new_waiter['failure_value']]
  334. _transform_waiter(new_waiter)
  335. return new_waiter
  336. def _transform_waiter(new_waiter):
  337. # This transforms the waiters into a format that's slightly
  338. # easier to consume.
  339. if 'success_type' in new_waiter:
  340. success = {'type': new_waiter.pop('success_type')}
  341. if 'success_path' in new_waiter:
  342. success['path'] = new_waiter.pop('success_path')
  343. if 'success_value' in new_waiter:
  344. success['value'] = new_waiter.pop('success_value')
  345. new_waiter['success'] = success
  346. if 'failure_type' in new_waiter:
  347. failure = {'type': new_waiter.pop('failure_type')}
  348. if 'failure_path' in new_waiter:
  349. failure['path'] = new_waiter.pop('failure_path')
  350. if 'failure_value' in new_waiter:
  351. failure['value'] = new_waiter.pop('failure_value')
  352. new_waiter['failure'] = failure
  353. def _check_known_pagination_keys(config):
  354. # Verify that the pagination config only has keys we expect to see.
  355. expected = set(['input_token', 'py_input_token', 'output_token',
  356. 'result_key', 'limit_key', 'more_results',
  357. 'non_aggregate_keys'])
  358. for key in config:
  359. if key not in expected:
  360. raise ValueError("Unknown key in pagination config: %s" % key)
  361. def _check_output_keys_match(config, operation, service_name):
  362. output_members = list(operation['output']['members'])
  363. jmespath_seen = False
  364. for output_key in _get_all_page_output_keys(config):
  365. if resembles_jmespath_exp(output_key):
  366. # We don't validate jmespath expressions for now.
  367. jmespath_seen = True
  368. continue
  369. if output_key not in output_members:
  370. raise ValueError("Key %r is not an output member: %s" %
  371. (output_key,
  372. output_members))
  373. output_members.remove(output_key)
  374. # Some services echo the input parameters in the response
  375. # output. We should not trigger a validation error
  376. # if those params are still not accounted for.
  377. for input_name in operation['input']['members']:
  378. if input_name in output_members:
  379. output_members.remove(input_name)
  380. if not jmespath_seen and output_members:
  381. # Because we can't validate jmespath expressions yet,
  382. # we can't say for user if output_members actually has
  383. # remaining keys or not.
  384. if service_name == 's3' and output_members == ['Name']:
  385. # The S3 model uses 'Name' for the output key, which
  386. # actually maps to the 'Bucket' input param so we don't
  387. # need to validate this output member. This is the only
  388. # model that has this, so we can just special case this
  389. # for now.
  390. return
  391. raise ValueError("Output members still exist for operation %s: %s" % (
  392. operation['name'], output_members))
  393. def _get_all_page_output_keys(config):
  394. if not isinstance(config['result_key'], list):
  395. yield config['result_key']
  396. else:
  397. for result_key in config['result_key']:
  398. yield result_key
  399. if not isinstance(config['output_token'], list):
  400. yield config['output_token']
  401. else:
  402. for result_key in config['output_token']:
  403. yield result_key
  404. if 'more_results' in config:
  405. yield config['more_results']
  406. for key in config.get('non_aggregate_keys', []):
  407. yield key
  408. def _check_input_keys_match(config, operation):
  409. input_tokens = config['input_token']
  410. if not isinstance(input_tokens, list):
  411. input_tokens = [input_tokens]
  412. valid_input_names = operation['input']['members']
  413. for token in input_tokens:
  414. if token not in valid_input_names:
  415. raise ValueError("input_token refers to a non existent "
  416. "input name for operation %s: %s. "
  417. "Must be one of: %s" % (operation['name'], token,
  418. list(valid_input_names)))
  419. if 'limit_key' in config and config['limit_key'] not in valid_input_names:
  420. raise ValueError("limit_key refers to a non existent input name for "
  421. "operation %s: %s. Must be one of: %s" % (
  422. operation['name'], config['limit_key'],
  423. list(valid_input_names)))
  424. def add_retry_configs(new_model, retry_model, definitions):
  425. if not retry_model:
  426. new_model['retry'] = {}
  427. return
  428. # The service specific retry config is keyed off of the endpoint
  429. # prefix as defined in the JSON model.
  430. endpoint_prefix = new_model.get('endpoint_prefix', '')
  431. final_retry_config = build_retry_config(endpoint_prefix, retry_model,
  432. definitions)
  433. new_model['retry'] = final_retry_config
  434. def build_retry_config(endpoint_prefix, retry_model, definitions):
  435. service_config = retry_model.get(endpoint_prefix, {})
  436. resolve_references(service_config, definitions)
  437. # We want to merge the global defaults with the service specific
  438. # defaults, with the service specific defaults taking precedence.
  439. # So we use the global defaults as the base.
  440. final_retry_config = {'__default__': retry_model.get('__default__', {})}
  441. resolve_references(final_retry_config, definitions)
  442. # The merge the service specific config on top.
  443. merge_dicts(final_retry_config, service_config)
  444. return final_retry_config
  445. def resolve_references(config, definitions):
  446. """Recursively replace $ref keys.
  447. To cut down on duplication, common definitions can be declared
  448. (and passed in via the ``definitions`` attribute) and then
  449. references as {"$ref": "name"}, when this happens the reference
  450. dict is placed with the value from the ``definition`` dict.
  451. This is recursively done.
  452. """
  453. for key, value in config.items():
  454. if isinstance(value, dict):
  455. if len(value) == 1 and list(value.keys())[0] == '$ref':
  456. # Then we need to resolve this reference.
  457. config[key] = definitions[list(value.values())[0]]
  458. else:
  459. resolve_references(value, definitions)