def validate_catalogs(self, file_or_dir_path, batch_size=10):
    """Check whether the given JSON file is a valid catalog, or
    whether the JSONs in the given directory are valid catalogs.

    This is a generator function: nothing runs (not even the path
    checks) until the returned generator is iterated.

    Parameters:

    :file_or_dir_path: A relative or absolute file path to a
                       JSON file or a directory (to be
                       recursively traversed for JSONs). A single
                       file is sent as-is; its extension is not
                       checked.
    :batch_size: The number of JSONs to be opened (in memory)
                 and sent to the API at once (default 10). A
                 value below 1 never matches a batch length, so
                 all JSONs of a directory are then sent together
                 in one request.

    Returns:

    - Generator of tuples (JSON file path (str), validation
      result (dict)). Example:

        (
            "path/to/catalog1.json",
            {
                "is_valid": True,
                "warnings": "",
                "errors": ""
            }
        )

        (
            "path/to/nested/catalog2.json",
            {
                "is_valid": False,
                "warnings": "",
                "errors": "Sample error message"
            }
        )

    Raises:

    - BdcApiException, if the path does not exist or is neither a
      file nor a directory, if a file cannot be opened, if the
      request fails, or if the response has no parsable 'results'.
    """

    def _get_validation_results_for_files(paths):
        """Open the files at the given paths, send them to the API
        endpoint for catalog validation, close them, and return the
        validation results as (path, result) pairs."""
        json_content_type = 'application/json'
        files = {}
        try:
            for path in paths:
                try:
                    # Binary mode so the bytes on disk are uploaded
                    # unchanged, independent of the locale encoding.
                    handle = open(path, 'rb')
                except Exception as e:
                    raise BdcApiException('Failed to open a file.') from e
                files[path] = (path, handle, json_content_type)

            try:
                response = self._send_post(
                    self.URL_VALIDATE_CATALOGS, {}, files=files)
            except Exception as e:
                raise BdcApiException(e) from e
        finally:
            # Runs on every exit path, including a failed open() half
            # way through the batch, so no handle is left open.
            for _, handle, _ in files.values():
                try:
                    handle.close()
                except Exception:
                    # Deliberately best-effort: a failing close() must
                    # not hide the request outcome or the real error.
                    pass

        try:
            return response.json()['results'].items()
        except Exception as e:
            raise BdcApiException(e) from e

    if not os.path.exists(file_or_dir_path):
        raise BdcApiException(f'{file_or_dir_path} is not a valid path.')

    if os.path.isfile(file_or_dir_path):
        yield from _get_validation_results_for_files([file_or_dir_path])
    elif os.path.isdir(file_or_dir_path):
        batch = []
        for file_path in self._yield_jsons_in_directory(file_or_dir_path):
            batch.append(file_path)
            if len(batch) == batch_size:
                yield from _get_validation_results_for_files(batch)
                batch = []
        # Send whatever is left over after the last full batch.
        if batch:
            yield from _get_validation_results_for_files(batch)
    else:
        raise BdcApiException(
            f'{file_or_dir_path} is neither a file nor a directory.')
@staticmethod
def _yield_jsons_in_directory(directory_path):
    """Lazily yield the path of every '.json' file found below the
    given directory, descending into subdirectories.

    Parameters:

    :directory_path: The path to a directory to search within

    Returns:

    - A generator of paths to JSON files. Each path is the walked
      directory joined with the file name, so it starts with
      directory_path exactly as it was passed in. Only names ending
      in lowercase '.json' are matched.

    Raises:

    - Nothing explicitly. os.walk is called without an error
      handler, so an unreadable or missing directory simply yields
      no paths.
    """
    for parent, _subdirs, names in os.walk(directory_path):
        yield from (
            os.path.join(parent, name)
            for name in names
            if name.endswith('.json')
        )
@patch('bdc_api.BdcApi._send_post')
@patch('bdc_api.BdcApi._yield_jsons_in_directory')
@patch('builtins.open', new_callable=mock_open, read_data='')
def test_validate_catalogs_fail_1(self, mock_file, mock_yield, mock_post):
    """Ensure that validate_catalogs fails as expected when the API
    call raises an exception"""
    mock_post.side_effect = Exception('Test exception.')
    mock_yield.return_value = ['dummy.json']

    # Pretend '/tmp' is an existing directory so the test does not
    # depend on the host filesystem (e.g. there is no '/tmp' on Windows).
    with patch('os.path.exists', return_value=True), \
            patch('os.path.isfile', return_value=False), \
            patch('os.path.isdir', return_value=True):
        with self.assertRaises(BdcApiException) as cm:
            # validate_catalogs is a generator; it must be consumed
            # for the request (and therefore the failure) to happen.
            list(self.api.validate_catalogs('/tmp'))

    # Checked after the assertRaises block: statements placed after the
    # raising call inside that block would never execute.
    self.assertEqual(str(cm.exception), 'Test exception.')