From 0308139ba9eaf92cd54fd52f6bd136c0571fa3ec Mon Sep 17 00:00:00 2001
From: Wai Phyo
Date: Tue, 7 Oct 2025 16:45:04 -0700
Subject: [PATCH 01/35] BREAKING CHANGE: removing cumulus code

---
 .../cumulus_wrapper/__init__.py               |   0
 .../cumulus_wrapper/cumulus_base.py           |  63 ---
 .../cumulus_wrapper/query_collections.py      | 488 ------
 .../cumulus_wrapper/query_granules.py         | 232 ---------
 .../uds_api/catalog_api.py                    |   9 +-
 .../uds_api/collections_api.py                |  94 +---
 .../uds_api/dapa/collections_dapa_creation.py |  80 +--
 .../uds_api/dapa/collections_dapa_query.py    | 110 ----
 .../uds_api/dapa/granules_dapa_query.py       | 231 ---------
 .../uds_api/granules_api.py                   |  18 -
 .../cumulus_wrapper/__init__.py               |   0
 .../cumulus_wrapper/test_query_collection.py  | 189 -------
 .../cumulus_wrapper/test_query_granules.py    |  30 --
 13 files changed, 12 insertions(+), 1532 deletions(-)
 delete mode 100644 cumulus_lambda_functions/cumulus_wrapper/__init__.py
 delete mode 100644 cumulus_lambda_functions/cumulus_wrapper/cumulus_base.py
 delete mode 100644 cumulus_lambda_functions/cumulus_wrapper/query_collections.py
 delete mode 100644 cumulus_lambda_functions/cumulus_wrapper/query_granules.py
 delete mode 100644 cumulus_lambda_functions/uds_api/dapa/collections_dapa_query.py
 delete mode 100644 cumulus_lambda_functions/uds_api/dapa/granules_dapa_query.py
 delete mode 100644 tests/cumulus_lambda_functions/cumulus_wrapper/__init__.py
 delete mode 100644 tests/cumulus_lambda_functions/cumulus_wrapper/test_query_collection.py
 delete mode 100644 tests/cumulus_lambda_functions/cumulus_wrapper/test_query_granules.py

diff --git a/cumulus_lambda_functions/cumulus_wrapper/__init__.py b/cumulus_lambda_functions/cumulus_wrapper/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/cumulus_lambda_functions/cumulus_wrapper/cumulus_base.py b/cumulus_lambda_functions/cumulus_wrapper/cumulus_base.py
deleted file mode 100644
index 213eafb8..00000000
--- a/cumulus_lambda_functions/cumulus_wrapper/cumulus_base.py
+++ /dev/null
@@ -1,63 +0,0 @@
-import boto3
-import json
-
-from abc import ABC
-from copy import deepcopy
-
-
-class CumulusBase(ABC):
-    def __init__(self, cumulus_base: str, cumulus_token: str):
-        self.__cumulus_base = cumulus_base[:-1] if cumulus_base.endswith('/') else cumulus_base
-        self.__cumulus_token = cumulus_token
-        self.__base_headers = {
-            'Authorization': f'Bearer {cumulus_token}'
-        }
-        self._conditions = []
-
-    def with_page_number(self, page_number):
-        self._conditions.append(f'page={page_number}')
-        return self
-
-    def with_limit(self, limit: int):
-        self._conditions.append(f'limit={limit}')
-        return self
-
-    def get_base_headers(self):
-        return deepcopy(self.__base_headers)
-
-    def _invoke_api(self, payload, private_api_prefix: str):
-        """Function to invoke cumulus api via aws lambda"""
-        client = boto3.client('lambda')
-        response = client.invoke(
-            FunctionName=f'{private_api_prefix}-PrivateApiLambda',
-            Payload=json.dumps(payload),
-        )
-        json_response_payload = response.get('Payload').read().decode('utf-8')
-        response_data = json.loads(json_response_payload)
-        return response_data
-
-    @property
-    def cumulus_base(self):
-        return self.__cumulus_base
-
-    @cumulus_base.setter
-    def cumulus_base(self, val):
-        """
-        :param val:
-        :return: None
-        """
-        self.__cumulus_base = val
-        return
-
-    @property
-    def cumulus_token(self):
-        return self.__cumulus_token
-
-    @cumulus_token.setter
-    def cumulus_token(self, val):
-        """
-        :param val:
-        :return: None
-        """
-        self.__cumulus_token = val
-        return
diff --git a/cumulus_lambda_functions/cumulus_wrapper/query_collections.py b/cumulus_lambda_functions/cumulus_wrapper/query_collections.py
deleted file mode 100644
index 0c43b3c6..00000000
--- a/cumulus_lambda_functions/cumulus_wrapper/query_collections.py
+++ /dev/null
@@ -1,488 +0,0 @@
-import json
-import re
-
-import requests
-from mdps_ds_lib.lib.cumulus_stac.collection_transformer import CollectionTransformer
-
-from cumulus_lambda_functions.cumulus_wrapper.cumulus_base import CumulusBase
-from cumulus_lambda_functions.lib.lambda_logger_generator import LambdaLoggerGenerator
-
-LOGGER = LambdaLoggerGenerator.get_logger(__name__, LambdaLoggerGenerator.get_level_from_env())
-
-
-class CollectionsQuery(CumulusBase):
-    __collections_key = 'collections'
-    __providers_key = 'providers'
-    __rules_key = 'rules'
-    __stats_key = 'stats'
-    __collection_id_key = 'collectionId'
-    __collection_name = 'name'
-    __collection_version = 'version'
-
-    def __init__(self, cumulus_base: str, cumulus_token: str):
-        super().__init__(cumulus_base, cumulus_token)
-
-    def with_collection_id(self, collection_id: str):
-        # self._conditions.append(f'{self.__collection_id_key}={collection_id}')
-        split_collection = collection_id.split('___')
-        self._conditions.append(f'{self.__collection_name}={split_collection[0]}')
-        self._conditions.append(f'{self.__collection_version}={split_collection[1]}')
-
-        return self
-
-    def with_collections(self, collection_ids: list):
-        collection_names = [k.split('___')[0] for k in collection_ids]
-        self._conditions.append(f'{self.__collection_name}__in={",".join(collection_names)}')
-        return self
-
-    def get_size(self, private_api_prefix: str):
-        query_params = {'field': 'status', 'type': 'collections'}
-        main_conditions = {k[0]: k[1] for k in [k1.split('=') for k1 in self._conditions]}
-        if self.__collection_name in main_conditions:
-            query_params[self.__collection_name] = main_conditions[self.__collection_name]
-        if self.__collection_version in main_conditions:
-            query_params[self.__collection_version] = main_conditions[self.__collection_version]
-        payload = {
-            'httpMethod': 'GET',
-            'resource': '/{proxy+}',
-            'path': f'/stats/aggregate',
-            'queryStringParameters': query_params,
-            'headers': {
-                'Content-Type': 'application/json',
-            },
-        }
-        LOGGER.debug(f'payload: {payload}')
-        try:
-            query_result = self._invoke_api(payload, private_api_prefix)
-            """
-            {'statusCode': 200, 'body': '{"meta":{"name":"cumulus-api","stack":"am-uds-dev-cumulus","table":"granule","limit":3,"page":1,"count":0},"results":[]}', 'headers': {'x-powered-by': 'Express', 'access-control-allow-origin': '*', 'strict-transport-security': 'max-age=31536000; includeSubDomains', 'content-type': 'application/json; charset=utf-8', 'content-length': '120', 'etag': 'W/"78-YdHqDNIH4LuOJMR39jGNA/23yOQ"', 'date': 'Tue, 07 Jun 2022 22:30:44 GMT', 'connection': 'close'}, 'isBase64Encoded': False}
-            """
-        except Exception as e:
-            LOGGER.exception('error while invoking')
-            return {'server_error': f'error while invoking:{str(e)}'}
-        if query_result['statusCode'] >= 500:
-            raise ValueError(f'server_error: {query_result.statusCode}. details: {query_result}')
-        if query_result['statusCode'] >= 400:
-            raise ValueError(f'client_error: {query_result.statusCode}. details: {query_result}')
-        query_result = json.loads(query_result['body'])
-        LOGGER.info(f'json query_result: {query_result}')
-        if 'meta' not in query_result or 'count' not in query_result['meta']:
-            raise ValueError(f'server_error: missing key: results. invalid response json: {query_result}')
-        total_size = query_result['meta']['count']
-        return {'total_size': total_size}
-
-    def create_collection(self, new_collection: dict, private_api_prefix: str):
-        payload = {
-            'httpMethod': 'POST',
-            'resource': '/{proxy+}',
-            'path': f'/{self.__collections_key}',
-            'headers': {
-                'Content-Type': 'application/json',
-            },
-            'body': json.dumps(new_collection)
-        }
-        LOGGER.debug(f'payload: {payload}')
-        try:
-            query_result = self._invoke_api(payload, private_api_prefix)
-            """
-            {'statusCode': 500, 'body': '', 'headers': {}}
-            """
-            if query_result['statusCode'] >= 500:
-                LOGGER.error(f'server error status code: {query_result["statusCode"]}. details: {query_result}')
-                return {'server_error': query_result}
-            if query_result['statusCode'] >= 400:
-                LOGGER.error(f'client error status code: {query_result["statusCode"]}. details: {query_result}')
-                return {'client_error': query_result}
-            query_result = json.loads(query_result['body'])
-            LOGGER.debug(f'json query_result: {query_result}')
-            if 'message' not in query_result:
-                return {'server_error': f'invalid response: {query_result}'}
-        except Exception as e:
-            LOGGER.exception('error while invoking')
-            return {'server_error': f'error while invoking:{str(e)}'}
-        return {'status': query_result['message']}
-
-    def delete_collection(self, private_api_prefix, collection_id, collection_version):
-        payload = {
-            'httpMethod': 'DELETE',
-            'resource': '/{proxy+}',
-            'path': f'/{self.__collections_key}/{collection_id}/{collection_version}',
-        }
-        LOGGER.debug(f'payload: {payload}')
-        try:
-            query_result = self._invoke_api(payload, private_api_prefix)
-            """
-            {'statusCode': 500, 'body': '', 'headers': {}}
-            """
-            if query_result['statusCode'] >= 500:
-                LOGGER.error(f'server error status code: {query_result["statusCode"]}. details: {query_result}')
-                return {'server_error': query_result}
-            if query_result['statusCode'] >= 400:
-                LOGGER.error(f'client error status code: {query_result["statusCode"]}. details: {query_result}')
-                return {'client_error': query_result}
-            query_result = json.loads(query_result['body'])
-            LOGGER.debug(f'json query_result: {query_result}')
-            if 'message' not in query_result:
-                return {'server_error': f'invalid response: {query_result}'}
-        except Exception as e:
-            LOGGER.exception('error while invoking')
-            return {'server_error': f'error while invoking:{str(e)}'}
-        return {'status': query_result['message']}
-
-    def query_rules(self, private_api_prefix: str):
-        payload = {
-            'httpMethod': 'GET',
-            'resource': '/{proxy+}',
-            'path': f'/{self.__rules_key}',
-            # 'queryStringParameters': {k[0]: k[1] for k in [k1.split('=') for k1 in self._conditions]},
-        }
-        LOGGER.debug(f'payload: {payload}')
-        try:
-            query_result = self._invoke_api(payload, private_api_prefix)
-            """
-            {'statusCode': 200, 'body': '{"meta":{"name":"cumulus-api","stack":"am-uds-dev-cumulus","table":"granule","limit":3,"page":1,"count":0},"results":[]}', 'headers': {'x-powered-by': 'Express', 'access-control-allow-origin': '*', 'strict-transport-security': 'max-age=31536000; includeSubDomains', 'content-type': 'application/json; charset=utf-8', 'content-length': '120', 'etag': 'W/"78-YdHqDNIH4LuOJMR39jGNA/23yOQ"', 'date': 'Tue, 07 Jun 2022 22:30:44 GMT', 'connection': 'close'}, 'isBase64Encoded': False}
-            """
-            if query_result['statusCode'] >= 500:
-                LOGGER.error(f'server error status code: {query_result.statusCode}. details: {query_result}')
-                return {'server_error': query_result}
-            if query_result['statusCode'] >= 400:
-                LOGGER.error(f'client error status code: {query_result.statusCode}. details: {query_result}')
-                return {'client_error': query_result}
-            query_result = json.loads(query_result['body'])
-            LOGGER.debug(f'json query_result: {query_result}')
-            if 'results' not in query_result:
-                LOGGER.error(f'missing key: results. invalid response json: {query_result}')
-                return {'server_error': f'missing key: results. invalid response json: {query_result}'}
-            query_result = query_result['results']
-        except Exception as e:
-            LOGGER.exception('error while invoking')
-            return {'server_error': f'error while invoking:{str(e)}'}
-        return {'results': query_result}
-
-    def delete_sqs_rules(self, new_collection: dict, private_api_prefix: str):
-        # $ curl --request DELETE https://example.com/rules/repeat_test --header 'Authorization: Bearer ReplaceWithTheToken'
-        underscore_collection_name = re.sub(r'[^a-zA-Z0-9_]', '___', new_collection["name"])  # replace any character that's not alphanumeric or underscore with 3 underscores
-        rule_name = f'{underscore_collection_name}___{new_collection["version"]}___rules_sqs'
-        payload = {
-            'httpMethod': 'DELETE',
-            'resource': '/{proxy+}',
-            'path': f'/{self.__rules_key}/{rule_name}',
-            'headers': {
-                'Content-Type': 'application/json',
-            },
-        }
-        LOGGER.debug(f'payload: {payload}')
-        try:
-            query_result = self._invoke_api(payload, private_api_prefix)
-            """
-            {'statusCode': 500, 'body': '', 'headers': {}}
-            """
-            if query_result['statusCode'] >= 500:
-                LOGGER.error(f'server error status code: {query_result["statusCode"]}. details: {query_result}')
-                return {'server_error': query_result}
-            if query_result['statusCode'] >= 400:
-                LOGGER.error(f'client error status code: {query_result["statusCode"]}. details: {query_result}')
-                return {'client_error': query_result}
-            query_result = json.loads(query_result['body'])
-            LOGGER.debug(f'json query_result: {query_result}')
-            if 'message' not in query_result:
-                return {'server_error': f'invalid response: {query_result}'}
-        except Exception as e:
-            LOGGER.exception('error while invoking')
-            return {'server_error': f'error while invoking:{str(e)}'}
-        return {'status': query_result['message']}
-
-    def delete_executions(self, new_collection: dict, private_api_prefix: str):
-        # $ curl --request DELETE https://example.com/rules/repeat_test --header 'Authorization: Bearer ReplaceWithTheToken'
-        request_body = {
-            "collectionId": f'{new_collection["name"]}___{new_collection["version"]}',
-            "esBatchSize": 10000,
-            "dbBatchSize": 50000
-        }
-        payload = {
-            'httpMethod': 'POST',
-            'resource': '/{proxy+}',
-            'path': f'/executions/bulk-delete-by-collection',
-            'headers': {
-                'Content-Type': 'application/json',
-            },
-            'body': json.dumps(request_body)
-        }
-        LOGGER.debug(f'payload: {payload}')
-        try:
-            query_result = self._invoke_api(payload, private_api_prefix)
-            """
-            {'statusCode': 500, 'body': '', 'headers': {}}
-            """
-            if query_result['statusCode'] >= 500:
-                LOGGER.error(f'server error status code: {query_result["statusCode"]}. details: {query_result}')
-                return {'server_error': query_result}
-            if query_result['statusCode'] >= 400:
-                LOGGER.error(f'client error status code: {query_result["statusCode"]}. details: {query_result}')
-                return {'client_error': query_result}
-            query_result = json.loads(query_result['body'])
-            LOGGER.debug(f'json query_result: {query_result}')
-            if 'id' not in query_result:
-                return {'server_error': f'invalid response: {query_result}'}
-        except Exception as e:
-            LOGGER.exception('error while invoking')
-            return {'server_error': f'error while invoking:{str(e)}'}
-        return {'status': query_result}
-
-    def list_executions(self, new_collection: dict, private_api_prefix: str):
-        # $ curl --request DELETE https://example.com/rules/repeat_test --header 'Authorization: Bearer ReplaceWithTheToken'
-        payload = {
-            'httpMethod': 'GET',
-            'resource': '/{proxy+}',
-            'path': f'/executions',
-            'queryStringParameters': {'limit': '100', 'collectionId': f'{new_collection["name"]}___{new_collection["version"]}'},
-            'headers': {
-                'Content-Type': 'application/json',
-            }
-        }
-        LOGGER.debug(f'payload: {payload}')
-        try:
-            query_result = self._invoke_api(payload, private_api_prefix)
-            """
-            {'statusCode': 500, 'body': '', 'headers': {}}
-            """
-            if query_result['statusCode'] >= 500:
-                LOGGER.error(f'server error status code: {query_result["statusCode"]}. details: {query_result}')
-                return {'server_error': query_result}
-            if query_result['statusCode'] >= 400:
-                LOGGER.error(f'client error status code: {query_result["statusCode"]}. details: {query_result}')
-                return {'client_error': query_result}
-            query_result = json.loads(query_result['body'])
-            LOGGER.debug(f'json query_result: {query_result}')
-            if 'results' not in query_result:
-                return {'server_error': f'invalid response: {query_result}'}
-        except Exception as e:
-            LOGGER.exception('error while invoking')
-            return {'server_error': f'error while invoking:{str(e)}'}
-        return {'results': query_result['results']}
-
-    def create_sqs_rules(self, new_collection: dict, private_api_prefix: str, sqs_url: str, provider_name: str = '', workflow_name: str = 'CatalogGranule', visibility_timeout: int = 1800):
-        """
-curl --request POST "$CUMULUS_BASEURL/rules" --header "Authorization: Bearer $cumulus_token" --header 'Content-Type: application/json' --data '{
-    "workflow": "DiscoverGranules",
-    "collection": {
-        "name": "ATMS_SCIENCE_Group",
-        "version": "001"
-    },
-    "provider": "snpp_provider_03",
-    "name": "ATMS_SCIENCE_Group_2016_002_v1",
-    "rule": {
-        "type": "onetime"
-    },
-    "meta": { "provider_path": "data/SNPP_ATMS_Level0_T/ATMS_SCIENCE_Group/2016/002/", "publish": false, "distribution_endpoint": "s3://am-uds-dev-cumulus-internal/" },
-    "state": "ENABLED"
-}'
-        :return:
-        """
-        underscore_collection_name = re.sub(r'[^a-zA-Z0-9_]', '___', new_collection["name"])  # replace any character that's not alphanumeric or underscore with 3 underscores
-        LOGGER.debug(f'underscore_collection_name: {underscore_collection_name}')
-        rule_body = {
-            'workflow': workflow_name,
-            'collection': {
-                'name': new_collection['name'],
-                'version': new_collection['version'],
-            },
-            # 'provider': provider_name,
-            'name': f'{underscore_collection_name}___{new_collection["version"]}___rules_sqs',
-            'rule': {
-                # 'type': 'onetime',
-                'type': 'sqs',
-                'value': sqs_url,
-            },
-            'state': 'ENABLED',
-            "meta": {
-                'retries': 1,
-                'visibilityTimeout': visibility_timeout,
-                # "provider_path": "data/SNPP_ATMS_Level0_T/ATMS_SCIENCE_Group/2016/002/",
-                # "publish": False,
-                # "distribution_endpoint": "s3://am-uds-dev-cumulus-internal/"
-            },
-
-        }
-        if provider_name is not None and provider_name != '':
-            rule_body['provider'] = provider_name
-        LOGGER.info(f'rule_body: {rule_body}')
-        payload = {
-            'httpMethod': 'POST',
-            'resource': '/{proxy+}',
-            'path': f'/{self.__rules_key}',
-            'headers': {
-                'Content-Type': 'application/json',
-            },
-            'body': json.dumps(rule_body)
-        }
-        LOGGER.debug(f'payload: {payload}')
-        try:
-            query_result = self._invoke_api(payload, private_api_prefix)
-            """
-            {'statusCode': 500, 'body': '', 'headers': {}}
-            """
-            if query_result['statusCode'] >= 500:
-                LOGGER.error(f'server error status code: {query_result["statusCode"]}. details: {query_result}')
-                return {'server_error': query_result}
-            if query_result['statusCode'] >= 400:
-                LOGGER.error(f'client error status code: {query_result["statusCode"]}. details: {query_result}')
-                return {'client_error': query_result}
-            query_result = json.loads(query_result['body'])
-            LOGGER.debug(f'json query_result: {query_result}')
-            if 'message' not in query_result:
-                return {'server_error': f'invalid response: {query_result}'}
-        except Exception as e:
-            LOGGER.exception('error while invoking')
-            return {'server_error': f'error while invoking:{str(e)}'}
-        return {'status': query_result['message']}
-
-    def __get_stats(self, collection_id, private_api_prefix: str):
-        payload = {
-            'httpMethod': 'GET',
-            'resource': '/{proxy+}',
-            'path': f'/{self.__collections_key}',
-            'queryStringParameters': {k[0]: k[1] for k in [k1.split('=') for k1 in self._conditions]},
-        }
-        try:
-            query_stats_result = self._invoke_api(payload, private_api_prefix)
-        except:
-            LOGGER.exception(f'error while trying to retrieve stats for collection: {collection_id}')
-            return {}
-        if query_stats_result['statusCode'] >= 400:
-            return {}
-        return query_stats_result['results']
-
-    def get_stats(self, collection_id:str, private_api_prefix: str):
-        payload = {
-            'httpMethod': 'GET',
-            'resource': '/{proxy+}',
-            'path': f'/stats',
-            'queryStringParameters': {'type': 'granules', 'collectionId': collection_id},
-            'headers': {
-                'Content-Type': 'application/json',
-            },
-        }
-        LOGGER.debug(f'payload: {payload}')
-        try:
-            query_result = self._invoke_api(payload, private_api_prefix)
-            """
-            {'statusCode': 200, 'body': '{"meta":{"name":"cumulus-api","stack":"am-uds-dev-cumulus","table":"granule","limit":3,"page":1,"count":0},"results":[]}', 'headers': {'x-powered-by': 'Express', 'access-control-allow-origin': '*', 'strict-transport-security': 'max-age=31536000; includeSubDomains', 'content-type': 'application/json; charset=utf-8', 'content-length': '120', 'etag': 'W/"78-YdHqDNIH4LuOJMR39jGNA/23yOQ"', 'date': 'Tue, 07 Jun 2022 22:30:44 GMT', 'connection': 'close'}, 'isBase64Encoded': False}
-            """
-        except Exception as e:
-            LOGGER.exception('error while invoking')
-            return {'server_error': f'error while invoking:{str(e)}'}
-        if query_result['statusCode'] >= 500:
-            raise ValueError(f'server_error: {query_result.statusCode}. details: {query_result}')
-        if query_result['statusCode'] >= 400:
-            raise ValueError(f'client_error: {query_result.statusCode}. details: {query_result}')
-        query_result = json.loads(query_result['body'])
-        LOGGER.info(f'json query_result: {query_result}')
-        if 'granules' not in query_result:
-            raise ValueError(f'server_error: missing key: results. invalid response json: {query_result}')
-        stats = query_result['granules']
-        return stats
-
-    def query_direct_to_private_api(self, private_api_prefix: str, output_base_url: str):
-        payload = {
-            'httpMethod': 'GET',
-            'resource': '/{proxy+}',
-            'path': f'/{self.__collections_key}',
-            'queryStringParameters': {k[0]: k[1] for k in [k1.split('=') for k1 in self._conditions]},
-        }
-        LOGGER.debug(f'payload: {payload}')
-        try:
-            query_result = self._invoke_api(payload, private_api_prefix)
-            """
-            {'statusCode': 200, 'body': '{"meta":{"name":"cumulus-api","stack":"am-uds-dev-cumulus","table":"granule","limit":3,"page":1,"count":0},"results":[]}', 'headers': {'x-powered-by': 'Express', 'access-control-allow-origin': '*', 'strict-transport-security': 'max-age=31536000; includeSubDomains', 'content-type': 'application/json; charset=utf-8', 'content-length': '120', 'etag': 'W/"78-YdHqDNIH4LuOJMR39jGNA/23yOQ"', 'date': 'Tue, 07 Jun 2022 22:30:44 GMT', 'connection': 'close'}, 'isBase64Encoded': False}
-            """
-            if query_result['statusCode'] >= 500:
-                LOGGER.error(f'server error status code: {query_result.statusCode}. details: {query_result}')
-                return {'server_error': query_result}
-            if query_result['statusCode'] >= 400:
-                LOGGER.error(f'client error status code: {query_result.statusCode}. details: {query_result}')
-                return {'client_error': query_result}
-            query_result = json.loads(query_result['body'])
-            LOGGER.debug(f'json query_result: {query_result}')
-            if 'results' not in query_result:
-                LOGGER.error(f'missing key: results. invalid response json: {query_result}')
-                return {'server_error': f'missing key: results. invalid response json: {query_result}'}
-            query_result = query_result['results']
-            for each_collection in query_result:
-                stac_collection_id = f"{each_collection['name']}___{each_collection['version']}"
-                stats = self.get_stats(stac_collection_id, private_api_prefix)
-                each_collection['dateFrom'] = stats['dateFrom']
-                each_collection['dateTo'] = stats['dateTo']
-                each_collection['total_size'] = stats['value']
-            stac_list = [CollectionTransformer(items_base_url=output_base_url).to_stac(k) for k in query_result]
-            LOGGER.debug(f'stac_list: {stac_list}')
-        except Exception as e:
-            LOGGER.exception('error while invoking')
-            return {'server_error': f'error while invoking:{str(e)}'}
-        return {'results': stac_list}
-
-    def query(self):
-        conditions_str = '&'.join(self._conditions)
-        LOGGER.info(f'cumulus_base: {self.cumulus_base}')
-        LOGGER.info(f'get_base_headers: {self.get_base_headers()}')
-        try:
-            query_result = requests.get(url=f'{self.cumulus_base}/{self.__collections_key}?{conditions_str}', headers=self.get_base_headers())
-            LOGGER.info(f'query_result: {query_result}')
-            if query_result.status_code >= 500:
-                return {'server_error': query_result.text}
-            if query_result.status_code >= 400:
-                return {'client_error': query_result.text}
-            query_result = json.loads(query_result.content.decode())
-            LOGGER.info(f'query_result: {query_result}')
-            if 'results' not in query_result:
-                return {'server_error': f'missing key: results. invalid response json: {query_result}'}
-            query_result = query_result['results']
-        except Exception as e:
-            LOGGER.exception('error during cumulus query')
-            return {'server_error': str(e)}
-        return {'results': [CollectionTransformer().to_stac(k) for k in query_result]}
-
-    def create_provider(self, provider_name: str, s3_bucket: str, private_api_prefix: str):
-        # INSERT INTO providers (name, protocol, host) VALUES ('unity', 's3', 'https://dev.mdps.mcp.nasa.gov');
-        # TODO : this fails
-        payload = {
-            'httpMethod': 'POST',
-            'resource': '/{proxy+}',
-            'path': f'/{self.__providers_key}',
-            'headers': {
-                'Content-Type': 'application/json',
-            },
-            'body': json.dumps({
-                "id": provider_name,
-                "host": s3_bucket,
-                "protocol": "s3",
-                # "port": 443,
-                "globalConnectionLimit": 1000,
-                # "maxDownloadTime": 300,
-                # "username": "na",
-                # "password": "na",
-                # "privateKey": "na",
-                # "cmKeyId": "na",
-                # "allowedRedirects": "na",
-            })
-        }
-        LOGGER.debug(f'payload: {payload}')
-        try:
-            query_result = self._invoke_api(payload, private_api_prefix)
-            """
-            {'statusCode': 500, 'body': '', 'headers': {}}
-            """
-            if query_result['statusCode'] >= 500:
-                LOGGER.error(f'server error status code: {query_result["statusCode"]}. details: {query_result}')
-                return {'server_error': query_result}
-            if query_result['statusCode'] >= 400:
-                LOGGER.error(f'client error status code: {query_result["statusCode"]}. details: {query_result}')
-                return {'client_error': query_result}
-            query_result = json.loads(query_result['body'])
-            LOGGER.debug(f'json query_result: {query_result}')
-            if 'message' not in query_result:
-                return {'server_error': f'invalid response: {query_result}'}
-        except Exception as e:
-            LOGGER.exception('error while invoking')
-            return {'server_error': f'error while invoking:{str(e)}'}
-        return {'status': query_result['message']}
diff --git a/cumulus_lambda_functions/cumulus_wrapper/query_granules.py b/cumulus_lambda_functions/cumulus_wrapper/query_granules.py
deleted file mode 100644
index e10da64d..00000000
--- a/cumulus_lambda_functions/cumulus_wrapper/query_granules.py
+++ /dev/null
@@ -1,232 +0,0 @@
-import json
-
-import boto3
-import requests
-from mdps_ds_lib.lib.cumulus_stac.item_transformer import ItemTransformer
-
-from cumulus_lambda_functions.cumulus_wrapper.cumulus_base import CumulusBase
-from cumulus_lambda_functions.lib.lambda_logger_generator import LambdaLoggerGenerator
-
-LOGGER = LambdaLoggerGenerator.get_logger(__name__, LambdaLoggerGenerator.get_level_from_env())
-
-
-class GranulesQuery(CumulusBase):
-    __granules_key = 'granules'
-    __ending_time_key = 'endingDateTime'
-    __beginning_time_key = 'beginningDateTime'
-    __collection_id_key = 'collectionId'
-    __granules_id = 'granuleId'
-
-    def __init__(self, cumulus_base: str, cumulus_token: str):
-        super().__init__(cumulus_base, cumulus_token)
-        self._conditions.append('status=completed')
-        self._item_transformer = ItemTransformer()
-        self.__collection_id = None
-
-    def with_filter(self, filter_key, filter_values: list):
-        if len(filter_values) < 1:
-            return self
-        if filter_key not in self._item_transformer.STAC_2_CUMULUS_KEYS_MAP:
-            LOGGER.error(f'unknown key in STAC_2_CUMULUS_KEYS_MAP: {filter_key} ')
-            return self
-        filter_key = self._item_transformer.STAC_2_CUMULUS_KEYS_MAP[filter_key]
-        self._conditions.append(f'{filter_key}__in={",".join(filter_values)}')
-        return self
-
-    def with_collection_id(self, collection_id: str):
-        self._conditions.append(f'{self.__collection_id_key}={collection_id}')
-        self.__collection_id = collection_id
-        return self
-
-    def with_bbox(self):
-        return self
-
-    def with_time_from(self, from_time):
-        self._conditions.append(f'{self.__ending_time_key}__from={from_time}')
-        return self
-
-    def with_time_to(self, to_time):
-        self._conditions.append(f'{self.__beginning_time_key}__to={to_time}')
-        return self
-
-    def with_time(self, input_time):
-        self._conditions.append(f'{self.__beginning_time_key}__from={input_time}')
-        self._conditions.append(f'{self.__ending_time_key}__to={input_time}')
-        return self
-
-    def with_time_range(self, from_time, to_time):
-        """
-
-        curl -k "$CUMULUS_BASEURL/granules?limit=1&beginningDateTime__from=2016-01-18T22:00:00&endingDateTime__to=2016-01-20T22:00:00" --header "Authorization: Bearer $cumulus_token"|jq
-        :param beginning_dt:
-        :param ending_dt:
-        :return:
-        """
-        self._conditions.append(f'{self.__ending_time_key}__from={from_time}')
-        self._conditions.append(f'{self.__beginning_time_key}__to={to_time}')
-        return self
-
-    def get_size(self, private_api_prefix: str):
-        payload = {
-            'httpMethod': 'GET',
-            'resource': '/{proxy+}',
-            'path': f'/stats/aggregate',
-            'queryStringParameters': {**{k[0]: k[1] for k in [k1.split('=') for k1 in self._conditions]}, **{'field': 'status', 'type': 'granules'}},
-            'headers': {
-                'Content-Type': 'application/json',
-            },
-        }
-        LOGGER.debug(f'payload: {payload}')
-        try:
-            query_result = self._invoke_api(payload, private_api_prefix)
-            """
-            {'statusCode': 200, 'body': '{"meta":{"name":"cumulus-api","stack":"am-uds-dev-cumulus","table":"granule","limit":3,"page":1,"count":0},"results":[]}', 'headers': {'x-powered-by': 'Express', 'access-control-allow-origin': '*', 'strict-transport-security': 'max-age=31536000; includeSubDomains', 'content-type': 'application/json; charset=utf-8', 'content-length': '120', 'etag': 'W/"78-YdHqDNIH4LuOJMR39jGNA/23yOQ"', 'date': 'Tue, 07 Jun 2022 22:30:44 GMT', 'connection': 'close'}, 'isBase64Encoded': False}
-            """
-        except Exception as e:
-            LOGGER.exception('error while invoking')
-            return {'server_error': f'error while invoking:{str(e)}'}
-        if query_result['statusCode'] >= 500:
-            raise ValueError(f'server_error: {query_result.statusCode}. details: {query_result}')
-        if query_result['statusCode'] >= 400:
-            raise ValueError(f'client_error: {query_result.statusCode}. details: {query_result}')
-        query_result = json.loads(query_result['body'])
-        LOGGER.info(f'json query_result: {query_result}')
-        if 'meta' not in query_result or 'count' not in query_result['meta']:
-            raise ValueError(f'server_error: missing key: results. invalid response json: {query_result}')
-        total_size = query_result['meta']['count']
-        return {'total_size': total_size}
-
-    def query_direct_to_private_api(self, private_api_prefix: str, transform=True):
-        payload = {
-            'httpMethod': 'GET',
-            'resource': '/{proxy+}',
-            'path': f'/{self.__granules_key}',
-            'queryStringParameters': {**{k[0]: k[1] for k in [k1.split('=') for k1 in self._conditions]}},
-            # 'queryStringParameters': {'limit': '30'},
-            'headers': {
-                'Content-Type': 'application/json',
-            },
-            # 'body': json.dumps({"action": "removeFromCmr"})
-        }
-        LOGGER.debug(f'payload: {payload}')
-        try:
-            query_result = self._invoke_api(payload, private_api_prefix)
-            """
-            {'statusCode': 200, 'body': '{"meta":{"name":"cumulus-api","stack":"am-uds-dev-cumulus","table":"granule","limit":3,"page":1,"count":0},"results":[]}', 'headers': {'x-powered-by': 'Express', 'access-control-allow-origin': '*', 'strict-transport-security': 'max-age=31536000; includeSubDomains', 'content-type': 'application/json; charset=utf-8', 'content-length': '120', 'etag': 'W/"78-YdHqDNIH4LuOJMR39jGNA/23yOQ"', 'date': 'Tue, 07 Jun 2022 22:30:44 GMT', 'connection': 'close'}, 'isBase64Encoded': False}
-            """
-            if query_result['statusCode'] >= 500:
-                LOGGER.error(f'server error status code: {query_result.statusCode}. details: {query_result}')
-                return {'server_error': query_result}
-            if query_result['statusCode'] >= 400:
-                LOGGER.error(f'client error status code: {query_result.statusCode}. details: {query_result}')
-                return {'client_error': query_result}
-            query_result = json.loads(query_result['body'])
-            LOGGER.info(f'json query_result: {query_result}')
-            if 'results' not in query_result:
-                LOGGER.error(f'missing key: results. invalid response json: {query_result}')
-                return {'server_error': f'missing key: results. invalid response json: {query_result}'}
-            query_result = query_result['results']
-            stac_list = [ItemTransformer().to_stac(k) for k in query_result] if transform is True else query_result
-        except Exception as e:
-            LOGGER.exception('error while invoking')
-            return {'server_error': f'error while invoking:{str(e)}'}
-        return {'results': stac_list}
-
-    def add_entry(self, private_api_prefix: str, new_granule: dict):
-        raise NotImplementedError(f'Please implement adding granules to Cumulus')
-        # https://nasa.github.io/cumulus-api/v18.4.0/#create-granule
-        # payload = {
-        #     'httpMethod': 'POST',
-        #     'resource': '/{proxy+}',
-        #     'path': f'/{self.__collections_key}',
-        #     'headers': {
-        #         'Content-Type': 'application/json',
-        #     },
-        #     'body': json.dumps(new_granule)
-        # }
-        # LOGGER.debug(f'payload: {payload}')
-        # try:
-        #     query_result = self._invoke_api(payload, private_api_prefix)
-        #     """
-        #     {'statusCode': 500, 'body': '', 'headers': {}}
-        #     """
-        #     if query_result['statusCode'] >= 500:
-        #         LOGGER.error(f'server error status code: {query_result["statusCode"]}. details: {query_result}')
-        #         return {'server_error': query_result}
-        #     if query_result['statusCode'] >= 400:
-        #         LOGGER.error(f'client error status code: {query_result["statusCode"]}. details: {query_result}')
-        #         return {'client_error': query_result}
-        #     query_result = json.loads(query_result['body'])
-        #     LOGGER.debug(f'json query_result: {query_result}')
-        #     if 'message' not in query_result:
-        #         return {'server_error': f'invalid response: {query_result}'}
-        # except Exception as e:
-        #     LOGGER.exception('error while invoking')
-        #     return {'server_error': f'error while invoking:{str(e)}'}
-        # return {'status': query_result['message']}
        return
-
-    def delete_entry(self, private_api_prefix: str, granule_id: str):
-        payload = {
-            'httpMethod': 'DELETE',
-            'resource': '/{proxy+}',
-            'path': f'/{self.__granules_key}/{self.__collection_id}/{granule_id}',
-            'queryStringParameters': {**{k[0]: k[1] for k in [k1.split('=') for k1 in self._conditions]}},
-            # 'queryStringParameters': {'limit': '30'},
-            'headers': {
-                'Content-Type': 'application/json',
-            },
-            # 'body': json.dumps({"action": "removeFromCmr"})
-        }
-        LOGGER.debug(f'payload: {payload}')
-        try:
-            query_result = self._invoke_api(payload, private_api_prefix)
-            """
-            {'statusCode': 200, 'body': '{"meta":{"name":"cumulus-api","stack":"am-uds-dev-cumulus","table":"granule","limit":3,"page":1,"count":0},"results":[]}', 'headers': {'x-powered-by': 'Express', 'access-control-allow-origin': '*', 'strict-transport-security': 'max-age=31536000; includeSubDomains', 'content-type': 'application/json; charset=utf-8', 'content-length': '120', 'etag': 'W/"78-YdHqDNIH4LuOJMR39jGNA/23yOQ"', 'date': 'Tue, 07 Jun 2022 22:30:44 GMT', 'connection': 'close'}, 'isBase64Encoded': False}
-            """
-            LOGGER.debug(f'json query_result: {query_result}')
-            if query_result['statusCode'] >= 500:
-                LOGGER.error(f'server error status code: {query_result["statusCode"]}. details: {query_result}')
-                return {'server_error': query_result}
-            if query_result['statusCode'] >= 400:
-                LOGGER.error(f'client error status code: {query_result["statusCode"]}. details: {query_result}')
-                return {'client_error': query_result}
-            query_result = json.loads(query_result['body'])
-            """
-            {
-                "detail": "Record deleted"
-            }
-            """
-            if 'detail' not in query_result:
-                LOGGER.error(f'missing key: detail. invalid response json: {query_result}')
-                return {'server_error': f'missing key: detail. invalid response json: {query_result}'}
-            if query_result['detail'] != 'Record deleted':
-                LOGGER.error(f'Wrong Message: {query_result}')
-                return {'server_error': f'Wrong Message: {query_result}'}
-        except Exception as e:
-            LOGGER.exception('error while invoking')
-            return {'server_error': f'error while invoking:{str(e)}'}
-        return {}
-
-    def query(self, transform=True):
-        conditions_str = '&'.join(self._conditions)
-        LOGGER.info(f'cumulus_base: {self.cumulus_base}')
-        LOGGER.info(f'get_base_headers: {self.get_base_headers()}')
-        try:
-            query_result = requests.get(url=f'{self.cumulus_base}/{self.__granules_key}?{conditions_str}', headers=self.get_base_headers())
-            LOGGER.info(f'query_result: {query_result}')
-            if query_result.status_code >= 500:
-                return {'server_error': query_result.text}
-            if query_result.status_code >= 400:
-                return {'client_error': query_result.text}
-            query_result = json.loads(query_result.content.decode())
-            LOGGER.info(f'query_result: {query_result}')
-            if 'results' not in query_result:
-                return {'server_error': f'missing key: results. invalid response json: {query_result}'}
-            query_result = query_result['results']
-        except Exception as e:
-            LOGGER.exception('error during cumulus query')
-            return {'server_error': str(e)}
-        if transform is True:
-            return {'results': [ItemTransformer().to_stac(k) for k in query_result]}
-        return query_result
diff --git a/cumulus_lambda_functions/uds_api/catalog_api.py b/cumulus_lambda_functions/uds_api/catalog_api.py
index 521c77cc..41ab4e25 100644
--- a/cumulus_lambda_functions/uds_api/catalog_api.py
+++ b/cumulus_lambda_functions/uds_api/catalog_api.py
@@ -15,7 +15,6 @@
 from cumulus_lambda_functions.lib.lambda_logger_generator import LambdaLoggerGenerator
 from fastapi import APIRouter, HTTPException, Request, Response
 
-from cumulus_lambda_functions.uds_api.dapa.collections_dapa_query import CollectionDapaQuery
 from cumulus_lambda_functions.uds_api.dapa.pagination_links_generator import PaginationLinksGenerator
 from cumulus_lambda_functions.uds_api.web_service_constants import WebServiceConstants
 
@@ -50,13 +49,7 @@ async def get_catalog(request: Request, limit: Union[int, None] = 10, offset: Un
     # NOTE: 2022-11-21: only pass collections. not versions
 
     try:
-        custom_params = {}
-        if limit > CollectionDapaQuery.max_limit:
-            LOGGER.debug(f'incoming limit > {CollectionDapaQuery.max_limit}. resetting to max. incoming limit: {limit}')
-            limit = CollectionDapaQuery.max_limit
-            custom_params['limit'] = limit
-            LOGGER.debug(f'new limit: {limit}')
-        pg_link_generator = PaginationLinksGenerator(request, custom_params)
+        pg_link_generator = PaginationLinksGenerator(request)
 
         catalog = Catalog(
             id='unity_ds',
diff --git a/cumulus_lambda_functions/uds_api/collections_api.py b/cumulus_lambda_functions/uds_api/collections_api.py
index 6a7c2d2e..40a8d416 100644
--- a/cumulus_lambda_functions/uds_api/collections_api.py
+++ b/cumulus_lambda_functions/uds_api/collections_api.py
@@ -22,8 +22,7 @@
 from cumulus_lambda_functions.uds_api.dapa.collections_dapa_cnm import CnmRequestBody, CollectionsDapaCnm
 from cumulus_lambda_functions.uds_api.dapa.collections_dapa_creation import CollectionDapaCreation, \
     CumulusCollectionModel
-from cumulus_lambda_functions.uds_api.dapa.collections_dapa_query import CollectionDapaQuery
-from cumulus_lambda_functions.uds_api.dapa.pagination_links_generator import PaginationLinksGenerator
+# from cumulus_lambda_functions.uds_api.dapa.pagination_links_generator import PaginationLinksGenerator
 from cumulus_lambda_functions.uds_api.web_service_constants import WebServiceConstants
 from fastapi.responses import PlainTextResponse, JSONResponse
 
@@ -185,99 +184,12 @@ async def create_new_collection_real(request: Request, new_collection: CumulusCo
 @router.get("/{collection_id}")
 @router.get("/{collection_id}/")
 async def get_single_collection(request: Request, collection_id: str, limit: Union[int, None] = 10, offset: Union[int, None] = 0, ):
-    LOGGER.debug(f'starting get_single_collection: {collection_id}')
-    LOGGER.debug(f'starting get_single_collection request: {request}')
-
-    authorizer: UDSAuthorizorAbstract = UDSAuthorizerFactory() \
-        .get_instance(UDSAuthorizerFactory.cognito,
-                      es_url=os.getenv('ES_URL'),
-                      es_port=int(os.getenv('ES_PORT', '443'))
-                      )
-    auth_info = FastApiUtils.get_authorization_info(request)
-    uds_collections = UdsCollections(es_url=os.getenv('ES_URL'),
-                                     es_port=int(os.getenv('ES_PORT', '443')), es_type=os.getenv('ES_TYPE', 'AWS'))
-    if collection_id is None or collection_id == '':
-        raise HTTPException(status_code=500, detail=f'missing or invalid collection_id: {collection_id}')
-    collection_identifier = uds_collections.decode_identifier(collection_id)
-    if not authorizer.is_authorized_for_collection(DBConstants.read, collection_id,
-                                                   auth_info['ldap_groups'],
-                                                   collection_identifier.tenant,
-                                                   collection_identifier.venue):
-        LOGGER.debug(f'user: {auth_info["username"]} is not authorized for {collection_id}')
-        raise HTTPException(status_code=403, detail=json.dumps({
-            'message': 'not authorized to execute this action'
-        }))
-
-    try:
-        custom_params = {}
-        if limit > CollectionDapaQuery.max_limit:
-            LOGGER.debug(f'incoming limit > {CollectionDapaQuery.max_limit}. resetting to max. incoming limit: {limit}')
-            limit = CollectionDapaQuery.max_limit
-            custom_params['limit'] = limit
-            LOGGER.debug(f'new limit: {limit}')
-        pg_link_generator = PaginationLinksGenerator(request, custom_params)
-        api_base_prefix = FastApiUtils.get_api_base_prefix()
-        collections_dapa_query = CollectionDapaQuery(collection_id, limit, offset, None,
-                                                     f'{pg_link_generator.base_url}/{api_base_prefix}')
-        collections_result = collections_dapa_query.get_collection()
-    except Exception as e:
-        LOGGER.exception('failed during get_granules_dapa')
-        raise HTTPException(status_code=500, detail=str(e))
-    if collections_result['statusCode'] == 200:
-        return collections_result['body']
-    raise HTTPException(status_code=collections_result['statusCode'], detail=collections_result['body'])
+    raise HTTPException(status_code=301, detail='Please use FAST API to get collection details')
 
 @router.get("")
 @router.get("/")
 async def query_collections(request: Request, collection_id: Union[str, None] = None, limit: Union[int, None] = 10, offset: Union[int, None] = 0, ):
-    LOGGER.debug(f'starting query_collections: {collection_id}')
-    LOGGER.debug(f'starting query_collections request: {request}')
-
-    authorizer: UDSAuthorizorAbstract = UDSAuthorizerFactory() \
-        .get_instance(UDSAuthorizerFactory.cognito,
-                      es_url=os.getenv('ES_URL'),
-                      es_port=int(os.getenv('ES_PORT', '443'))
-                      )
-    auth_info = FastApiUtils.get_authorization_info(request)
-    uds_collections = UdsCollections(es_url=os.getenv('ES_URL'),
-                                     es_port=int(os.getenv('ES_PORT', '443')), es_type=os.getenv('ES_TYPE', 'AWS'))
-    if collection_id is not None:
-        collection_identifier = uds_collections.decode_identifier(collection_id)
-        if not authorizer.is_authorized_for_collection(DBConstants.read, collection_id,
-                                                       auth_info['ldap_groups'],
-                                                       collection_identifier.tenant,
-                                                       collection_identifier.venue):
-            LOGGER.debug(f'user: {auth_info["username"]} is not authorized for {collection_id}')
-            raise HTTPException(status_code=403, detail=json.dumps({
-                'message': 'not authorized to execute this action'
-            }))
-    else:
-        collection_regexes = authorizer.get_authorized_collections(DBConstants.read, auth_info['ldap_groups'])
-        LOGGER.info(f'collection_regexes: {collection_regexes}')
-        authorized_collections = uds_collections.get_collections(collection_regexes)
-        LOGGER.info(f'authorized_collections: {authorized_collections}')
-        collection_id = [k[DBConstants.collection_id] for k in authorized_collections]
-        LOGGER.info(f'authorized_collection_ids: {collection_id}')
-    # NOTE: 2022-11-21: only pass collections. not versions
-
-    try:
-        custom_params = {}
-        if limit > CollectionDapaQuery.max_limit:
-            LOGGER.debug(f'incoming limit > {CollectionDapaQuery.max_limit}. resetting to max. incoming limit: {limit}')
-            limit = CollectionDapaQuery.max_limit
-            custom_params['limit'] = limit
-            LOGGER.debug(f'new limit: {limit}')
-        pg_link_generator = PaginationLinksGenerator(request, custom_params)
-        pagination_links = pg_link_generator.generate_pagination_links()
-        api_base_prefix = FastApiUtils.get_api_base_prefix()
-        collections_dapa_query = CollectionDapaQuery(collection_id, limit, offset, pagination_links, f'{pg_link_generator.base_url}/{api_base_prefix}')
-        collections_result = collections_dapa_query.start()
-    except Exception as e:
-        LOGGER.exception('failed during get_granules_dapa')
-        raise HTTPException(status_code=500, detail=str(e))
-    if collections_result['statusCode'] == 200:
-        return collections_result['body']
-    raise HTTPException(status_code=collections_result['statusCode'], detail=collections_result['body'])
+    raise HTTPException(status_code=301, detail='Please use FAST API to get collection details')
 
 @router.delete("/{collection_id}")
 @router.delete("/{collection_id}/")
diff --git a/cumulus_lambda_functions/uds_api/dapa/collections_dapa_creation.py b/cumulus_lambda_functions/uds_api/dapa/collections_dapa_creation.py
index 6b111a9d..a41618ee 100644
--- a/cumulus_lambda_functions/uds_api/dapa/collections_dapa_creation.py
+++ b/cumulus_lambda_functions/uds_api/dapa/collections_dapa_creation.py
@@ -1,25 +1,18 @@
 import json
 import os
-from time import sleep
 from typing import Optional
 
 import pystac
 from pydantic import BaseModel
-
 from mdps_ds_lib.lib.utils.time_utils import TimeUtils
-
 from cumulus_lambda_functions.lib.uds_db.uds_collections import UdsCollections
 from starlette.datastructures import URL
-
-from cumulus_lambda_functions.cumulus_wrapper.query_collections import CollectionsQuery
-
 from mdps_ds_lib.lib.cumulus_stac.collection_transformer import CollectionTransformer
-
 from mdps_ds_lib.lib.aws.aws_lambda import AwsLambda
-
 from cumulus_lambda_functions.lib.lambda_logger_generator import LambdaLoggerGenerator
 
 LOGGER = LambdaLoggerGenerator.get_logger(__name__, LambdaLoggerGenerator.get_level_from_env())
 
+
 class SummariesModel(BaseModel):
     granuleId: list[str]
     granuleIdExtraction: list[str]
@@ -30,12 +23,14 @@ class ExtentModel(BaseModel):
     temporal: dict
     spatial: dict
 
+
 class CumulusLinkModel(BaseModel):
     rel: str
     href: str
     type: Optional[str] = ''
    title: Optional[str] = ''
 
+
 class CumulusCollectionModel(BaseModel):
     """
     {"type": "Collection", "id": "URN:NASA:UNITY:MAIN_PROJECT:DEV:CUMULUS_DAPA_UNIT_TEST___1697248243", "stac_version": "1.0.0",
@@ -64,6 +59,7 @@ class CumulusCollectionModel(BaseModel):
     providers: list[dict]
     extent: ExtentModel
 
+
 class CollectionDapaCreation:
     def __init__(self, request_body):
         required_env = ['CUMULUS_LAMBDA_PREFIX', 'CUMULUS_WORKFLOW_SQS_URL']
@@ -80,7 +76,6 @@ def __init__(self, request_body):
         self.__provider_id = os.getenv('UNITY_DEFAULT_PROVIDER', '')
         self.__collection_transformer = CollectionTransformer(self.__report_to_ems)
         self.__uds_collection = UdsCollections(es_url=os.getenv('ES_URL'), es_port=int(os.getenv('ES_PORT', '443')), es_type=os.getenv('ES_TYPE', 'AWS'), use_ssl=os.getenv('ES_USE_SSL', 'TRUE').strip() is True)
-        self.__cumulus_collection_query = CollectionsQuery('', '')
 
     def analyze_cumulus_result(self, cumulus_request_result):
         if 'status' not in cumulus_request_result:
@@ -93,7 +88,6 @@ def analyze_cumulus_result(self, cumulus_request_result):
             }, None
         return None, cumulus_request_result
 
-
     def __delete_collection_uds(self):
         try:
             delete_collection_result = self.__uds_collection.delete_collection(
@@ -110,7 +104,6 @@ def __delete_collection_uds(self):
         return None
 
     def __create_collection_uds(self, cumulus_collection_doc):
-
         try:
             time_range = self.__collection_transformer.get_collection_time_range()
             self.__uds_collection.add_collection(
@@ -123,10 +116,6 @@ def __create_collection_uds(self, cumulus_collection_doc):
         except Exception as e:
             LOGGER.exception(f'failed to add collection to Elasticsearch')
             delete_collection_result = 'NA'
-            if self.__include_cumulus:
-                delete_collection_result = self.__cumulus_collection_query.delete_collection(self.__cumulus_lambda_prefix,
-                                                                                             cumulus_collection_doc['name'],
-                                                                                             cumulus_collection_doc['version'])
             return {
                 'statusCode': 500,
                 'body': {
@@ -144,33 +133,9 @@ def delete(self):
         self.__provider_id = self.__provider_id if self.__collection_transformer.output_provider is None else self.__collection_transformer.output_provider
         LOGGER.debug(f'__provider_id: {self.__provider_id}')
         creation_result = 'NA'
-
-        if self.__include_cumulus:
-            result = self.__cumulus_collection_query.list_executions(cumulus_collection_doc, self.__cumulus_lambda_prefix)
-            LOGGER.debug(f'execution list result: {result}')
-            if len(result['results']) > 0:
-                self.__delete_collection_execution(cumulus_collection_doc, deletion_result)
-                return {
-                    'statusCode': 409,
-                    'body': {
-                        'message': f'There are cumulus executions for this collection. Deleting them. Pls try again in a few minutes.',
-                    }
-                }
-            # self.__delete_collection_execution(cumulus_collection_doc, deletion_result)
-            self.__delete_collection_rule(cumulus_collection_doc, deletion_result)
-            delete_result = self.__cumulus_collection_query.delete_collection(self.__cumulus_lambda_prefix, cumulus_collection_doc['name'], cumulus_collection_doc['version'])
-            delete_err, delete_result = self.analyze_cumulus_result(delete_result)
-            if delete_err is not None:
-                LOGGER.error(f'deleting collection ends in error. Trying again. {delete_err}')
-                # self.__delete_collection_execution(cumulus_collection_doc, deletion_result)
-                self.__delete_collection_rule(cumulus_collection_doc, deletion_result)
-                delete_result = self.__cumulus_collection_query.delete_collection(self.__cumulus_lambda_prefix, cumulus_collection_doc['name'], cumulus_collection_doc['version'])
-                delete_err, delete_result = self.analyze_cumulus_result(delete_result)
-            deletion_result['cumulus_collection_deletion'] = delete_err if delete_err is not None else delete_result
-        else:
-            deletion_result['cumulus_executions_deletion'] = 'NA'
-            deletion_result['cumulus_rule_deletion'] = 'NA'
-            deletion_result['cumulus_collection_deletion'] = 'NA'
+        deletion_result['cumulus_executions_deletion'] = 'NA'
+        deletion_result['cumulus_rule_deletion'] = 'NA'
+        deletion_result['cumulus_collection_deletion'] = 'NA'
 
         uds_deletion_result = self.__delete_collection_uds()
         deletion_result['uds_collection_deletion'] = uds_deletion_result if uds_deletion_result is not None else 'succeeded'
@@ -191,45 +156,16 @@ def delete(self):
             }
         }
 
-    def __delete_collection_rule(self, cumulus_collection_doc, deletion_result):
-        if 'cumulus_rule_deletion' in deletion_result and 'statusCode' not in deletion_result['cumulus_rule_deletion']:
-            return
-        rule_deletion_result = self.__cumulus_collection_query.delete_sqs_rules(cumulus_collection_doc, self.__cumulus_lambda_prefix)
-        rule_delete_err, rule_delete_result = self.analyze_cumulus_result(rule_deletion_result)
-        deletion_result['cumulus_rule_deletion'] = rule_delete_err if rule_delete_err is not None else rule_delete_result
-        return
-
-    def __delete_collection_execution(self, cumulus_collection_doc, deletion_result):
-        executions_delete_result = self.__cumulus_collection_query.delete_executions(cumulus_collection_doc, self.__cumulus_lambda_prefix)
-        exec_delete_err, exec_delete_result = self.analyze_cumulus_result(executions_delete_result)
-        deletion_result['cumulus_executions_deletion'] = exec_delete_err if exec_delete_err is not None else exec_delete_result
-        sleep(10)
-        return
     def create(self):
         try:
             cumulus_collection_doc = self.__collection_transformer.from_stac(self.__request_body)
             self.__provider_id = self.__provider_id if self.__collection_transformer.output_provider is None else self.__collection_transformer.output_provider
             LOGGER.debug(f'__provider_id: {self.__provider_id}')
             creation_result = 'NA'
-            if self.__include_cumulus:
-                creation_cumulus_result = self.__cumulus_collection_query.create_collection(cumulus_collection_doc, self.__cumulus_lambda_prefix)
-                creation_err, creation_result = self.analyze_cumulus_result(creation_cumulus_result)
-                if creation_err is not None:
-                    return creation_err
             uds_creation_result = self.__create_collection_uds(cumulus_collection_doc)
             if uds_creation_result is not None:
                 return uds_creation_result
-            if self.__include_cumulus:
-                rule_creation_result = self.__cumulus_collection_query.create_sqs_rules(
-                    cumulus_collection_doc,
-                    self.__cumulus_lambda_prefix,
-                    self.__ingest_sqs_url,
-                    self.__provider_id,
-                    self.__workflow_name,
-                )
-                create_rule_err, create_rule_result = self.analyze_cumulus_result(rule_creation_result)
-                if create_rule_err is not None:
-                    return create_rule_err
+
             # validation_result = pystac.Collection.from_dict(self.__request_body).validate()
             # cumulus_collection_query = CollectionsQuery('', '')
             #
diff --git a/cumulus_lambda_functions/uds_api/dapa/collections_dapa_query.py b/cumulus_lambda_functions/uds_api/dapa/collections_dapa_query.py
deleted file mode 100644
index 55696aa7..00000000
--- a/cumulus_lambda_functions/uds_api/dapa/collections_dapa_query.py
+++ /dev/null
@@ -1,110 +0,0 @@
-import json
-import os
-
-from cumulus_lambda_functions.lib.lambda_logger_generator import LambdaLoggerGenerator
-
-from cumulus_lambda_functions.cumulus_wrapper.query_collections import CollectionsQuery
-LOGGER = LambdaLoggerGenerator.get_logger(__name__, LambdaLoggerGenerator.get_level_from_env())
-
-
-class CollectionDapaQuery:
-    max_limit = 50
-
-    def __init__(self, collection_id, limit, offset, pagination_links, base_url):
-        self.__base_url = base_url
-        self.__pagination_links = pagination_links
-        self.__collection_id = collection_id
-        page_number = (offset // limit) + 1
-        if 'CUMULUS_LAMBDA_PREFIX' not in os.environ:
-            raise EnvironmentError('missing key: CUMULUS_LAMBDA_PREFIX')
-
-        self.__cumulus_lambda_prefix = os.getenv('CUMULUS_LAMBDA_PREFIX')
-        self.__cumulus = CollectionsQuery('https://na/dev', 'NA')
-        self.__cumulus.with_limit(limit)
-        LOGGER.debug(f'collection_id: {collection_id}')
-        if collection_id is not None:
-            if isinstance(collection_id, str):
-                self.__cumulus.with_collection_id(collection_id)
-            else:
-                self.__cumulus.with_collections(collection_id)
-        self.__cumulus.with_page_number(page_number)
-
-    def __get_size(self):
-        try:
-            cumulus_size = self.__cumulus.get_size(self.__cumulus_lambda_prefix)
-        except:
-            LOGGER.exception(f'cannot get cumulus_size')
-            cumulus_size = {'total_size': -1}
-        return cumulus_size
-
-    def get_collection(self):
-        """
-        A method to retrieve a single collection.
-        :return:
-        """
-        if self.__collection_id is None or self.__collection_id == '':
-            return {
-                'statusCode': 500,
-                'body': {'message': f'missing or invalid collection ID: {self.__collection_id}'}
-            }
-        try:
-            cumulus_result = self.__cumulus.query_direct_to_private_api(self.__cumulus_lambda_prefix, self.__base_url)
-            if 'server_error' in cumulus_result:
-                return {
-                    'statusCode': 500,
-                    'body': {'message': cumulus_result['server_error']}
-                }
-            if 'client_error' in cumulus_result:
-                return {
-                    'statusCode': 400,
-                    'body': {'message': cumulus_result['client_error']}
-                }
-            if len(cumulus_result['results']) != 1:
-                return {
-                    'statusCode': 500,
-                    'body': {'message': f'cannot determine exact collection. returning length: {len(cumulus_result["results"])}'}
-                }
-
-            return {
-                'statusCode': 200,
-                'body': cumulus_result['results'][0]
-            }
-        except Exception as e:
-            LOGGER.exception(f'unexpected error')
-            return {
-                'statusCode': 500,
-                'body': {'message': f'unpredicted error: {str(e)}'}
-            }
-        return
-
-    def start(self):
-        try:
-            cumulus_result = self.__cumulus.query_direct_to_private_api(self.__cumulus_lambda_prefix, self.__base_url)
-            if 'server_error' in cumulus_result:
-                return {
-                    'statusCode': 500,
-                    'body': {'message': cumulus_result['server_error']}
-                }
-            if 'client_error' in cumulus_result:
-                return {
-                    'statusCode': 400,
-                    'body': {'message': cumulus_result['client_error']}
-                }
-            cumulus_size = self.__get_size()
-            return {
-                'statusCode': 200,
-                'body': {
-                    'numberMatched': cumulus_size['total_size'],
-                    'numberReturned': len(cumulus_result['results']),
-                    'stac_version': '1.0.0',
-                    'type': 'FeatureCollection',
-                    'links': self.__pagination_links,
-                    'features': cumulus_result['results'],
-                }
-            }
-        except Exception as e:
-            LOGGER.exception(f'unexpected error')
-            return {
-                'statusCode': 500,
-                'body': {'message': f'unpredicted error: {str(e)}'}
-            }
diff --git a/cumulus_lambda_functions/uds_api/dapa/granules_dapa_query.py b/cumulus_lambda_functions/uds_api/dapa/granules_dapa_query.py
deleted file mode 100644
index 20c4570c..00000000
--- a/cumulus_lambda_functions/uds_api/dapa/granules_dapa_query.py
+++ /dev/null
@@ -1,231 +0,0 @@
-import json
-import os
-
-from mdps_ds_lib.lib.cumulus_stac.item_transformer import ItemTransformer
-from cumulus_lambda_functions.lib.cql_parser import CqlParser
-
-from cumulus_lambda_functions.lib.uds_db.uds_collections import UdsCollections
-
-from mdps_ds_lib.lib.utils.json_validator import JsonValidator
-
-from cumulus_lambda_functions.lib.lambda_logger_generator import LambdaLoggerGenerator
-
-from cumulus_lambda_functions.cumulus_wrapper.query_granules import GranulesQuery
-from cumulus_lambda_functions.lib.uds_db.granules_db_index import GranulesDbIndex
-
-LOGGER = LambdaLoggerGenerator.get_logger(__name__, LambdaLoggerGenerator.get_level_from_env())
-
-
-class GranulesDapaQuery:
-    def __init__(self, collection_id, limit, offset, datetime, filter_input, pagination_links):
-        self.__pagination_links = pagination_links
-        self.__limit = limit
-        self.__offset = offset
-        self.__filter_input = filter_input
-        page_number = (offset // limit) + 1
-        if 'CUMULUS_LAMBDA_PREFIX' not in os.environ:
-            raise EnvironmentError('missing key: CUMULUS_LAMBDA_PREFIX')
-        self.__granules_index = GranulesDbIndex()
-        self.__cumulus_lambda_prefix = os.getenv('CUMULUS_LAMBDA_PREFIX')
-        self.__cumulus = GranulesQuery('https://na/dev', 'NA')
-        self.__cumulus.with_limit(limit)
-        self.__cumulus.with_page_number(page_number)
-        self.__cumulus.with_collection_id(collection_id)
-        self.__collection_id = collection_id
-        self.__get_time_range(datetime)
-        self.__get_filter(filter_input)
-        self.__es_granules_result = None  # this is where Elasticsearch granules result is stored
-
-    def __custom_metadata_query(self):
-        if self.__filter_input is None:
-            return self
-        LOGGER.debug(f'filter_input: {self.__filter_input}')
-        dsl_query = CqlParser().transform(self.__filter_input)
-        LOGGER.debug(f'CqlParser result: {dsl_query}')
-        custom_metadata_query_dsl = {
-            'from': self.__offset,
-            'size': self.__limit,
-            'query': {
-                'bool': {
-                    'must': [
-
-                        dsl_query,
-                    ]
-                }
-            }
-        }
-
-        LOGGER.debug(f'custom_metadata_query_dsl: {custom_metadata_query_dsl}')
-        collection_identifier = UdsCollections.decode_identifier(self.__collection_id)
UdsCollections.decode_identifier(self.__collection_id) - LOGGER.debug(f'custom_metadata_query_dsl: {custom_metadata_query_dsl}') - custom_metadata_result = GranulesDbIndex().dsl_search(collection_identifier.tenant, collection_identifier.venue, - custom_metadata_query_dsl) - LOGGER.debug(f'custom_metadata_result: {custom_metadata_result}') - custom_metadata_result = [k['_source'] for k in custom_metadata_result['hits']['hits']] - self.__es_granules_result = {k['granule_id']: k for k in custom_metadata_result} - return self - - - def __get_time_range(self, datetime: str): - if datetime is None: - return self - if '/' not in datetime: - self.__cumulus.with_time(datetime) - return self - split_time_range = [k.strip() for k in datetime.split('/')] - if split_time_range[0] == '..': - self.__cumulus.with_time_to(split_time_range[1]) - return - if split_time_range[1] == '..': - self.__cumulus.with_time_from(split_time_range[0]) - return - self.__cumulus.with_time_range(split_time_range[0], split_time_range[1]) - return self - - def __get_filter(self, filter_input: str): - """ - https://portal.ogc.org/files/96288#rc_filter - https://portal.ogc.org/files/96288#simple-cql_comparison-predicates - - { "eq": [ { "property": "city" }, "Toronto" ] } - - { - "like": [ - { "property": "name" }, - "Smith." - ], - "singleChar": ".", - "nocase": true - } - -{ - "in": { - "value": { "property": "cityName" }, - "list": [ "Toronto", "Franfurt", "Tokyo", "New York" ], - "nocase": false - } -} - :return: - """ - if filter_input is None: - return self - filter_event = json.loads(filter_input) - if 'in' not in filter_event: - return self - schema = { - "type": { - "required": ["in"], - "properties": { - "in": { - "type": "object", - "required": ["value", "list"], - "properties": { - "value": { - "type": "object", - "required": ["property"], - "properties": { - "property": {"type": "string"} - } - }, - "list": { - "type": "array", - "minItems": 1, - "items": { - "type": "string" - } - } - } - } - } - } - } - filter_event_validator_result = JsonValidator(schema).validate(filter_event) - if filter_event_validator_result is not None: - LOGGER.error(f'invalid event: {filter_event_validator_result}. 
event: {filter_event}') - return self - search_key = filter_event['in']['value']['property'] - search_values = filter_event['in']['value']['list'] - self.__cumulus.with_filter(search_key, search_values) - return self - - def __get_size(self): - try: - cumulus_size = self.__cumulus.get_size(self.__cumulus_lambda_prefix) - except: - LOGGER.exception(f'cannot get cumulus_size') - cumulus_size = {'total_size': -1} - return cumulus_size - - def __get_custom_metadata(self, cumulus_result) -> dict: - custom_meta_query_conditions = [{ - 'bool': { - 'must': [ # TODO split if array is more than 1024 - {'term': {'collection_id': self.__collection_id}}, - {'term': {'granule_id': k['granuleId']}}, - ] - } - } for k in cumulus_result['results']] - custom_metadata_query_dsl = { - '_source': { - 'exclude': ['collection_id'] - }, - 'sort': [{'granule_id': {'order': 'ASC'}}], - 'query': { - 'bool': { - 'should': custom_meta_query_conditions - } - } - } - collection_identifier = UdsCollections.decode_identifier(self.__collection_id) - LOGGER.debug(f'custom_metadata_query_dsl: {custom_metadata_query_dsl}') - custom_metadata_result = GranulesDbIndex().dsl_search(collection_identifier.tenant, collection_identifier.venue, custom_metadata_query_dsl) - LOGGER.debug(f'custom_metadata_result: {custom_metadata_result}') - custom_metadata_result = [k['_source'] for k in custom_metadata_result['hits']['hits']] - custom_metadata_result = {k['granule_id']: k for k in custom_metadata_result} - return custom_metadata_result - - def start(self): - try: - self.__custom_metadata_query() - if self.__es_granules_result is not None: - # already queried custom metadata. - # just need to find those granule ids from Cumulus. - self.__get_filter('granules_id', [k for k in self.__es_granules_result.keys()]) - cumulus_result = self.__cumulus.query_direct_to_private_api(self.__cumulus_lambda_prefix, False) - if 'server_error' in cumulus_result: - return { - 'statusCode': 500, - 'body': {'message': cumulus_result['server_error']} - } - if 'client_error' in cumulus_result: - return { - 'statusCode': 400, - 'body': {'message': cumulus_result['client_error']} - } - cumulus_size = self.__get_size() - LOGGER.debug(f'cumulus_result: {cumulus_result}') - custom_metadata_result = self.__get_custom_metadata(cumulus_result) - main_result_dict = {k['granuleId']: k for k in cumulus_result['results']} - for k, v in main_result_dict.items(): - if k in custom_metadata_result: - if 'granule_id' in custom_metadata_result[k]: - custom_metadata_result[k].pop('granule_id') - v['custom_metadata'] = custom_metadata_result[k] - combined_cumulus_result = [ItemTransformer().to_stac(k) for k in main_result_dict.values()] - return { - 'statusCode': 200, - 'body': { - 'numberMatched': cumulus_size['total_size'], - 'numberReturned': len(cumulus_result['results']), - 'stac_version': '1.0.0', - 'type': 'FeatureCollection', # TODO correct name? 
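A note on the TODO above: the STAC API spec models an ItemCollection as a GeoJSON FeatureCollection, so 'FeatureCollection' is the correct type here. For reference, a minimal sketch of the DAPA envelope this method assembled (all values hypothetical):

dapa_envelope = {
    'numberMatched': 42,            # total hits reported by the Cumulus stats endpoint
    'numberReturned': 10,           # len(results) for the requested page
    'stac_version': '1.0.0',
    'type': 'FeatureCollection',    # a STAC ItemCollection is a GeoJSON FeatureCollection
    'links': [{'rel': 'next', 'href': 'https://example.com/granules?limit=10&offset=10'}],  # pagination links
    'features': [],                 # one transformed STAC Item per granule
}

where the Cumulus page is derived from the DAPA offset/limit pair as page_number = (offset // limit) + 1.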
- 'links': self.__pagination_links, - 'features': combined_cumulus_result - } - } - except Exception as e: - LOGGER.exception(f'unexpected error') - return { - 'statusCode': 500, - 'body': {'message': f'unpredicted error: {str(e)}'} - } diff --git a/cumulus_lambda_functions/uds_api/granules_api.py b/cumulus_lambda_functions/uds_api/granules_api.py index 0e74887f..6912007a 100644 --- a/cumulus_lambda_functions/uds_api/granules_api.py +++ b/cumulus_lambda_functions/uds_api/granules_api.py @@ -5,9 +5,6 @@ from pydantic import BaseModel from starlette.responses import Response, JSONResponse - -from cumulus_lambda_functions.cumulus_wrapper.query_granules import GranulesQuery - from cumulus_lambda_functions.uds_api.dapa.granules_dapa_query_es import GranulesDapaQueryEs from cumulus_lambda_functions.lib.uds_db.granules_db_index import GranulesDbIndex from cumulus_lambda_functions.uds_api.fast_api_utils import FastApiUtils @@ -160,13 +157,6 @@ async def delete_single_granule_dapa_actual(request: Request, collection_id: str })) try: LOGGER.debug(f'deleting granule: {granule_id}') - include_cumulus = os.getenv('CUMULUS_INCLUSION', 'TRUE').upper().strip() == 'TRUE' - if include_cumulus: - cumulus_lambda_prefix = os.getenv('CUMULUS_LAMBDA_PREFIX') - cumulus = GranulesQuery('https://na/dev', 'NA') - cumulus.with_collection_id(collection_id) - cumulus_delete_result = cumulus.delete_entry(cumulus_lambda_prefix, granule_id) # TODO not sure it is correct granule ID - LOGGER.debug(f'cumulus_delete_result: {cumulus_delete_result}') es_delete_result = GranulesDbIndex().delete_entry(collection_identifier.tenant, collection_identifier.venue, granule_id @@ -274,14 +264,6 @@ async def add_single_granule_dapa(request: Request, collection_id: str, granule_ try: LOGGER.debug(f'adding granule: {granule_id}') new_granule = new_granule.model_dump() - include_cumulus = os.getenv('CUMULUS_INCLUSION', 'TRUE').upper().strip() == 'TRUE' - if include_cumulus: - cumulus_lambda_prefix = os.getenv('CUMULUS_LAMBDA_PREFIX') - cumulus = GranulesQuery('https://na/dev', 'NA') - cumulus.with_collection_id(collection_id) - raise NotImplementedError(f'Please implement to convert stac into cumulus granule') - cumulus_add_result = cumulus.add_entry(cumulus_lambda_prefix, {}) # TODO not sure it is correct granule ID - LOGGER.debug(f'cumulus_add_result: {cumulus_add_result}') if 'bbox' in new_granule: new_granule['bbox'] = GranulesDbIndex.to_es_bbox(new_granule['bbox']) collection_identifier = UdsCollections.decode_identifier(collection_id) diff --git a/tests/cumulus_lambda_functions/cumulus_wrapper/__init__.py b/tests/cumulus_lambda_functions/cumulus_wrapper/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/cumulus_lambda_functions/cumulus_wrapper/test_query_collection.py b/tests/cumulus_lambda_functions/cumulus_wrapper/test_query_collection.py deleted file mode 100644 index 4ed51e02..00000000 --- a/tests/cumulus_lambda_functions/cumulus_wrapper/test_query_collection.py +++ /dev/null @@ -1,189 +0,0 @@ -import json -import logging -from datetime import datetime -from unittest import TestCase - -from cumulus_lambda_functions.cumulus_wrapper.query_collections import CollectionsQuery -from cumulus_lambda_functions.lib.lambda_logger_generator import LambdaLoggerGenerator - - -class TestQueryCollection(TestCase): - def test_01(self): - lambda_prefix = 'uds-dev-cumulus' - collection_query = CollectionsQuery('NA', 'NA') - collection_version = int(datetime.utcnow().timestamp()) - sample_collection = { - # "dataType": 
"MOD09GQ", - # "provider_path": "cumulus-test-data/pdrs", - "name": "UNITY_CUMULUS_DEV_UNIT_TEST", - "version": str(collection_version), - # "process": "modis", - # "duplicateHandling": "skip", - "granuleId": "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}0$", - "granuleIdExtraction": "(P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}0).+", - # "url_path": "{cmrMetadata.Granule.Collection.ShortName}___{cmrMetadata.Granule.Collection.VersionId}", - "sampleFileName": "P1570515ATMSSCIENCEAXT11344000000001.PDS", - "files": [ - { - "bucket": "internal", - "regex": "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}00\\.PDS$", - "sampleFileName": "P1570515ATMSSCIENCEAXT11344000000000.PDS", - "type": "data", - "reportToEms": True - }, - { - "bucket": "internal", - "regex": "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}01\\.PDS$", - "sampleFileName": "P1570515ATMSSCIENCEAXT11344000000001.PDS", - "reportToEms": True, - "type": "metadata" - }, - { - "bucket": "internal", - "regex": "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}01\\.PDS\\.xml$", - "sampleFileName": "P1570515ATMSSCIENCEAXT11344000000001.PDS.xml", - "reportToEms": True, - "type": "metadata" - }, - { - "bucket": "internal", - "regex": "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}00.PDS.cmr.xml$", - "sampleFileName": "P1570515ATMSSCIENCEAXT11344000000000.PDS.cmr.xml", - "reportToEms": True, - "type": "metadata" - } - ], - } - # sample_collection = { - # "createdAt": 1647992847582, - # "reportToEms": True, - # "updatedAt": 1647992847582, - # "timestamp": 1647992849273 - # } - response = collection_query.create_collection(sample_collection, lambda_prefix) - self.assertTrue('status' in response, f'status not in response: {response}') - self.assertEqual('Record saved', response['status'], f'wrong status: {response}') - - delete_response = collection_query.delete_collection(lambda_prefix, 'UNITY_CUMULUS_DEV_UNIT_TEST', str(collection_version)) - self.assertTrue('status' in delete_response, f'status not in response: {response}') - self.assertEqual('Record deleted', delete_response['status'], f'wrong status: {response}') - - return - - def test_02(self): - lambda_prefix = 'uds-dev-cumulus' - collection_query = CollectionsQuery('NA', 'NA') - collection_query.with_limit(2) - collections = collection_query.query_direct_to_private_api(lambda_prefix, 's3://abcd') - self.assertTrue('results' in collections, f'results not in collections: {collections}') - # self.assertEqual(2, len(collections['results']), f'wrong length: {collections}') - return - - def test_03(self): - lambda_prefix = 'uds-dev-cumulus' - collection_query = CollectionsQuery('NA', 'NA') - collection_query.with_limit(2) - collection_query.with_collection_id('CUMULUS_DAPA_UNIT_TEST___1663627653') - collections = collection_query.query_direct_to_private_api(lambda_prefix, 's3://abcd') - self.assertTrue('results' in collections, f'results not in collections: {collections}') - # self.assertEqual(1, len(collections['results']), f'wrong length: {collections}') - # self.assertEqual('CUMULUS_DAPA_UNIT_TEST___1663627653', collections['results'][0]['id'], f'wrong id (DAPA style)') - return - - def test_04(self): - lambda_prefix = 'uds-dev-cumulus' - collection_query = CollectionsQuery('NA', 'NA') - collection_query.with_limit(2) - collection_query.with_collection_id('CUMULUS_DAPA_UNIT_TEST___1663627653') - collections = collection_query.get_size(lambda_prefix) - self.assertTrue('total_size' in collections, f'total_size not in collections: {collections}') - # self.assertEqual(1, collections['total_size'], f'wrong size: {collections}') - return - - - def 
test_rules_03(self): - LambdaLoggerGenerator.remove_default_handlers() - # logging.basicConfig(level=20, - # format="%(asctime)s [%(levelname)s] [%(name)s::%(lineno)d] %(message)s") - - lambda_prefix = 'uds-dev-cumulus' - collection_query = CollectionsQuery('NA', 'NA') - collection_version = int(datetime.utcnow().timestamp()) - sample_collection = { - # "dataType": "MOD09GQ", - # "provider_path": "cumulus-test-data/pdrs", - "name": "UNITY_CUMULUS_DEV_UNIT_TEST", - "version": str(collection_version), - # "process": "modis", - # "duplicateHandling": "skip", - "granuleId": "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}0$", - "granuleIdExtraction": "(P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}0).+", - # "url_path": "{cmrMetadata.Granule.Collection.ShortName}___{cmrMetadata.Granule.Collection.VersionId}", - "sampleFileName": "P1570515ATMSSCIENCEAXT11344000000001.PDS", - "files": [ - { - "bucket": "internal", - "regex": "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}00\\.PDS$", - "sampleFileName": "P1570515ATMSSCIENCEAXT11344000000000.PDS", - "type": "data", - "reportToEms": True - }, - { - "bucket": "internal", - "regex": "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}01\\.PDS$", - "sampleFileName": "P1570515ATMSSCIENCEAXT11344000000001.PDS", - "reportToEms": True, - "type": "metadata" - }, - { - "bucket": "internal", - "regex": "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}01\\.PDS\\.xml$", - "sampleFileName": "P1570515ATMSSCIENCEAXT11344000000001.PDS.xml", - "reportToEms": True, - "type": "metadata" - }, - { - "bucket": "internal", - "regex": "^P[0-9]{3}[0-9]{4}[A-Z]{13}T[0-9]{12}00.PDS.cmr.xml$", - "sampleFileName": "P1570515ATMSSCIENCEAXT11344000000000.PDS.cmr.xml", - "reportToEms": True, - "type": "metadata" - } - ], - } - # sample_collection = { - # "createdAt": 1647992847582, - # "reportToEms": True, - # "updatedAt": 1647992847582, - # "timestamp": 1647992849273 - # } - response = collection_query.create_collection(sample_collection, lambda_prefix) - self.assertTrue('status' in response, f'status not in response: {response}') - self.assertEqual('Record saved', response['status'], f'wrong status: {response}') - - response = collection_query.create_sqs_rules( - sample_collection, - lambda_prefix, - 'https://sqs.us-west-2.amazonaws.com/884500545225/uds-dev-cumulus-cnm-submission-queue', - 'SNPP', - 'CatalogGranule' - ) - self.assertTrue('status' in response, f'status not in response: {response}') - self.assertEqual('Record saved', response['status'], f'wrong status: {response}') - return - - def test_rules_04(self): - lambda_prefix = 'uds-sbx-cumulus' - collection_query = CollectionsQuery('NA', 'NA') - collection_query.with_limit(200) - rules = collection_query.query_rules(lambda_prefix) - print(json.dumps(rules, indent=4)) - # self.assertTrue(False, rules) - return - - def test_create_provider(self): - lambda_prefix = 'uds-sbx-cumulus' - collection_query = CollectionsQuery('NA', 'NA') - result = collection_query.create_provider('william-test2', 'uds-sbx-staging', lambda_prefix) - print(result) - return diff --git a/tests/cumulus_lambda_functions/cumulus_wrapper/test_query_granules.py b/tests/cumulus_lambda_functions/cumulus_wrapper/test_query_granules.py deleted file mode 100644 index e946739f..00000000 --- a/tests/cumulus_lambda_functions/cumulus_wrapper/test_query_granules.py +++ /dev/null @@ -1,30 +0,0 @@ -from unittest import TestCase - -from cumulus_lambda_functions.cumulus_wrapper.query_granules import GranulesQuery - - -class TestGranulesQuery(TestCase): - def test_01(self): - lambda_prefix = 'uds-dev-cumulus' - - 
query_granules = GranulesQuery('NA', 'NA') - query_granules.with_collection_id('SNDR_SNPP_ATMS_L1B_OUTPUT___1') - query_granules.with_limit(7) - granules = query_granules.query_direct_to_private_api(lambda_prefix) - self.assertTrue('results' in granules, f'results not in collections: {granules}') - # self.assertEqual(7, len(granules['results']), f'wrong length: {granules}') - return - - def test_02(self): - lambda_prefix = 'uds-dev-cumulus' - - query_granules = GranulesQuery('NA', 'NA') - query_granules.with_collection_id('NEW_COLLECTION_EXAMPLE_L1B___9') - query_granules.with_limit(2) - query_granules.with_filter('id', ['NEW_COLLECTION_EXAMPLE_L1B___9:test_file01', 'NEW_COLLECTION_EXAMPLE_L1B___9:test_file02']) - granules = query_granules.query_direct_to_private_api(lambda_prefix) - self.assertTrue('results' in granules, f'results not in collections: {granules}') - # self.assertEqual(2, len(granules['results']), f'wrong length: {granules}') - # self.assertEqual(granules['results'][0]['id'], 'NEW_COLLECTION_EXAMPLE_L1B___9:test_file01') - # self.assertEqual(granules['results'][1]['id'], 'NEW_COLLECTION_EXAMPLE_L1B___9:test_file02') - return From 1ebec574fca0203376b796d2b71688dbcae573ba Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 15 Oct 2025 13:43:11 -0700 Subject: [PATCH 02/35] BREAKING CHANGE : removing cumulus code --- .../granules_cnm_ingester_logic.py | 3 --- .../uds_api/dapa/collections_dapa_cnm.py | 4 ++++ .../uds_api/dapa/collections_dapa_creation.py | 14 ++++++-------- .../uds_api/dapa/granules_dapa_query_es.py | 5 +++++ docker/docker-compose-dapa.yml | 1 - docker/docker-compose-web-service.yml | 4 ---- .../test_granules_cnm_ingester_logic.py | 3 --- tf-module/unity-cumulus/granules_cnm_ingester.tf | 3 --- tf-module/unity-cumulus/main.tf | 4 ---- tf-module/unity-cumulus/terraform.tfvars.example | 1 - tf-module/unity-cumulus/variables.tf | 10 ---------- 11 files changed, 15 insertions(+), 37 deletions(-) diff --git a/cumulus_lambda_functions/granules_cnm_ingester/granules_cnm_ingester_logic.py b/cumulus_lambda_functions/granules_cnm_ingester/granules_cnm_ingester_logic.py index b8ba59ae..587d950a 100644 --- a/cumulus_lambda_functions/granules_cnm_ingester/granules_cnm_ingester_logic.py +++ b/cumulus_lambda_functions/granules_cnm_ingester/granules_cnm_ingester_logic.py @@ -22,10 +22,7 @@ TODO UNITY_DEFAULT_PROVIDER -CUMULUS_WORKFLOW_NAME REPORT_TO_EMS -CUMULUS_WORKFLOW_SQS_URL -CUMULUS_LAMBDA_PREFIX ES_URL ES_PORT SNS_TOPIC_ARN diff --git a/cumulus_lambda_functions/uds_api/dapa/collections_dapa_cnm.py b/cumulus_lambda_functions/uds_api/dapa/collections_dapa_cnm.py index 96381885..462a6551 100644 --- a/cumulus_lambda_functions/uds_api/dapa/collections_dapa_cnm.py +++ b/cumulus_lambda_functions/uds_api/dapa/collections_dapa_cnm.py @@ -38,6 +38,10 @@ def __init__(self, request_body): def start_facade(self, current_url: URL): LOGGER.debug(f'request body: {self.__request_body}') + if os.getenv('IS_API_IN_DOCKER', 'FALSE') == 'TRUE': + LOGGER.debug(f'In docker. 
No time limit to pause for creation') + return self.start() + actual_path = current_url.path actual_path = actual_path if actual_path.endswith('/') else f'{actual_path}/' actual_path = f'{actual_path}actual' diff --git a/cumulus_lambda_functions/uds_api/dapa/collections_dapa_creation.py b/cumulus_lambda_functions/uds_api/dapa/collections_dapa_creation.py index a41618ee..9a187647 100644 --- a/cumulus_lambda_functions/uds_api/dapa/collections_dapa_creation.py +++ b/cumulus_lambda_functions/uds_api/dapa/collections_dapa_creation.py @@ -62,17 +62,10 @@ class CumulusCollectionModel(BaseModel): class CollectionDapaCreation: def __init__(self, request_body): - required_env = ['CUMULUS_LAMBDA_PREFIX', 'CUMULUS_WORKFLOW_SQS_URL'] - if not all([k in os.environ for k in required_env]): - raise EnvironmentError(f'one or more missing env: {required_env}') - self.__request_body = request_body self.__collection_creation_lambda_name = os.environ.get('COLLECTION_CREATION_LAMBDA_NAME', '').strip() - self.__cumulus_lambda_prefix = os.getenv('CUMULUS_LAMBDA_PREFIX') - self.__include_cumulus = os.getenv('CUMULUS_INCLUSION', 'TRUE').upper().strip() == 'TRUE' - self.__ingest_sqs_url = os.getenv('CUMULUS_WORKFLOW_SQS_URL') + self.__include_cumulus = os.getenv('CUMULUS_INCLUSION', 'FALSE').upper().strip() == 'TRUE' self.__report_to_ems = os.getenv('REPORT_TO_EMS', 'TRUE').strip().upper() == 'TRUE' - self.__workflow_name = os.getenv('CUMULUS_WORKFLOW_NAME', 'CatalogGranule') self.__provider_id = os.getenv('UNITY_DEFAULT_PROVIDER', '') self.__collection_transformer = CollectionTransformer(self.__report_to_ems) self.__uds_collection = UdsCollections(es_url=os.getenv('ES_URL'), es_port=int(os.getenv('ES_PORT', '443')), es_type=os.getenv('ES_TYPE', 'AWS'), use_ssl=os.getenv('ES_USE_SSL', 'TRUE').strip() is True) @@ -199,6 +192,11 @@ def start(self, current_url: URL, bearer_token: str): 'body': {'message': f'request body is not valid STAC Collection schema. check details', 'details': validation_result} } + + if os.getenv('IS_API_IN_DOCKER', 'FALSE') == 'TRUE': + LOGGER.debug(f'In docker. No time limit to pause for creation') + return self.create() + actual_path = current_url.path actual_path = actual_path if actual_path.endswith('/') else f'{actual_path}/' actual_path = f'{actual_path}actual' diff --git a/cumulus_lambda_functions/uds_api/dapa/granules_dapa_query_es.py b/cumulus_lambda_functions/uds_api/dapa/granules_dapa_query_es.py index 345674cd..6ec580e4 100644 --- a/cumulus_lambda_functions/uds_api/dapa/granules_dapa_query_es.py +++ b/cumulus_lambda_functions/uds_api/dapa/granules_dapa_query_es.py @@ -192,6 +192,11 @@ def get_single_granule(self, granule_id): return each_granules_query_result_stripped def delete_facade(self, current_url: URL, bearer_token: str): + if os.getenv('IS_API_IN_DOCKER', 'FALSE') == 'TRUE': + LOGGER.debug(f'In docker. 
No time limit to pause for deletion')
+            return self.start()
+
+
         actual_path = current_url.path
         actual_path = actual_path if actual_path.endswith('/') else f'{actual_path}/'
         actual_path = f'{actual_path}actual'
diff --git a/docker/docker-compose-dapa.yml b/docker/docker-compose-dapa.yml
index 5975b457..f7a93bab 100644
--- a/docker/docker-compose-dapa.yml
+++ b/docker/docker-compose-dapa.yml
@@ -4,7 +4,6 @@ services:
     image: cae-artifactory.jpl.nasa.gov:16001/gov/nasa/jpl/unity/uds:latest
     container_name: parquet-flask
     environment:
-      - CUMULUS_BASE='https://axhmoecy02.execute-api.us-west-2.amazonaws.com/dev'
       - log_level=DEBUG
     ports:
       - 9801:9801
diff --git a/docker/docker-compose-web-service.yml b/docker/docker-compose-web-service.yml
index 957a98a3..3633b0f7 100644
--- a/docker/docker-compose-web-service.yml
+++ b/docker/docker-compose-web-service.yml
@@ -14,9 +14,6 @@ services:
       PYTHONPATH: '${PYTHONPATH}:/usr/src/app/unity'
       COLLECTION_CREATION_LAMBDA_NAME: 'arn:aws:lambda:us-west-2:xxx:function:uds-sbx-cumulus-uds_api_1'
-      CUMULUS_LAMBDA_PREFIX: 'uds-sbx-cumulus'
-      CUMULUS_WORKFLOW_NAME: 'CatalogGranule'
-      CUMULUS_WORKFLOW_SQS_URL: 'https://sqs.us-west-2.amazonaws.com/xxx/uds-sbx-cumulus-cnm-submission-queue'
       LOG_LEVEL: '10'
       UNITY_DEFAULT_PROVIDER: 'unity'
       SNS_TOPIC_ARN: 'arn:aws:sns:us-west-2:xxx:uds-sbx-cumulus-cnm-submission-sns'
@@ -24,7 +21,6 @@
       ADMIN_COMMA_SEP_GROUPS: 'Unity_Admin'
       DAPA_API_PREIFX_KEY: 'data'
       STATIC_PARENT_DIR: '/usr/src/app/unity/cumulus_lambda_functions/uds_api/'
-      CUMULUS_BASE: 'https://na/dev'
       DAPA_API_URL_BASE: 'https://d3vc8w9zcq658.cloudfront.net/data'
       ES_PORT: '443'
       REPORT_TO_EMS: 'FALSE'
diff --git a/tests/cumulus_lambda_functions/granules_cnm_ingester/test_granules_cnm_ingester_logic.py b/tests/cumulus_lambda_functions/granules_cnm_ingester/test_granules_cnm_ingester_logic.py
index 4373b9af..517c44b7 100644
--- a/tests/cumulus_lambda_functions/granules_cnm_ingester/test_granules_cnm_ingester_logic.py
+++ b/tests/cumulus_lambda_functions/granules_cnm_ingester/test_granules_cnm_ingester_logic.py
@@ -15,10 +15,7 @@ def __init__(self, methodName: str = ...) 
-> None: os.environ['SNS_TOPIC_ARN'] = 'arn:aws:sns:us-west-2:237868187491:uds-sbx-cumulus-cnm-submission-sns' os.environ['COLLECTION_CREATION_LAMBDA_NAME'] = 'NA' os.environ['UNITY_DEFAULT_PROVIDER'] = 'unity' - os.environ['CUMULUS_WORKFLOW_NAME'] = 'CatalogGranule' os.environ['REPORT_TO_EMS'] = 'FALSE' - os.environ['CUMULUS_LAMBDA_PREFIX'] = 'uds-sbx-cumulus' - os.environ['CUMULUS_WORKFLOW_SQS_URL'] = 'https://sqs.us-west-2.amazonaws.com/237868187491/uds-sbx-cumulus-cnm-submission-queue' os.environ['ES_URL'] = 'vpc-uds-sbx-cumulus-es-qk73x5h47jwmela5nbwjte4yzq.us-west-2.es.amazonaws.com' os.environ['ES_PORT'] = '9200' diff --git a/tf-module/unity-cumulus/granules_cnm_ingester.tf b/tf-module/unity-cumulus/granules_cnm_ingester.tf index c8ebc725..d584f3ad 100644 --- a/tf-module/unity-cumulus/granules_cnm_ingester.tf +++ b/tf-module/unity-cumulus/granules_cnm_ingester.tf @@ -14,10 +14,7 @@ resource "aws_lambda_function" "granules_cnm_ingester" { SNS_TOPIC_ARN = var.cnm_sns_topic_arn ES_URL = aws_elasticsearch_domain.uds-es.endpoint ES_PORT = 443 - CUMULUS_WORKFLOW_SQS_URL = var.workflow_sqs_url - CUMULUS_LAMBDA_PREFIX = var.prefix REPORT_TO_EMS = var.report_to_ems - CUMULUS_WORKFLOW_NAME = "CatalogGranule" UNITY_DEFAULT_PROVIDER = var.unity_default_provider COLLECTION_CREATION_LAMBDA_NAME = "NA" } diff --git a/tf-module/unity-cumulus/main.tf b/tf-module/unity-cumulus/main.tf index 717f1433..c7906150 100644 --- a/tf-module/unity-cumulus/main.tf +++ b/tf-module/unity-cumulus/main.tf @@ -144,11 +144,7 @@ resource "aws_lambda_function" "uds_api_1" { memory_size = 512 environment { variables = { - CUMULUS_BASE = var.cumulus_base - CUMULUS_LAMBDA_PREFIX = var.prefix LOG_LEVEL = var.log_level - CUMULUS_WORKFLOW_SQS_URL = var.workflow_sqs_url - CUMULUS_WORKFLOW_NAME = "CatalogGranule" UNITY_DEFAULT_PROVIDER = var.unity_default_provider COLLECTION_CREATION_LAMBDA_NAME = "arn:aws:lambda:${var.aws_region}:${local.account_id}:function:${var.prefix}-uds_api_1" SNS_TOPIC_ARN = var.cnm_sns_topic_arn diff --git a/tf-module/unity-cumulus/terraform.tfvars.example b/tf-module/unity-cumulus/terraform.tfvars.example index fedf5746..4167dbc0 100644 --- a/tf-module/unity-cumulus/terraform.tfvars.example +++ b/tf-module/unity-cumulus/terraform.tfvars.example @@ -4,7 +4,6 @@ cumulus_lambda_subnet_ids = ["subnet-00cacaab15b901d53", "subnet-068f7d5c0a85 cumulus_lambda_vpc_id = "vpc-06e627ef021d1854e" security_group_ids = ["sg-045f9c24c760940b6"] aws_region = "us-west-2" -cumulus_base = "https://na/dev" cnm_sns_topic_arn = "arn:aws:sns:us-west-2:884500545225:am-uds-dev-cumulus-cnm-submission-sns" lambda_processing_role_arn = "arn:aws:iam::884500545225:role/am-uds-dev-cumulus-lambda-processing" report_to_ems = "FALSE" diff --git a/tf-module/unity-cumulus/variables.tf b/tf-module/unity-cumulus/variables.tf index 04508a25..591b902f 100644 --- a/tf-module/unity-cumulus/variables.tf +++ b/tf-module/unity-cumulus/variables.tf @@ -57,11 +57,6 @@ variable "cnm_sns_topic_arn" { type = string } -variable "workflow_sqs_url" { - type = string - description = "SNS ARN of CNM submission topic" -} - variable "unity_default_provider" { type = string description = "default provider name" @@ -94,11 +89,6 @@ variable "report_to_ems" { default = "TRUE" } -variable "cumulus_base" { - type = string - description = "Cumulus base URL. 
Example: https://axhmoecy02.execute-api.us-west-2.amazonaws.com/dev"
-}
-
 variable "register_custom_metadata" {
   type = string
   default = "TRUE"

From 513c8d3ef7d7a421ece8b958b4947a4f71ebca1f Mon Sep 17 00:00:00 2001
From: Wai Phyo
Date: Thu, 30 Oct 2025 10:34:11 -0700
Subject: [PATCH 03/35] feat: new daac delivery logic

---
 .../daac_archiver/daac_archiver_catalia.py    | 158 ++++++++++++++++++
 1 file changed, 158 insertions(+)
 create mode 100644 cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py

diff --git a/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py b/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py
new file mode 100644
index 00000000..e051f523
--- /dev/null
+++ b/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py
@@ -0,0 +1,158 @@
+from mdps_ds_lib.lib.aws.aws_s3 import AwsS3
+from mdps_ds_lib.lib.aws.aws_sns import AwsSns
+from mdps_ds_lib.lib.utils.time_utils import TimeUtils
+from mdps_ds_lib.stac_fast_api_client.sfa_client_factory import SFAClientFactory
+from pystac import Item
+
+from cumulus_lambda_functions.lib.lambda_logger_generator import LambdaLoggerGenerator
+LOGGER = LambdaLoggerGenerator.get_logger(__name__, LambdaLoggerGenerator.get_level_from_env())
+
+class DaacArchiverCatalia:
+    archival_status_schema = {
+        "type": "object",
+        "required": [
+            "status"
+        ],
+        "properties": {
+            "status": {
+                "type": "string",
+                "enum": [
+                    "cnm-authorized-success",
+                    "cnm-authorized-failed",
+                    "cnm-staged-success",
+                    "cnm-staged-failed",
+                    "cnm-submit-success",
+                    "cnm-submit-failed",
+                    "cnm-receive-success",
+                    "cnm-receive-failed"
+                ]
+            },
+            "errorCode": {
+                "type": "string"
+            },
+            "errorMessage": {
+                "type": "string"
+            },
+            "href": {
+                "type": "string",
+                "format": "iri-reference"
+            }
+        },
+        "additionalProperties": False
+    }
+    def __init__(self):
+        self.__sns = AwsSns()
+        self.__s3 = AwsS3()
+        self.__staged_s3_bucket = 'TODO'  # TODO
+        self.__sfa_client = SFAClientFactory().get_instance_from_env()
+        self.__archiving_granules_stac = None
+        self.__archiving_status_extension_url = "https://stac-extensions.github.io/file/v2.1.0/schema.json"
+        self.__daac_agreements = []
+
+    def archive_granule(self, collection_id, granule_id):
+        # TODO look up granule details
+        self.__archiving_granules_stac = self.__sfa_client.get_item(collection_id, item_id=granule_id)
+        LOGGER.debug(f'retrieved stac_item from STAC Fast API: {self.__archiving_granules_stac}')
+        self.archive_granule_json()
+        return self
+
+    def archive_granule_json(self):
+        """
+        1. Check UDS API if this granule is being pushed to archive(s)
+        2. Copy Data and Metadata to staging bucket
+        3. Update STAC Metadata to point to the staging bucket and re-upload it
+        4. Send message to DAAC SNS
+
+        :return:
+        """
+        if self.__archiving_granules_stac is None:
+            raise ValueError(f'NULL archiving granule. Please retrieve it first.')
+        self.add_archival_extension()
+        self.get_daac_configs()
+        if len(self.__daac_agreements) < 1:
+            LOGGER.debug(f'this collection does not have any DAAC agreements. {self.__archiving_granules_stac}')
+            return
+        self.stage_files()
+        for each_agreement in self.__daac_agreements:
+            LOGGER.debug(f'working on {each_agreement}')
+            self.send_daac_sns(each_agreement)
+        return
+
+    def add_archival_extension(self):
+        """
+        1. Convert dictionary to pystac object. Store the modified object back to self.__archiving_granules_stac
+        2. Check if it has stac_extensions, and whether it contains self.__archiving_status_extension_url
+        3. If so, done
+        4. 
If not, add that extension, done + + :return: + """ + return self + + def get_daac_configs(self): + # TODO + # update self.__daac_agreements + return + + def stage_files(self): + """ + 1. Check directory s3://// + 2. If not empty. log a warning message. + 3. Empty S3 directory + 4. Get file locations for each asset in self.__archiving_granules_stac which should be a pystac object. + 5. Copy them from source S3 to destination S3 from Step 1. + 6. After each copy, update the href of each asset to new location. + 7. If pystac is part of the assets, change its href to new location as well and upload it. + 8. How do I know if pystac is part of assets? + :return: + """ + return self + + def update_status(self, archival_status: dict): + """ + 1. validate archival_status from parameter against self.archival_status_schema + 2. Add archival_status to self.__archiving_granules_stac>properties>archival:status + 3. get collection and item id from self.__archiving_granules_stac + 4. convert self.__archiving_granules_stac to a json + 5. call self.__sfa_client.update_item() # Note partial may not be available. Just update whole for now. + :param archival_status: + :return: + """ + return self + + def send_daac_sns(self, daac_config): + try: + self.__sns.set_topic_arn(daac_config['daac_sns_topic_arn']) + daac_cnm_message = { + "collection": { + 'name': daac_config['daac_collection_name'], + 'version': daac_config['daac_data_version'], + }, + "identifier": uds_cnm_json['identifier'], + "submissionTime": f'{TimeUtils.get_current_time()}Z', + "provider": daac_config['daac_provider'] if 'daac_provider' in daac_config else granule_identifier.tenant, + "version": "1.6.0", # TODO this is hardcoded? + "product": { + "name": granule_identifier.granule, + # "dataVersion": daac_config['daac_data_version'], + 'files': self.__extract_files(uds_cnm_json, daac_config), + } + } + LOGGER.debug(f'daac_cnm_message: {daac_cnm_message}') + self.__sns.set_external_role(daac_config['daac_role_arn'], daac_config['daac_role_session_name']).publish_message(json.dumps(daac_cnm_message), True) + self.__granules_index.update_entry(granule_identifier.tenant, granule_identifier.venue, { + 'archive_status': 'cnm_s_success', + 'archive_error_message': '', + 'archive_error_code': '', + }, uds_cnm_json['identifier']) + except Exception as e: + LOGGER.exception(f'failed during archival process') + self.__granules_index.update_entry(granule_identifier.tenant, granule_identifier.venue, { + 'archive_status': 'cnm_s_failed', + 'archive_error_message': str(e), + }, uds_cnm_json['identifier']) + + return + + def From c9677cb7ae6eeed96a1e01ddff62c05d546f7252 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Thu, 30 Oct 2025 13:45:01 -0700 Subject: [PATCH 04/35] feat: implement some methods with claude help --- .../daac_archiver/daac_archiver_catalia.py | 202 +++++++++++++++++- requirements.txt | 2 +- 2 files changed, 199 insertions(+), 5 deletions(-) diff --git a/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py b/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py index e051f523..aa549ae8 100644 --- a/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py +++ b/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py @@ -1,7 +1,9 @@ +import json from mdps_ds_lib.lib.aws.aws_s3 import AwsS3 from mdps_ds_lib.lib.aws.aws_sns import AwsSns from mdps_ds_lib.lib.utils.time_utils import TimeUtils from mdps_ds_lib.stac_fast_api_client.sfa_client_factory import SFAClientFactory +from 
mdps_ds_lib.stage_in_out.stage_in_out_utils import StageInOutUtils from pystac import Item from cumulus_lambda_functions.lib.lambda_logger_generator import LambdaLoggerGenerator @@ -36,7 +38,14 @@ class DaacArchiverCatalia: "href": { "type": "string", "format": "iri-reference" - } + }, + "datetime": { + "title": "Date and Time", + "description": "timestamp of this update, in UTC (Formatted in RFC 3339) ", + "type": "string", + "format": "date-time", + "pattern": "(\\+00:00|Z)$" + } }, "additionalProperties": False } @@ -88,6 +97,27 @@ def add_archival_extension(self): :return: """ + if self.__archiving_granules_stac is None: + raise ValueError(f'NULL archiving granule. Cannot add archival extension.') + + # Convert to pystac Item if it's a dictionary + if isinstance(self.__archiving_granules_stac, dict): + self.__archiving_granules_stac = Item.from_dict(self.__archiving_granules_stac) + + # Check if the archival extension is already present + if hasattr(self.__archiving_granules_stac, 'stac_extensions'): + if self.__archiving_status_extension_url not in self.__archiving_granules_stac.stac_extensions: + self.__archiving_granules_stac.stac_extensions.append(self.__archiving_status_extension_url) + LOGGER.debug(f'Added archival extension to STAC item: {self.__archiving_status_extension_url}') + else: + # Initialize stac_extensions if it doesn't exist + self.__archiving_granules_stac.stac_extensions = [self.__archiving_status_extension_url] + LOGGER.debug(f'Initialized stac_extensions with archival extension: {self.__archiving_status_extension_url}') + + # Initialize archival:status property if it doesn't exist + if 'archival:status' not in self.__archiving_granules_stac.properties: + self.__archiving_granules_stac.properties['archival:status'] = [] + LOGGER.debug(f'Initialized archival:status property for STAC item') return self def get_daac_configs(self): @@ -107,6 +137,116 @@ def stage_files(self): 8. How do I know if pystac is part of assets? :return: """ + if self.__archiving_granules_stac is None: + raise ValueError(f'NULL archiving granule. Cannot stage files.') + + if self.__staged_s3_bucket == 'TODO': + raise ValueError(f'Staged S3 bucket is not configured. Please set self.__staged_s3_bucket.') + + # Get collection and item IDs + collection_id = self.__archiving_granules_stac.collection_id + item_id = self.__archiving_granules_stac.id + + # Define staging directory path + staging_prefix = f"{collection_id}/{item_id}/" + staging_s3_path = f"s3://{self.__staged_s3_bucket}/{staging_prefix}" + + LOGGER.info(f'Staging files to: {staging_s3_path}') + + # Check if staging directory exists and has content + try: + existing_objects = list(self.__s3.get_child_s3_files(self.__staged_s3_bucket, staging_prefix)) + if existing_objects: + LOGGER.warning(f'Staging directory {staging_s3_path} is not empty. Found {len(existing_objects)} objects. 
Cleaning up...') + # Empty the staging directory using delete_multiple in chunks + object_keys = [obj_key for obj_key, obj_size in existing_objects] # Extract just the keys from (key, size) tuples + + # Delete in chunks to avoid overwhelming S3 delete API + for chunk in StageInOutUtils.chunk_list(object_keys, 50): + try: + self.__s3.delete_multiple(s3_bucket=self.__staged_s3_bucket, s3_paths=chunk) + LOGGER.debug(f'Removed {len(chunk)} objects from staging directory') + except Exception as chunk_e: + LOGGER.error(f'Failed to delete chunk of objects: {chunk_e}') + raise + LOGGER.info(f'Successfully cleaned up {len(object_keys)} objects from staging directory') + except Exception as e: + LOGGER.debug(f'No existing objects found in staging directory or error checking: {e}') + + # Process each asset in the STAC item + staged_assets = {} + for asset_key, asset in self.__archiving_granules_stac.assets.items(): + if hasattr(asset, 'href') and asset.href: + source_href = asset.href + LOGGER.debug(f'Processing asset {asset_key} from {source_href}') + + # Parse S3 URL to get bucket and key + if source_href.startswith('s3://'): + # Remove s3:// prefix and split + s3_path = source_href[5:] + bucket_key_parts = s3_path.split('/', 1) + if len(bucket_key_parts) == 2: + source_bucket, source_key = bucket_key_parts + + # Define destination key (preserve original filename) + filename = source_key.split('/')[-1] + dest_key = f"{staging_prefix}{filename}" + dest_href = f"s3://{self.__staged_s3_bucket}/{dest_key}" + + try: + # Copy file to staging bucket + self.__s3.copy_artifact(source_bucket, source_key, self.__staged_s3_bucket, dest_key, copy_tags=False, delete_original=False) + LOGGER.info(f'Copied {source_href} to {dest_href}') + + # Update asset href to new location + asset.href = dest_href + staged_assets[asset_key] = dest_href + + except Exception as e: + LOGGER.error(f'Failed to copy asset {asset_key} from {source_href} to {dest_href}: {e}') + raise + else: + LOGGER.warning(f'Invalid S3 URL format for asset {asset_key}: {source_href}') + else: + LOGGER.warning(f'Non-S3 asset {asset_key} not staged: {source_href}') + + # Check if there's a STAC metadata file in assets and handle it + stac_metadata_key = None + for asset_key, asset in self.__archiving_granules_stac.assets.items(): + if asset_key.lower() in ['metadata', 'stac', 'item'] or asset.href.endswith('.json'): + stac_metadata_key = asset_key + break + + # Upload the updated STAC item to staging area + stac_filename = f"{item_id}.json" + stac_dest_key = f"{staging_prefix}{stac_filename}" + stac_dest_href = f"s3://{self.__staged_s3_bucket}/{stac_dest_key}" + + try: + # Convert STAC item to JSON and upload + stac_json = self.__archiving_granules_stac.to_dict() + self.__s3.set_s3_url(f's3://{self.__staged_s3_bucket}/{stac_dest_key}').upload_bytes( + bytes(str(stac_json).encode('utf-8')), + content_type='application/json' + ) + LOGGER.info(f'Uploaded updated STAC metadata to {stac_dest_href}') + + # Update or add STAC metadata asset reference + if stac_metadata_key: + self.__archiving_granules_stac.assets[stac_metadata_key].href = stac_dest_href + else: + # Add new asset for STAC metadata + from pystac import Asset + self.__archiving_granules_stac.add_asset( + 'stac-metadata', + Asset(href=stac_dest_href, media_type='application/json', title='STAC Metadata') + ) + + except Exception as e: + LOGGER.error(f'Failed to upload STAC metadata to {stac_dest_href}: {e}') + raise + + LOGGER.info(f'Successfully staged {len(staged_assets)} assets for granule 
{item_id}') return self def update_status(self, archival_status: dict): @@ -119,7 +259,63 @@ def update_status(self, archival_status: dict): :param archival_status: :return: """ - return self + import jsonschema + from datetime import datetime + + if self.__archiving_granules_stac is None: + raise ValueError(f'NULL archiving granule. Cannot update status.') + + if not isinstance(archival_status, dict): + raise ValueError(f'archival_status must be a dictionary, got {type(archival_status)}') + + # Validate archival_status against schema + try: + jsonschema.validate(archival_status, self.archival_status_schema) + LOGGER.debug(f'archival_status validation successful: {archival_status}') + except jsonschema.ValidationError as e: + LOGGER.error(f'archival_status validation failed: {e}') + raise ValueError(f'Invalid archival_status format: {e.message}') + + # Add timestamp to the status + archival_status_with_timestamp = archival_status.copy() + archival_status_with_timestamp['datetime'] = f'{TimeUtils.get_current_time()}Z' + + # Ensure archival:status property exists and is a list + if 'archival:status' not in self.__archiving_granules_stac.properties: + self.__archiving_granules_stac.properties['archival:status'] = [] + elif not isinstance(self.__archiving_granules_stac.properties['archival:status'], list): + self.__archiving_granules_stac.properties['archival:status'] = [] + + # Add the new status to the list + self.__archiving_granules_stac.properties['archival:status'].append(archival_status_with_timestamp) + LOGGER.info(f'Added archival status: {archival_status_with_timestamp}') + + # Get collection and item IDs + collection_id = self.__archiving_granules_stac.collection_id + item_id = self.__archiving_granules_stac.id + + if not collection_id or not item_id: + raise ValueError(f'Missing collection_id or item_id from STAC item. 
collection_id: {collection_id}, item_id: {item_id}') + + try: + # Convert STAC item to JSON dictionary + stac_item_dict = self.__archiving_granules_stac.to_dict() + + # Update the item using the STAC Fast API client + updated_item = self.__sfa_client.update_item( + collection_id=collection_id, + item_id=item_id, + item=stac_item_dict + ) + + LOGGER.info(f'Successfully updated STAC item {item_id} in collection {collection_id} with new archival status') + LOGGER.debug(f'Updated item response: {updated_item}') + + return self + + except Exception as e: + LOGGER.error(f'Failed to update STAC item {item_id} in collection {collection_id}: {e}') + raise RuntimeError(f'Failed to update STAC item status: {e}') from e def send_daac_sns(self, daac_config): try: @@ -154,5 +350,3 @@ def send_daac_sns(self, daac_config): }, uds_cnm_json['identifier']) return - - def diff --git a/requirements.txt b/requirements.txt index 870508d8..5222289d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,7 +14,7 @@ jsonschema==4.23.0 jsonschema-specifications==2023.12.1 lark==0.12.0 mangum==0.18.0 -mdps-ds-lib==1.2.0.dev100 +mdps-ds-lib==1.2.0.dev200 pydantic==2.9.2 pydantic_core==2.23.4 pygeofilter==0.2.4 From 4d7620aef3e58c4464b313a8639efe4fd734b4c2 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Tue, 4 Nov 2025 15:58:37 -0800 Subject: [PATCH 05/35] fix: update staging file method --- .../daac_archiver/daac_archiver_catalia.py | 64 +---- .../test_daac_archiver_catalia.py | 262 ++++++++++++++++++ 2 files changed, 265 insertions(+), 61 deletions(-) create mode 100644 tests/cumulus_lambda_functions/daac_archiver/test_daac_archiver_catalia.py diff --git a/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py b/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py index aa549ae8..45607ddb 100644 --- a/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py +++ b/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py @@ -9,6 +9,7 @@ from cumulus_lambda_functions.lib.lambda_logger_generator import LambdaLoggerGenerator LOGGER = LambdaLoggerGenerator.get_logger(__name__, LambdaLoggerGenerator.get_level_from_env()) + class DaacArchiverCatalia: archival_status_schema = { "type": "object", @@ -55,7 +56,7 @@ def __init__(self): self.__staged_s3_bucket = 'TODO' # TODO self.__sfa_client = SFAClientFactory().get_instance_from_env() self.__archiving_granules_stac = None - self.__archiving_status_extension_url = "https://stac-extensions.github.io/file/v2.1.0/schema.json" + self.__archiving_status_extension_url = "https://stac-extensions.github.io/archival_statuses/v1.0.0/schema.json" self.__daac_agreements = [] def archive_granule(self, collection_id, granule_id): @@ -148,31 +149,10 @@ def stage_files(self): item_id = self.__archiving_granules_stac.id # Define staging directory path - staging_prefix = f"{collection_id}/{item_id}/" + staging_prefix = f"{collection_id}/{item_id}/{TimeUtils.get_current_time()}/" staging_s3_path = f"s3://{self.__staged_s3_bucket}/{staging_prefix}" - LOGGER.info(f'Staging files to: {staging_s3_path}') - # Check if staging directory exists and has content - try: - existing_objects = list(self.__s3.get_child_s3_files(self.__staged_s3_bucket, staging_prefix)) - if existing_objects: - LOGGER.warning(f'Staging directory {staging_s3_path} is not empty. Found {len(existing_objects)} objects. 
Cleaning up...') - # Empty the staging directory using delete_multiple in chunks - object_keys = [obj_key for obj_key, obj_size in existing_objects] # Extract just the keys from (key, size) tuples - - # Delete in chunks to avoid overwhelming S3 delete API - for chunk in StageInOutUtils.chunk_list(object_keys, 50): - try: - self.__s3.delete_multiple(s3_bucket=self.__staged_s3_bucket, s3_paths=chunk) - LOGGER.debug(f'Removed {len(chunk)} objects from staging directory') - except Exception as chunk_e: - LOGGER.error(f'Failed to delete chunk of objects: {chunk_e}') - raise - LOGGER.info(f'Successfully cleaned up {len(object_keys)} objects from staging directory') - except Exception as e: - LOGGER.debug(f'No existing objects found in staging directory or error checking: {e}') - # Process each asset in the STAC item staged_assets = {} for asset_key, asset in self.__archiving_granules_stac.assets.items(): @@ -209,44 +189,6 @@ def stage_files(self): LOGGER.warning(f'Invalid S3 URL format for asset {asset_key}: {source_href}') else: LOGGER.warning(f'Non-S3 asset {asset_key} not staged: {source_href}') - - # Check if there's a STAC metadata file in assets and handle it - stac_metadata_key = None - for asset_key, asset in self.__archiving_granules_stac.assets.items(): - if asset_key.lower() in ['metadata', 'stac', 'item'] or asset.href.endswith('.json'): - stac_metadata_key = asset_key - break - - # Upload the updated STAC item to staging area - stac_filename = f"{item_id}.json" - stac_dest_key = f"{staging_prefix}{stac_filename}" - stac_dest_href = f"s3://{self.__staged_s3_bucket}/{stac_dest_key}" - - try: - # Convert STAC item to JSON and upload - stac_json = self.__archiving_granules_stac.to_dict() - self.__s3.set_s3_url(f's3://{self.__staged_s3_bucket}/{stac_dest_key}').upload_bytes( - bytes(str(stac_json).encode('utf-8')), - content_type='application/json' - ) - LOGGER.info(f'Uploaded updated STAC metadata to {stac_dest_href}') - - # Update or add STAC metadata asset reference - if stac_metadata_key: - self.__archiving_granules_stac.assets[stac_metadata_key].href = stac_dest_href - else: - # Add new asset for STAC metadata - from pystac import Asset - self.__archiving_granules_stac.add_asset( - 'stac-metadata', - Asset(href=stac_dest_href, media_type='application/json', title='STAC Metadata') - ) - - except Exception as e: - LOGGER.error(f'Failed to upload STAC metadata to {stac_dest_href}: {e}') - raise - - LOGGER.info(f'Successfully staged {len(staged_assets)} assets for granule {item_id}') return self def update_status(self, archival_status: dict): diff --git a/tests/cumulus_lambda_functions/daac_archiver/test_daac_archiver_catalia.py b/tests/cumulus_lambda_functions/daac_archiver/test_daac_archiver_catalia.py new file mode 100644 index 00000000..919489a5 --- /dev/null +++ b/tests/cumulus_lambda_functions/daac_archiver/test_daac_archiver_catalia.py @@ -0,0 +1,262 @@ +import json +import os +import tempfile +import uuid +from unittest import TestCase +from unittest.mock import Mock, patch, MagicMock +from datetime import datetime + +from pystac import Item, Asset +from cumulus_lambda_functions.daac_archiver.daac_archiver_catalia import DaacArchiverCatalia + + +class TestDaacArchiverCatalia(TestCase): + + def setUp(self): + """Set up test fixtures before each test method.""" + self.s3_source_bucket = 'test-source-bucket' # Fill this with actual bucket name later + self.s3_staged_bucket = 'test-staged-bucket' # Fill this with actual staged bucket name later + + # Create test collection and item 
IDs + self.collection_id = 'test-collection' + self.item_id = f'test-item-{uuid.uuid4().hex[:8]}' + + # Create test data content + self.test_data_content = b'This is test granule data content for testing' + self.test_metadata_content = b'This is test metadata content for testing' + + # Create test filenames + self.test_data_filename = f'{self.item_id}_data.tif' + self.test_metadata_filename = f'{self.item_id}_metadata.xml' + + # Setup mock S3 paths + self.source_data_key = f'source/{self.collection_id}/{self.test_data_filename}' + self.source_metadata_key = f'source/{self.collection_id}/{self.test_metadata_filename}' + self.source_stac_key = f'source/{self.collection_id}/{self.item_id}.json' + + # Expected staging paths + self.staging_prefix = f'{self.collection_id}/{self.item_id}/' + self.staged_data_key = f'{self.staging_prefix}{self.test_data_filename}' + self.staged_metadata_key = f'{self.staging_prefix}{self.test_metadata_filename}' + self.staged_stac_key = f'{self.staging_prefix}{self.item_id}.json' + + def test_stage_files_01(self): + """ + Test stage_files method with complete workflow: + 1. Creates actual dummy data and metadata files in temp directories + 2. Creates STAC item with assets pointing to source S3 locations + 3. Calls stage_files method + 4. Verifies files are copied to staging bucket with correct content + 5. Verifies STAC metadata has updated asset URLs and correct content + """ + + with tempfile.TemporaryDirectory() as temp_source_dir, \ + tempfile.TemporaryDirectory() as temp_staged_dir: + + # Create actual source files in temp directory + source_data_file = os.path.join(temp_source_dir, self.test_data_filename) + source_metadata_file = os.path.join(temp_source_dir, self.test_metadata_filename) + + # Write test content to source files + with open(source_data_file, 'wb') as f: + f.write(self.test_data_content) + with open(source_metadata_file, 'wb') as f: + f.write(self.test_metadata_content) + + # Create STAC Item with assets pointing to source S3 locations + stac_item = Item( + id=self.item_id, + geometry={ + "type": "Polygon", + "coordinates": [[[-180, -90], [180, -90], [180, 90], [-180, 90], [-180, -90]]] + }, + bbox=[-180, -90, 180, 90], + datetime=datetime.now(), + properties={} + ) + + # Add assets pointing to source S3 locations + stac_item.add_asset( + 'data', + Asset( + href=f's3://{self.s3_source_bucket}/{self.source_data_key}', + media_type='image/tiff', + title='Test Data File' + ) + ) + + stac_item.add_asset( + 'metadata', + Asset( + href=f's3://{self.s3_source_bucket}/{self.source_metadata_key}', + media_type='application/xml', + title='Test Metadata File' + ) + ) + + # Set collection ID + stac_item.collection_id = self.collection_id + + # Storage for captured upload content + uploaded_files = {} + uploaded_stac_content = None + + def mock_s3_cp(source_bucket, source_key, dest_bucket, dest_key, + copy_tags: float = True, update_old_metadata_style: bool = True, delete_original: bool = False): + """Mock S3 copy that saves content to temp staged directory""" + # Simulate copying from source to destination + source_file_path = None + if source_key == self.source_data_key: + source_file_path = source_data_file + elif source_key == self.source_metadata_key: + source_file_path = source_metadata_file + + if source_file_path and os.path.exists(source_file_path): + # Create destination directory structure + dest_dir = os.path.join(temp_staged_dir, os.path.dirname(dest_key)) + os.makedirs(dest_dir, exist_ok=True) + + # Copy file content to simulate S3 copy + 
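+                    # Note: unittest.mock invokes a side_effect function with exactly the
+                    # arguments the code under test passed, so this parameter list is kept
+                    # in sync with the assumed AwsS3.copy_artifact(source_bucket, source_key,
+                    # dest_bucket, dest_key, copy_tags=..., update_old_metadata_style=...,
+                    # delete_original=...) keyword signature; any drift in stage_files' call
+                    # would surface here as a TypeError.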
dest_file_path = os.path.join(temp_staged_dir, dest_key) + with open(source_file_path, 'rb') as src, open(dest_file_path, 'wb') as dst: + dst.write(src.read()) + + # Store for verification + uploaded_files[dest_key] = dest_file_path + + def mock_upload_bytes(content, content_type=None): + """Mock S3 upload_bytes that captures the STAC content""" + nonlocal uploaded_stac_content + if isinstance(content, bytes): + uploaded_stac_content = content.decode('utf-8') + else: + uploaded_stac_content = str(content) + + # Create DaacArchiverCatalia instance with mocked dependencies + with patch('cumulus_lambda_functions.daac_archiver.daac_archiver_catalia.AwsS3') as mock_s3_class, \ + patch('cumulus_lambda_functions.daac_archiver.daac_archiver_catalia.AwsSns') as mock_sns_class, \ + patch('cumulus_lambda_functions.daac_archiver.daac_archiver_catalia.SFAClientFactory') as mock_sfa_factory: + + # Setup mocks + mock_s3 = Mock() + mock_s3_class.return_value = mock_s3 + mock_sns = Mock() + mock_sns_class.return_value = mock_sns + mock_sfa_client = Mock() + mock_sfa_factory.return_value.get_instance_from_env.return_value = mock_sfa_client + + # Mock S3 get_child_s3_files to return empty list (no existing files in staging) + mock_s3.get_child_s3_files.return_value = [] + + # Mock S3 copy operations with our custom function + mock_s3.copy_artifact.side_effect = mock_s3_cp + + # Mock S3 upload operations + mock_s3.set_s3_url.return_value = mock_s3 + mock_s3.upload_bytes.side_effect = mock_upload_bytes + + # Create archiver instance + archiver = DaacArchiverCatalia() + + # Set the staged bucket (override the 'TODO' value) + archiver._DaacArchiverCatalia__staged_s3_bucket = self.s3_staged_bucket + + # Set the STAC item to be archived + archiver._DaacArchiverCatalia__archiving_granules_stac = stac_item + + # Call stage_files method + result = archiver.stage_files() + + # Verify the method returns self + self.assertEqual(result, archiver) + + # Get the updated STAC item from the archiver + updated_stac = archiver._DaacArchiverCatalia__archiving_granules_stac + + # Verify that asset URLs were updated to staging locations with timestamp pattern + # Expected pattern: s3:////// + + # Check data asset + data_asset = updated_stac.assets['data'] + data_href = data_asset.href + self.assertTrue(data_href.startswith(f's3://{self.s3_staged_bucket}/{self.collection_id}/{self.item_id}/'), + f"Data asset href should start with staging path: {data_href}") + self.assertTrue(data_href.endswith(f'/{self.test_data_filename}'), + f"Data asset href should end with filename: {data_href}") + + # Extract timestamp portion from the path + # Format: s3://bucket/collection/item-id/timestamp/filename + path_parts = data_href.replace(f's3://{self.s3_staged_bucket}/', '').split('/') + self.assertEqual(len(path_parts), 4, f"Data asset path should have 4 parts: {path_parts}") + self.assertEqual(path_parts[0], self.collection_id, "First path part should be collection ID") + self.assertEqual(path_parts[1], self.item_id, "Second path part should be item ID") + timestamp_part = path_parts[2] + filename_part = path_parts[3] + + # Verify timestamp format: yyyy-MM-ddTHH:mm:ss.fff (ISO 8601 format) + import re + timestamp_pattern = r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3,}$' + self.assertIsNotNone(re.match(timestamp_pattern, timestamp_part), + f"Timestamp should match yyyy-MM-ddTHH:mm:ss.fff format: {timestamp_part}") + self.assertEqual(filename_part, self.test_data_filename, + f"Filename should match original: {filename_part}") + + # Check 
metadata asset + metadata_asset = updated_stac.assets['metadata'] + metadata_href = metadata_asset.href + self.assertTrue(metadata_href.startswith(f's3://{self.s3_staged_bucket}/{self.collection_id}/{self.item_id}/'), + f"Metadata asset href should start with staging path: {metadata_href}") + self.assertTrue(metadata_href.endswith(f'/{self.test_metadata_filename}'), + f"Metadata asset href should end with filename: {metadata_href}") + + # Extract timestamp from metadata path and verify it matches data asset timestamp + metadata_path_parts = metadata_href.replace(f's3://{self.s3_staged_bucket}/', '').split('/') + metadata_timestamp_part = metadata_path_parts[2] + self.assertEqual(timestamp_part, metadata_timestamp_part, + "Both assets should have the same timestamp in their paths") + + # Verify S3 copy operations were called for each asset with correct staging paths + self.assertEqual(mock_s3.copy_artifact.call_count, 2, "Should copy both data and metadata files") + + # Check that cp was called with the correct staging paths containing timestamps + cp_calls = mock_s3.copy_artifact.call_args_list + + # Verify data file copy + data_cp_call = cp_calls[0][0] # (source_bucket, source_key, dest_bucket, dest_key) + self.assertEqual(data_cp_call[0], self.s3_source_bucket, "Data copy source bucket") + self.assertEqual(data_cp_call[1], self.source_data_key, "Data copy source key") + self.assertEqual(data_cp_call[2], self.s3_staged_bucket, "Data copy dest bucket") + + # Verify destination key has correct format: collection/item-id/timestamp/filename + data_dest_key = data_cp_call[3] + data_dest_parts = data_dest_key.split('/') + self.assertEqual(len(data_dest_parts), 4, f"Data dest key should have 4 parts: {data_dest_parts}") + self.assertEqual(data_dest_parts[0], self.collection_id) + self.assertEqual(data_dest_parts[1], self.item_id) + self.assertIsNotNone(re.match(timestamp_pattern, data_dest_parts[2]), + f"Dest key timestamp should match format: {data_dest_parts[2]}") + self.assertEqual(data_dest_parts[3], self.test_data_filename) + + # Verify metadata file copy + metadata_cp_call = cp_calls[1][0] + self.assertEqual(metadata_cp_call[0], self.s3_source_bucket, "Metadata copy source bucket") + self.assertEqual(metadata_cp_call[1], self.source_metadata_key, "Metadata copy source key") + self.assertEqual(metadata_cp_call[2], self.s3_staged_bucket, "Metadata copy dest bucket") + + metadata_dest_key = metadata_cp_call[3] + metadata_dest_parts = metadata_dest_key.split('/') + self.assertEqual(len(metadata_dest_parts), 4, f"Metadata dest key should have 4 parts: {metadata_dest_parts}") + self.assertEqual(metadata_dest_parts[0], self.collection_id) + self.assertEqual(metadata_dest_parts[1], self.item_id) + self.assertEqual(metadata_dest_parts[2], data_dest_parts[2], "Same timestamp should be used for both files") + self.assertEqual(metadata_dest_parts[3], self.test_metadata_filename) + + # Verify that both assets now point to the same timestamped staging directory + data_staging_dir = '/'.join(data_href.split('/')[:-1]) # Remove filename + metadata_staging_dir = '/'.join(metadata_href.split('/')[:-1]) # Remove filename + self.assertEqual(data_staging_dir, metadata_staging_dir, + "Both assets should be in the same timestamped staging directory") + + print(f"✅ Test passed! 
Assets staged to: {data_staging_dir}") + return + From 7842a6d0cd855de381bda9bfd4a7187bcec50468 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Tue, 4 Nov 2025 16:00:11 -0800 Subject: [PATCH 06/35] feat: keep it to 1 method --- .../test_daac_archiver_catalia.py | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/tests/cumulus_lambda_functions/daac_archiver/test_daac_archiver_catalia.py b/tests/cumulus_lambda_functions/daac_archiver/test_daac_archiver_catalia.py index 919489a5..34fed650 100644 --- a/tests/cumulus_lambda_functions/daac_archiver/test_daac_archiver_catalia.py +++ b/tests/cumulus_lambda_functions/daac_archiver/test_daac_archiver_catalia.py @@ -13,6 +13,18 @@ class TestDaacArchiverCatalia(TestCase): def setUp(self): + return + + + def test_stage_files_01(self): + """ + Test stage_files method with complete workflow: + 1. Creates actual dummy data and metadata files in temp directories + 2. Creates STAC item with assets pointing to source S3 locations + 3. Calls stage_files method + 4. Verifies files are copied to staging bucket with correct content + 5. Verifies STAC metadata has updated asset URLs and correct content + """ """Set up test fixtures before each test method.""" self.s3_source_bucket = 'test-source-bucket' # Fill this with actual bucket name later self.s3_staged_bucket = 'test-staged-bucket' # Fill this with actual staged bucket name later @@ -39,17 +51,6 @@ def setUp(self): self.staged_data_key = f'{self.staging_prefix}{self.test_data_filename}' self.staged_metadata_key = f'{self.staging_prefix}{self.test_metadata_filename}' self.staged_stac_key = f'{self.staging_prefix}{self.item_id}.json' - - def test_stage_files_01(self): - """ - Test stage_files method with complete workflow: - 1. Creates actual dummy data and metadata files in temp directories - 2. Creates STAC item with assets pointing to source S3 locations - 3. Calls stage_files method - 4. Verifies files are copied to staging bucket with correct content - 5. Verifies STAC metadata has updated asset URLs and correct content - """ - with tempfile.TemporaryDirectory() as temp_source_dir, \ tempfile.TemporaryDirectory() as temp_staged_dir: From 2b006a5a2a708c1b4225727c2ad7571d7f4c79d7 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 5 Nov 2025 07:29:47 -0800 Subject: [PATCH 07/35] fix: finish testing update stac with mock stac fast api --- .../test_daac_archiver_catalia.py | 312 ++++++++++++++++++ 1 file changed, 312 insertions(+) diff --git a/tests/cumulus_lambda_functions/daac_archiver/test_daac_archiver_catalia.py b/tests/cumulus_lambda_functions/daac_archiver/test_daac_archiver_catalia.py index 34fed650..f7b64e0a 100644 --- a/tests/cumulus_lambda_functions/daac_archiver/test_daac_archiver_catalia.py +++ b/tests/cumulus_lambda_functions/daac_archiver/test_daac_archiver_catalia.py @@ -261,3 +261,315 @@ def mock_upload_bytes(content, content_type=None): print(f"✅ Test passed! Assets staged to: {data_staging_dir}") return + def test_update_status_01(self): + """ + Test update_status method with progressive status updates: + 1. Creates STAC item without archival extension or status + 2. Adds archival statuses one by one in sequence + 3. Verifies each status is added correctly and in order + 4. 
Verifies STAC Fast API client update_item is called + """ + # Setup test data + collection_id = 'example-collection' + item_id = f'example-item-{uuid.uuid4().hex[:8]}' + + # Create STAC Item without any archival extension or properties + stac_item = Item( + id=item_id, + geometry={ + "type": "Polygon", + "coordinates": [[[-180, -90], [180, -90], [180, 90], [-180, 90], [-180, -90]]] + }, + bbox=[-180, -90, 180, 90], + datetime=datetime.now(), + properties={} # No archival extension or status initially + ) + stac_item.collection_id = collection_id + + # Define status updates to apply in sequence + status_updates = [ + { + "status": "cnm-authorized-success" + }, + { + "status": "cnm-staged-success", + "href": "s3://uds-staging/example-collection/example-item-with-archival-status" + }, + { + "status": "cnm-submit-success" + }, + { + "status": "cnm-receive-failed", + "errorCode": "NETWORK_TIMEOUT", + "errorMessage": "Failed to receive CNM response within timeout period", + "href": "https://example.com/cnm/receive/def456" + } + ] + + # Mock dependencies + with patch('cumulus_lambda_functions.daac_archiver.daac_archiver_catalia.AwsS3') as mock_s3_class, \ + patch('cumulus_lambda_functions.daac_archiver.daac_archiver_catalia.AwsSns') as mock_sns_class, \ + patch('cumulus_lambda_functions.daac_archiver.daac_archiver_catalia.SFAClientFactory') as mock_sfa_factory: + + # Setup mocks + mock_s3 = Mock() + mock_s3_class.return_value = mock_s3 + mock_sns = Mock() + mock_sns_class.return_value = mock_sns + mock_sfa_client = Mock() + mock_sfa_factory.return_value.get_instance_from_env.return_value = mock_sfa_client + + # Storage for captured SFA client calls + sfa_calls = [] + + # Mock SFA client update_item to capture and verify parameters + def mock_update_item(collection_id, item_id, item, update_whole=True): + # Store the call details for verification + call_info = { + 'collection_id': collection_id, + 'item_id': item_id, + 'item_dict': item.copy() # Make a copy to preserve state + } + sfa_calls.append(call_info) + + # Verify the basic parameters are correct + expected_collection_id = 'example-collection' + expected_item_id = item_id # This will be the generated item_id + + if collection_id != expected_collection_id: + raise AssertionError(f"Expected collection_id '{expected_collection_id}', got '{collection_id}'") + + return item + + mock_sfa_client.update_item.side_effect = mock_update_item + + # Create archiver instance + archiver = DaacArchiverCatalia() + + # Set the STAC item to be updated + archiver._DaacArchiverCatalia__archiving_granules_stac = stac_item + + # Verify initial state - no archival extension or status + initial_stac = archiver._DaacArchiverCatalia__archiving_granules_stac + self.assertNotIn('archival:status', initial_stac.properties, + "Initially should have no archival:status property") + + # Check if stac_extensions exists and if it does, verify no archival extension + if hasattr(initial_stac, 'stac_extensions') and initial_stac.stac_extensions: + self.assertNotIn(archiver._DaacArchiverCatalia__archiving_status_extension_url, initial_stac.stac_extensions, + "Initially should have no archival extension") + archiver.add_archival_extension() + # Apply status updates one by one and verify each + for i, status_update in enumerate(status_updates): + # Call update_status with the current status + result = archiver.update_status(status_update) + + # Verify method returns self + self.assertEqual(result, archiver, f"update_status should return self (iteration {i+1})") + + # Get updated 
STAC item + updated_stac = archiver._DaacArchiverCatalia__archiving_granules_stac + + # Verify archival extension was added (should happen on first call) + self.assertIn(archiver._DaacArchiverCatalia__archiving_status_extension_url, updated_stac.stac_extensions, + f"Archival extension should be present after update {i+1}") + + # Verify archival:status property exists and is a list + self.assertIn('archival:status', updated_stac.properties, + f"archival:status property should exist after update {i+1}") + archival_statuses = updated_stac.properties['archival:status'] + self.assertIsInstance(archival_statuses, list, + f"archival:status should be a list after update {i+1}") + + # Verify correct number of status entries + expected_count = i + 1 + self.assertEqual(len(archival_statuses), expected_count, + f"Should have {expected_count} status entries after update {i+1}") + + # Verify the latest status was added correctly + latest_status = archival_statuses[-1] + + # Check required status field + self.assertEqual(latest_status['status'], status_update['status'], + f"Status field should match for update {i+1}") + + # Check optional fields if present in the update + if 'href' in status_update: + self.assertEqual(latest_status['href'], status_update['href'], + f"href field should match for update {i+1}") + + if 'errorCode' in status_update: + self.assertEqual(latest_status['errorCode'], status_update['errorCode'], + f"errorCode field should match for update {i+1}") + + if 'errorMessage' in status_update: + self.assertEqual(latest_status['errorMessage'], status_update['errorMessage'], + f"errorMessage field should match for update {i+1}") + + # Verify datetime was automatically added + self.assertIn('datetime', latest_status, + f"datetime should be automatically added for update {i+1}") + + # Verify timestamp format (should end with 'Z' for UTC) + timestamp = latest_status['datetime'] + self.assertTrue(timestamp.endswith('Z'), + f"timestamp should end with 'Z' for update {i+1}: {timestamp}") + + # Verify SFA client update_item was called + expected_call_count = i + 1 + self.assertEqual(mock_sfa_client.update_item.call_count, expected_call_count, + f"SFA client update_item should be called {expected_call_count} times") + + # Verify the captured SFA call details + self.assertEqual(len(sfa_calls), expected_call_count, + f"Should have captured {expected_call_count} SFA calls") + + # Get the latest SFA call details + latest_sfa_call = sfa_calls[-1] + + # Verify call parameters + self.assertEqual(latest_sfa_call['collection_id'], collection_id, + f"SFA call collection_id should be correct for update {i+1}") + self.assertEqual(latest_sfa_call['item_id'], item_id, + f"SFA call item_id should be correct for update {i+1}") + + # Verify the item_dict content that was sent to SFA client + sent_item_dict = latest_sfa_call['item_dict'] + self.assertIsInstance(sent_item_dict, dict, + f"SFA call item_dict should be a dict for update {i+1}") + + # Verify basic STAC structure in sent item_dict + self.assertEqual(sent_item_dict['id'], item_id, + f"SFA item_dict should have correct id for update {i+1}") + self.assertEqual(sent_item_dict['collection'], collection_id, + f"SFA item_dict should have correct collection for update {i+1}") + + # Verify archival extension was added to sent item_dict + self.assertIn('stac_extensions', sent_item_dict, + f"SFA item_dict should have stac_extensions for update {i+1}") + self.assertIn(archiver._DaacArchiverCatalia__archiving_status_extension_url, sent_item_dict['stac_extensions'], + 
f"SFA item_dict should have archival extension for update {i+1}") + + # Verify archival:status property in sent item_dict + self.assertIn('properties', sent_item_dict, + f"SFA item_dict should have properties for update {i+1}") + properties = sent_item_dict['properties'] + self.assertIn('archival:status', properties, + f"SFA item_dict properties should have archival:status for update {i+1}") + + # Verify archival:status content in sent item_dict + sent_archival_statuses = properties['archival:status'] + self.assertIsInstance(sent_archival_statuses, list, + f"SFA item_dict archival:status should be a list for update {i+1}") + self.assertEqual(len(sent_archival_statuses), expected_count, + f"SFA item_dict should have {expected_count} status entries for update {i+1}") + + # Verify the latest status in sent item_dict matches what we just added + sent_latest_status = sent_archival_statuses[-1] + self.assertEqual(sent_latest_status['status'], status_update['status'], + f"SFA item_dict latest status should match for update {i+1}") + + # Verify datetime was added to sent item_dict + self.assertIn('datetime', sent_latest_status, + f"SFA item_dict latest status should have datetime for update {i+1}") + sent_timestamp = sent_latest_status['datetime'] + self.assertTrue(sent_timestamp.endswith('Z'), + f"SFA item_dict timestamp should end with 'Z' for update {i+1}: {sent_timestamp}") + + # Verify optional fields in sent item_dict + if 'href' in status_update: + self.assertIn('href', sent_latest_status, + f"SFA item_dict should have href for update {i+1}") + self.assertEqual(sent_latest_status['href'], status_update['href'], + f"SFA item_dict href should match for update {i+1}") + + if 'errorCode' in status_update: + self.assertIn('errorCode', sent_latest_status, + f"SFA item_dict should have errorCode for update {i+1}") + self.assertEqual(sent_latest_status['errorCode'], status_update['errorCode'], + f"SFA item_dict errorCode should match for update {i+1}") + + if 'errorMessage' in status_update: + self.assertIn('errorMessage', sent_latest_status, + f"SFA item_dict should have errorMessage for update {i+1}") + self.assertEqual(sent_latest_status['errorMessage'], status_update['errorMessage'], + f"SFA item_dict errorMessage should match for update {i+1}") + + # Final verification - check all statuses are in correct order + final_stac = archiver._DaacArchiverCatalia__archiving_granules_stac + final_statuses = final_stac.properties['archival:status'] + + # Verify all status values are in the correct sequence + expected_status_sequence = [update['status'] for update in status_updates] + actual_status_sequence = [status['status'] for status in final_statuses] + self.assertEqual(actual_status_sequence, expected_status_sequence, + "Status updates should be in the correct order") + + # Verify statuses with href field have correct href values + statuses_with_href = [(i, status) for i, status in enumerate(final_statuses) if 'href' in status] + expected_hrefs = [ + (1, "s3://uds-staging/example-collection/example-item-with-archival-status"), # cnm-staged-success + (3, "https://example.com/cnm/receive/def456") # cnm-receive-failed + ] + + for status_index, expected_href in expected_hrefs: + found_status = final_statuses[status_index] + self.assertEqual(found_status['href'], expected_href, + f"Status at index {status_index} should have href: {expected_href}") + + # Verify error information for failed status + failed_status = final_statuses[3] # cnm-receive-failed + self.assertEqual(failed_status['errorCode'], 
"NETWORK_TIMEOUT") + self.assertEqual(failed_status['errorMessage'], "Failed to receive CNM response within timeout period") + + # Final verification of the last item_dict sent to SFA client + final_sfa_call = sfa_calls[-1] + final_sent_item_dict = final_sfa_call['item_dict'] + + # Verify the final sent item_dict has all status updates in correct order + final_sent_properties = final_sent_item_dict['properties'] + final_sent_statuses = final_sent_properties['archival:status'] + + # Verify all status values are in the correct sequence in sent item_dict + final_sent_status_sequence = [status['status'] for status in final_sent_statuses] + self.assertEqual(final_sent_status_sequence, expected_status_sequence, + "Status updates should be in correct order in final sent item_dict") + + # Verify specific statuses in final sent item_dict + # Status 0: cnm-authorized-success (basic status only) + sent_status_0 = final_sent_statuses[0] + self.assertEqual(sent_status_0['status'], "cnm-authorized-success") + self.assertIn('datetime', sent_status_0) + self.assertNotIn('href', sent_status_0) + self.assertNotIn('errorCode', sent_status_0) + + # Status 1: cnm-staged-success (with href) + sent_status_1 = final_sent_statuses[1] + self.assertEqual(sent_status_1['status'], "cnm-staged-success") + self.assertEqual(sent_status_1['href'], "s3://uds-staging/example-collection/example-item-with-archival-status") + self.assertIn('datetime', sent_status_1) + self.assertNotIn('errorCode', sent_status_1) + + # Status 2: cnm-submit-success (basic status only) + sent_status_2 = final_sent_statuses[2] + self.assertEqual(sent_status_2['status'], "cnm-submit-success") + self.assertIn('datetime', sent_status_2) + self.assertNotIn('href', sent_status_2) + self.assertNotIn('errorCode', sent_status_2) + + # Status 3: cnm-receive-failed (with href and error details) + sent_status_3 = final_sent_statuses[3] + self.assertEqual(sent_status_3['status'], "cnm-receive-failed") + self.assertEqual(sent_status_3['href'], "https://example.com/cnm/receive/def456") + self.assertEqual(sent_status_3['errorCode'], "NETWORK_TIMEOUT") + self.assertEqual(sent_status_3['errorMessage'], "Failed to receive CNM response within timeout period") + self.assertIn('datetime', sent_status_3) + + # Verify all statuses have unique timestamps (they should be different due to sequential calls) + sent_timestamps = [status['datetime'] for status in final_sent_statuses] + self.assertEqual(len(sent_timestamps), len(set(sent_timestamps)), + "All status timestamps should be unique in sent item_dict") + + print(f"✅ Test passed! 
All {len(status_updates)} status updates applied correctly in sequence") + print(f"📤 Verified SFA client received correct item_dict with {len(final_sent_statuses)} status entries") + From 9aa23a45fe37fae75e3c819f668eb8ebace1462e Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 5 Nov 2025 09:26:28 -0800 Subject: [PATCH 08/35] feat: old extract files method copied --- .../daac_archiver/daac_archiver_catalia.py | 109 +++++++++++++++--- 1 file changed, 95 insertions(+), 14 deletions(-) diff --git a/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py b/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py index 45607ddb..5878c421 100644 --- a/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py +++ b/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py @@ -57,6 +57,7 @@ def __init__(self): self.__sfa_client = SFAClientFactory().get_instance_from_env() self.__archiving_granules_stac = None self.__archiving_status_extension_url = "https://stac-extensions.github.io/archival_statuses/v1.0.0/schema.json" + self.__cnm_msg_version = "1.6.0" self.__daac_agreements = [] def archive_granule(self, collection_id, granule_id): @@ -259,7 +260,90 @@ def update_status(self, archival_status: dict): LOGGER.error(f'Failed to update STAC item {item_id} in collection {collection_id}: {e}') raise RuntimeError(f'Failed to update STAC item status: {e}') from e + def __extract_files(self, uds_cnm_json: dict, daac_config: dict): + """ + + This method is copied from old source code. + It is expecting cnm_json which has the following structures for interested parts. + { + "product: { + "files": [ + { + "type": "data", + "name": "cc_file.pdf", + "uri": "https://uds-distribution-placeholder/uds-dev-cumulus-unity-staging/URN:NASA:UNITY:UDS_DEV_DEMO:DEV:UDS_UNIT_COLLECTION___2408270830/URN:NASA:UNITY:UDS_DEV_DEMO:DEV:UDS_UNIT_COLLECTION___2408270830:cc_file/cc_file.pdf", + "checksumType": "md5", + "checksum": "unknown", + "size": -1 + }, + ] + } + } + + In this new class, we will be getting the files from STAC granules object which is a dictionary under "assets". + I guess you can ignore the key under the assets and work with actual asset objects. + Example: + + { + "assets": { + "cc_file.pdf": { + "href": "/data/including_dir/daughter/cc_file.pdf", + "title": "cc_file.pdf", + "description": "size=1579135;checksumType=md5;checksum=deb1087d3e614f31b7c9eb461edea93a", + "file:size": 1579135, + "file:checksum": "deb1087d3e614f31b7c9eb461edea93a", + "roles": [ + "data" + ] + }, + } + } + :param uds_cnm_json: + :param daac_config: + :return: + """ + granule_files = uds_cnm_json['product']['files'] + if 'archiving_types' not in daac_config or len(daac_config['archiving_types']) < 1: + return granule_files # TODO remove missing md5? + archiving_types = {k['data_type']: [] if 'file_extension' not in k else k['file_extension'] for k in daac_config['archiving_types']} + result_files = [] + for each_file in granule_files: + LOGGER.debug(f'each_file: {each_file}') + if each_file['type'] not in archiving_types: + continue + file_extensions = archiving_types[each_file['type']] + each_file['uri'] = self.revert_to_s3_url(each_file['uri']) + if len(file_extensions) < 1: + result_files.append(each_file) # TODO remove missing md5? + temp_filename = each_file['name'].upper().strip() + if any([temp_filename.endswith(k.upper()) for k in file_extensions]): + result_files.append(each_file) # TODO remove missing md5? 
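+ # A hypothetical shape for daac_config['archiving_types'] that this filter expects (illustrative values, not from a real config): + # [{'data_type': 'data', 'file_extension': ['.nc', '.json']}, {'data_type': 'browse'}] + # An entry without 'file_extension' keeps every file of that data_type; other entries keep only matching extensions.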
+ return result_files def send_daac_sns(self, daac_config): + """ + + { + "product": { + "files": [ + { + "name":"TROPESS_CrIS-JPSS1_L2_Standard_CH4_20250108_MUSES_R1p23_megacity_los_angeles_MGLOS_F2p5_J0.nc", + "type":"data", + "uri":"s3://unity-test-unity-storage/URN:NASA:UNITY:unity:test:TRPSDL2ALLCRS1MGLOS___2/URN:NASA:UNITY:unity:test:TRPSDL2ALLCRS1MGLOS___2:datum/TROPESS_Standard/TRPSDL2ALLCRS1MGLOS.2/2025/01/08/TROPESS_CrIS-JPSS1_L2_Standard_CH4_20250108_MUSES_R1p23_megacity_los_angeles_MGLOS_F2p5_J0/TROPESS_CrIS-JPSS1_L2_Standard_CH4_20250108_MUSES_R1p23_megacity_los_angeles_MGLOS_F2p5_J0.nc", + "size":280595 + } + ], + "name": "TROPESS_CrIS-JPSS1_L2_Standard_CH4_20250108_MUSES_R1p23_megacity_los_angeles_MGLOS_F2p5_J0" + }, + "identifier":"testIdentifier123456", + "collection": { + "name": "TRPSDL2ALLCRS1MGLOS", + "version": "2" + }, + "provider":"tropess_testing" + } + :param daac_config: + :return: + """ try: self.__sns.set_topic_arn(daac_config['daac_sns_topic_arn']) daac_cnm_message = { 'collection': { 'name': daac_config['daac_collection_name'], 'version': daac_config['daac_data_version'], }, - "identifier": uds_cnm_json['identifier'], + "identifier": self.__archiving_granules_stac.id, # Seems like it's the same granule ID. (Original value: uds_cnm_json['identifier']) "submissionTime": f'{TimeUtils.get_current_time()}Z', - "provider": daac_config['daac_provider'] if 'daac_provider' in daac_config else granule_identifier.tenant, - "version": "1.6.0", # TODO this is hardcoded? + "provider": daac_config['daac_provider'], # NOTE: we can't use tenant as provider anymore because we aren't sure the tenant will be there in CATALIA. (Original value: daac_config['daac_provider'] if 'daac_provider' in daac_config else granule_identifier.tenant) + "version": self.__cnm_msg_version, "product": { - "name": granule_identifier.granule, + "name": self.__archiving_granules_stac.id, # NOTE: Original value = granule_identifier.granule. Should be the name of granule. 
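+ # For orientation (values mirror the docstring sample above, not a guaranteed contract): 'collection' comes from + # daac_config, 'identifier' and the product 'name' are both the STAC item id, and 'version' is the CNM message + # version constant (self.__cnm_msg_version, "1.6.0" at the time of writing).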
# "dataVersion": daac_config['daac_data_version'], 'files': self.__extract_files(uds_cnm_json, daac_config), } } LOGGER.debug(f'daac_cnm_message: {daac_cnm_message}') self.__sns.set_external_role(daac_config['daac_role_arn'], daac_config['daac_role_session_name']).publish_message(json.dumps(daac_cnm_message), True) - self.__granules_index.update_entry(granule_identifier.tenant, granule_identifier.venue, { - 'archive_status': 'cnm_s_success', - 'archive_error_message': '', - 'archive_error_code': '', - }, uds_cnm_json['identifier']) + self.update_status({ + "status": "cnm-submit-success", + }) except Exception as e: LOGGER.exception(f'failed during archival process') - self.__granules_index.update_entry(granule_identifier.tenant, granule_identifier.venue, { - 'archive_status': 'cnm_s_failed', - 'archive_error_message': str(e), - }, uds_cnm_json['identifier']) - + self.update_status({ + "status": "cnm-submit-failed", + "errorMessage": str(e), + }) return From 71815997dccf7c554ebec3f501ab2c48b4d4b90e Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 5 Nov 2025 09:45:01 -0800 Subject: [PATCH 09/35] feat: update extract_files method --- .../daac_archiver/daac_archiver_catalia.py | 171 +++++++++++++++--- 1 file changed, 143 insertions(+), 28 deletions(-) diff --git a/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py b/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py index 5878c421..c42aed10 100644 --- a/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py +++ b/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py @@ -260,34 +260,35 @@ def update_status(self, archival_status: dict): LOGGER.error(f'Failed to update STAC item {item_id} in collection {collection_id}: {e}') raise RuntimeError(f'Failed to update STAC item status: {e}') from e - def __extract_files(self, uds_cnm_json: dict, daac_config: dict): + def extract_files(self, daac_config: dict): """ + Extract files from STAC assets and convert to CNM file format. - This method is copied from old source code. - It is expecting cnm_json which has the following structures for interested parts. + This method has been updated to work with STAC assets instead of CNM JSON. + It extracts files from self.__archiving_granules_stac.assets and converts them + to the CNM file format expected by DAAC. + + Old CNM JSON structure: { "product: { "files": [ { "type": "data", "name": "cc_file.pdf", - "uri": "https://uds-distribution-placeholder/uds-dev-cumulus-unity-staging/URN:NASA:UNITY:UDS_DEV_DEMO:DEV:UDS_UNIT_COLLECTION___2408270830/URN:NASA:UNITY:UDS_DEV_DEMO:DEV:UDS_UNIT_COLLECTION___2408270830:cc_file/cc_file.pdf", + "uri": "s3://bucket/path/cc_file.pdf", "checksumType": "md5", - "checksum": "unknown", - "size": -1 + "checksum": "deb1087d3e614f31b7c9eb461edea93a", + "size": 1579135 }, ] } } - In this new class, we will be getting the files from STAC granules object which is a dictionary under "assets". - I guess you can ignore the key under the assets and work with actual asset objects. 
- Example: - + STAC assets structure: { "assets": { "cc_file.pdf": { - "href": "/data/including_dir/daughter/cc_file.pdf", + "href": "s3://bucket/path/cc_file.pdf", "title": "cc_file.pdf", "description": "size=1579135;checksumType=md5;checksum=deb1087d3e614f31b7c9eb461edea93a", "file:size": 1579135, @@ -298,27 +299,141 @@ def __extract_files(self, uds_cnm_json: dict, daac_config: dict): }, } } - :param uds_cnm_json: - :param daac_config: - :return: + + :param daac_config: DAAC configuration containing archiving_types + :return: List of files in CNM format """ - granule_files = uds_cnm_json['product']['files'] + if self.__archiving_granules_stac is None: + raise ValueError('NULL archiving granule. Cannot extract files.') + + # Get assets from STAC item + stac_assets = self.__archiving_granules_stac.assets + + # If no archiving types specified, include all assets if 'archiving_types' not in daac_config or len(daac_config['archiving_types']) < 1: - return granule_files # TODO remove missing md5? - archiving_types = {k['data_type']: [] if 'file_extension' not in k else k['file_extension'] for k in daac_config['archiving_types']} + LOGGER.debug('No archiving types specified in DAAC config, including all assets') + return self._convert_all_assets_to_cnm_format(stac_assets) + + # Build archiving types mapping: {data_type: [file_extensions]} + archiving_types = {} + for archiving_type in daac_config['archiving_types']: + data_type = archiving_type['data_type'] + file_extensions = archiving_type.get('file_extension', []) + if not isinstance(file_extensions, list): + file_extensions = [file_extensions] if file_extensions else [] + archiving_types[data_type] = file_extensions + + LOGGER.debug(f'Archiving types configuration: {archiving_types}') + result_files = [] - for each_file in granule_files: - LOGGER.debug(f'each_file: {each_file}') - if each_file['type'] not in archiving_types: + for asset_key, asset in stac_assets.items(): + LOGGER.debug(f'Processing asset: {asset_key}') + + # Get asset type from roles (use first role as type, default to 'data') + asset_type = 'data' # Default type + if hasattr(asset, 'roles') and asset.roles and len(asset.roles) > 0: + asset_type = asset.roles[0] + + # Check if this asset type should be archived + if asset_type not in archiving_types: + LOGGER.debug(f'Asset {asset_key} type "{asset_type}" not in archiving types, skipping') continue - file_extensions = archiving_types[each_file['type']] - each_file['uri'] = self.revert_to_s3_url(each_file['uri']) - if len(file_extensions) < 1: - result_files.append(each_file) # TODO remove missing md5? - temp_filename = each_file['name'].upper().strip() - if any([temp_filename.endswith(k.upper()) for k in file_extensions]): - result_files.append(each_file) # TODO remove missing md5? 
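+ # At this point asset_type is one of the configured data types, e.g. 'data' or 'metadata' in a + # mapping like {'data': ['.nc'], 'metadata': ['.xml']} (illustrative values); roles default to 'data' when absent.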
+ + # Get file extensions for this asset type + file_extensions = archiving_types[asset_type] + + # Convert STAC asset to CNM file format + cnm_file = self._convert_stac_asset_to_cnm_file(asset_key, asset) + + # If no file extensions specified for this type, include the file + if len(file_extensions) == 0: + LOGGER.debug(f'No file extensions specified for type "{asset_type}", including asset {asset_key}') + result_files.append(cnm_file) + continue + + # Check if file matches any of the specified extensions + filename = cnm_file['name'].upper().strip() + if any(filename.endswith(ext.upper()) for ext in file_extensions): + LOGGER.debug(f'Asset {asset_key} matches extension filter, including') + result_files.append(cnm_file) + else: + LOGGER.debug(f'Asset {asset_key} does not match extension filter {file_extensions}, skipping') + + LOGGER.info(f'Extracted {len(result_files)} files from {len(stac_assets)} STAC assets') + return result_files + + def _convert_all_assets_to_cnm_format(self, stac_assets: dict): + """Convert all STAC assets to CNM file format without filtering.""" + result_files = [] + for asset_key, asset in stac_assets.items(): + cnm_file = self._convert_stac_asset_to_cnm_file(asset_key, asset) + result_files.append(cnm_file) return result_files + + def _convert_stac_asset_to_cnm_file(self, asset_key: str, asset): + """ + Convert a single STAC asset to CNM file format. + + :param asset_key: The key/name of the asset in STAC + :param asset: The STAC Asset object + :return: Dictionary in CNM file format + """ + # Extract filename from href or use asset_key + filename = asset_key + if hasattr(asset, 'href') and asset.href: + filename = asset.href.split('/')[-1] + + # Get asset type from roles (use first role, default to 'data') + asset_type = 'data' + if hasattr(asset, 'roles') and asset.roles and len(asset.roles) > 0: + asset_type = asset.roles[0] + + # Get file size + file_size = -1 + if hasattr(asset, 'extra_fields') and 'file:size' in asset.extra_fields: + file_size = asset.extra_fields['file:size'] + elif hasattr(asset, 'extra_fields') and 'file_size' in asset.extra_fields: + file_size = asset.extra_fields['file_size'] + + # Get checksum information + checksum_type = 'md5' # Default + checksum_value = 'unknown' # Default + + if hasattr(asset, 'extra_fields'): + if 'file:checksum' in asset.extra_fields: + checksum_value = asset.extra_fields['file:checksum'] + elif 'file_checksum' in asset.extra_fields: + checksum_value = asset.extra_fields['file_checksum'] + + # Try to parse checksum info from description if available + if hasattr(asset, 'description') and asset.description: + desc = asset.description.lower() + if 'checksumtype=' in desc: + # Parse description like "size=1579135;checksumType=md5;checksum=deb1087d3e614f31b7c9eb461edea93a" + parts = desc.split(';') + for part in parts: + if part.startswith('checksumtype='): + checksum_type = part.split('=')[1] + elif part.startswith('checksum='): + checksum_value = part.split('=')[1] + elif part.startswith('size=') and file_size == -1: + try: + file_size = int(part.split('=')[1]) + except ValueError: + pass + + # Build CNM file structure + cnm_file = { + "type": asset_type, + "name": filename, + "uri": asset.href if hasattr(asset, 'href') else '', + "checksumType": checksum_type, + "checksum": checksum_value, + "size": file_size + } + + LOGGER.debug(f'Converted STAC asset {asset_key} to CNM file: {cnm_file}') + return cnm_file def send_daac_sns(self, daac_config): """ @@ -358,7 +473,7 @@ def send_daac_sns(self, daac_config): 
"product": { "name": self.__archiving_granules_stac.id, # NOTE: Original value = granule_identifier.granule. Should be the name of granule. # "dataVersion": daac_config['daac_data_version'], - 'files': self.__extract_files(uds_cnm_json, daac_config), + 'files': self.extract_files(daac_config), } } LOGGER.debug(f'daac_cnm_message: {daac_cnm_message}') From 1b18ccca08cd5421dc994e7f40f318791ac1bf4d Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 5 Nov 2025 10:25:38 -0800 Subject: [PATCH 10/35] feat; add test case --- docker/docker-compose-web-service.yml | 5 +- .../test_daac_archiver_catalia.py | 229 ++++++++++++++++++ 2 files changed, 232 insertions(+), 2 deletions(-) diff --git a/docker/docker-compose-web-service.yml b/docker/docker-compose-web-service.yml index 3633b0f7..d79fd7db 100644 --- a/docker/docker-compose-web-service.yml +++ b/docker/docker-compose-web-service.yml @@ -1,9 +1,10 @@ version: "3.7" services: cumulus_granules_upload: - image: ghcr.io/unity-sds/unity-data-services:7.10.1 + image: ghcr.io/unity-sds/unity-data-services:9.15.1 + platform: linux/amd64 restart: always - entrypoint: 'uvicorn web_service_stac_browser:app --host 0.0.0.0 --port 8005 --log-level info --reload' + entrypoint: 'uvicorn web_service:app --host 0.0.0.0 --port 8005 --log-level info --reload' tty: true working_dir: '/usr/src/app/unity/cumulus_lambda_functions/uds_api' ports: diff --git a/tests/cumulus_lambda_functions/daac_archiver/test_daac_archiver_catalia.py b/tests/cumulus_lambda_functions/daac_archiver/test_daac_archiver_catalia.py index f7b64e0a..479b29b2 100644 --- a/tests/cumulus_lambda_functions/daac_archiver/test_daac_archiver_catalia.py +++ b/tests/cumulus_lambda_functions/daac_archiver/test_daac_archiver_catalia.py @@ -573,3 +573,232 @@ def mock_update_item(collection_id, item_id, item, update_whole=True): print(f"✅ Test passed! All {len(status_updates)} status updates applied correctly in sequence") print(f"📤 Verified SFA client received correct item_dict with {len(final_sent_statuses)} status entries") + def test_extract_files_01(self): + """ + Test extract_files method with specific DAAC config and STAC assets: + 1. Creates STAC item with various file types (.nc, browse, .xml, .json) + 2. Uses DAAC config with specific archiving types and extensions + 3. Verifies correct files are filtered and converted to CNM format + 4. Checks CNM format structure and field values + """ + # Setup test data + collection_id = 'test-collection-extract' + item_id = f'test-item-extract-{uuid.uuid4().hex[:8]}' + + # Create STAC Item with various assets + stac_item = Item( + id=item_id, + geometry={ + "type": "Polygon", + "coordinates": [[[-180, -90], [180, -90], [180, 90], [-180, 90], [-180, -90]]] + }, + bbox=[-180, -90, 180, 90], + datetime=datetime.now(), + properties={} + ) + stac_item.collection_id = collection_id + + # Add assets with different types and extensions + # 1. .nc data file (should be included - matches 'data' type and '.nc' extension) + stac_item.add_asset( + 'data_file.nc', + Asset( + href='s3://test-bucket/path/data_file.nc', + media_type='application/netcdf', + title='NetCDF Data File', + description='size=2048000;checksumType=md5;checksum=abc123def456', + roles=['data'], + extra_fields={ + 'file:size': 2048000, + 'file:checksum': 'abc123def456' + } + ) + ) + + # 2. 
Browse file (should be included - matches 'browse' type, no extension filter) + stac_item.add_asset( + 'browse_image.png', + Asset( + href='s3://test-bucket/path/browse_image.png', + media_type='image/png', + title='Browse Image', + description='size=512000;checksumType=sha256;checksum=xyz789abc123', + roles=['browse'], + extra_fields={ + 'file:size': 512000, + 'file:checksum': 'xyz789abc123' + } + ) + ) + + # 3. .xml metadata file (should be excluded - no metadata assets match the extensions) + # Actually, let's create a different metadata file to test filtering + stac_item.add_asset( + 'metadata_file.txt', + Asset( + href='s3://test-bucket/path/metadata_file.txt', + media_type='text/plain', + title='Metadata Text File', + description='size=1024;checksumType=md5;checksum=meta123456', + roles=['metadata'], + extra_fields={ + 'file:size': 1024, + 'file:checksum': 'meta123456' + } + ) + ) + + # 4. .json data file (should be included - matches 'data' type and '.json' extension) + stac_item.add_asset( + 'config.json', + Asset( + href='s3://test-bucket/path/config.json', + media_type='application/json', + title='Configuration JSON', + description='size=4096;checksumType=md5;checksum=json987654', + roles=['data'], + extra_fields={ + 'file:size': 4096, + 'file:checksum': 'json987654' + } + ) + ) + + # 5. .tif data file (should be excluded - 'data' type but wrong extension) + stac_item.add_asset( + 'image.tif', + Asset( + href='s3://test-bucket/path/image.tif', + media_type='image/tiff', + title='TIFF Image', + description='size=8192000;checksumType=md5;checksum=tiff111222', + roles=['data'], + extra_fields={ + 'file:size': 8192000, + 'file:checksum': 'tiff111222' + } + ) + ) + + # Define DAAC config with specific archiving types + daac_config = { + 'daac_collection_name': 'TEST_COLLECTION', + 'daac_data_version': '1.0', + 'daac_provider': 'test_provider', + 'archiving_types': [ + {'data_type': 'data', 'file_extension': ['.json', '.nc']}, + {'data_type': 'metadata', 'file_extension': ['.xml']}, + {'data_type': 'browse'}, # No file_extension means all files of this type + ], + } + + # Mock dependencies + with patch('cumulus_lambda_functions.daac_archiver.daac_archiver_catalia.AwsS3') as mock_s3_class, \ + patch('cumulus_lambda_functions.daac_archiver.daac_archiver_catalia.AwsSns') as mock_sns_class, \ + patch('cumulus_lambda_functions.daac_archiver.daac_archiver_catalia.SFAClientFactory') as mock_sfa_factory: + + # Setup mocks + mock_s3 = Mock() + mock_s3_class.return_value = mock_s3 + mock_sns = Mock() + mock_sns_class.return_value = mock_sns + mock_sfa_client = Mock() + mock_sfa_factory.return_value.get_instance_from_env.return_value = mock_sfa_client + + # Create archiver instance + archiver = DaacArchiverCatalia() + + # Set the STAC item + archiver._DaacArchiverCatalia__archiving_granules_stac = stac_item + + # Call extract_files method + extracted_files = archiver.extract_files(daac_config) + + # Verify correct number of files extracted + # Expected: data_file.nc, browse_image.png, config.json (3 files) + # Excluded: metadata_file.txt (.txt not in .xml filter), image.tif (.tif not in data extensions) + expected_file_count = 3 + self.assertEqual(len(extracted_files), expected_file_count, + f"Should extract {expected_file_count} files, got {len(extracted_files)}") + + # Verify each extracted file has correct CNM format structure + for cnm_file in extracted_files: + # Check required CNM fields are present + self.assertIn('type', cnm_file, "CNM file should have 'type' field") + 
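+ # For reference, the entry for the first asset should come out roughly as: + # {'type': 'data', 'name': 'data_file.nc', 'uri': 's3://test-bucket/path/data_file.nc', + # 'checksumType': 'md5', 'checksum': 'abc123def456', 'size': 2048000}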
self.assertIn('name', cnm_file, "CNM file should have 'name' field") + self.assertIn('uri', cnm_file, "CNM file should have 'uri' field") + self.assertIn('checksumType', cnm_file, "CNM file should have 'checksumType' field") + self.assertIn('checksum', cnm_file, "CNM file should have 'checksum' field") + self.assertIn('size', cnm_file, "CNM file should have 'size' field") + + # Check field types + self.assertIsInstance(cnm_file['type'], str, "'type' should be string") + self.assertIsInstance(cnm_file['name'], str, "'name' should be string") + self.assertIsInstance(cnm_file['uri'], str, "'uri' should be string") + self.assertIsInstance(cnm_file['checksumType'], str, "'checksumType' should be string") + self.assertIsInstance(cnm_file['checksum'], str, "'checksum' should be string") + self.assertIsInstance(cnm_file['size'], int, "'size' should be integer") + + # Create a map of extracted files by name for easier verification + extracted_files_by_name = {cnm_file['name']: cnm_file for cnm_file in extracted_files} + + # Verify specific files were included with correct values + # 1. Verify data_file.nc was included + self.assertIn('data_file.nc', extracted_files_by_name, "data_file.nc should be included") + nc_file = extracted_files_by_name['data_file.nc'] + self.assertEqual(nc_file['type'], 'data', "NC file should have type 'data'") + self.assertEqual(nc_file['uri'], 's3://test-bucket/path/data_file.nc', "NC file URI should match") + self.assertEqual(nc_file['size'], 2048000, "NC file size should match") + self.assertEqual(nc_file['checksum'], 'abc123def456', "NC file checksum should match") + self.assertEqual(nc_file['checksumType'], 'md5', "NC file checksum type should be md5") + + # 2. Verify browse_image.png was included + self.assertIn('browse_image.png', extracted_files_by_name, "browse_image.png should be included") + browse_file = extracted_files_by_name['browse_image.png'] + self.assertEqual(browse_file['type'], 'browse', "Browse file should have type 'browse'") + self.assertEqual(browse_file['uri'], 's3://test-bucket/path/browse_image.png', "Browse file URI should match") + self.assertEqual(browse_file['size'], 512000, "Browse file size should match") + self.assertEqual(browse_file['checksum'], 'xyz789abc123', "Browse file checksum should match") + self.assertEqual(browse_file['checksumType'], 'sha256', "Browse file checksum type should be sha256") + + # 3. Verify config.json was included + self.assertIn('config.json', extracted_files_by_name, "config.json should be included") + json_file = extracted_files_by_name['config.json'] + self.assertEqual(json_file['type'], 'data', "JSON file should have type 'data'") + self.assertEqual(json_file['uri'], 's3://test-bucket/path/config.json', "JSON file URI should match") + self.assertEqual(json_file['size'], 4096, "JSON file size should match") + self.assertEqual(json_file['checksum'], 'json987654', "JSON file checksum should match") + self.assertEqual(json_file['checksumType'], 'md5', "JSON file checksum type should be md5") + + # 4. 
Verify excluded files are NOT present + self.assertNotIn('metadata_file.txt', extracted_files_by_name, + "metadata_file.txt should be excluded (wrong extension)") + self.assertNotIn('image.tif', extracted_files_by_name, + "image.tif should be excluded (wrong extension for data type)") + + # Verify filtering logic worked correctly + expected_files = {'data_file.nc', 'browse_image.png', 'config.json'} + actual_files = set(extracted_files_by_name.keys()) + self.assertEqual(actual_files, expected_files, + f"Extracted files should match expected. Expected: {expected_files}, Got: {actual_files}") + + # Verify URI format (should all be S3 URLs) + for cnm_file in extracted_files: + self.assertTrue(cnm_file['uri'].startswith('s3://'), + f"URI should be S3 URL: {cnm_file['uri']}") + + # Verify sizes are positive + for cnm_file in extracted_files: + self.assertGreater(cnm_file['size'], 0, + f"File size should be positive: {cnm_file['name']} has size {cnm_file['size']}") + + # Verify checksums are not 'unknown' for our test files + for cnm_file in extracted_files: + self.assertNotEqual(cnm_file['checksum'], 'unknown', + f"Checksum should be extracted from STAC for: {cnm_file['name']}") + + print(f"✅ Test passed! Extracted {len(extracted_files)} files with correct filtering:") + for cnm_file in extracted_files: + print(f" - {cnm_file['name']} (type: {cnm_file['type']}, size: {cnm_file['size']})") + print(f"📋 Filtering worked correctly: included expected files, excluded non-matching files") + From c418d0afa8187a363c08a916cd65925173a1a8db Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 5 Nov 2025 10:40:39 -0800 Subject: [PATCH 11/35] feat: more test case --- .../test_daac_archiver_catalia.py | 276 ++++++++++++++++++ 1 file changed, 276 insertions(+) diff --git a/tests/cumulus_lambda_functions/daac_archiver/test_daac_archiver_catalia.py b/tests/cumulus_lambda_functions/daac_archiver/test_daac_archiver_catalia.py index 479b29b2..9aaf94d9 100644 --- a/tests/cumulus_lambda_functions/daac_archiver/test_daac_archiver_catalia.py +++ b/tests/cumulus_lambda_functions/daac_archiver/test_daac_archiver_catalia.py @@ -802,3 +802,279 @@ def test_extract_files_01(self): print(f" - {cnm_file['name']} (type: {cnm_file['type']}, size: {cnm_file['size']})") print(f"📋 Filtering worked correctly: included expected files, excluded non-matching files") + def test_extract_files_02(self): + """ + Test extract_files method with NO archiving_types in DAAC config: + 1. Creates STAC item with various file types and extensions + 2. Uses DAAC config WITHOUT archiving_types field + 3. Verifies ALL files are extracted (no filtering) + 4. Checks CNM format structure for all files + """ + # Setup test data + collection_id = 'test-collection-no-filter' + item_id = f'test-item-no-filter-{uuid.uuid4().hex[:8]}' + + # Create STAC Item with various assets + stac_item = Item( + id=item_id, + geometry={ + "type": "Polygon", + "coordinates": [[[-180, -90], [180, -90], [180, 90], [-180, 90], [-180, -90]]] + }, + bbox=[-180, -90, 180, 90], + datetime=datetime.now(), + properties={} + ) + stac_item.collection_id = collection_id + + # Add diverse set of assets - all should be included + # 1. 
NetCDF data file + stac_item.add_asset( + 'science_data.nc', + Asset( + href='s3://test-bucket/data/science_data.nc', + media_type='application/netcdf', + title='Science Data NetCDF', + description='size=5242880;checksumType=md5;checksum=science123abc', + roles=['data'], + extra_fields={ + 'file:size': 5242880, + 'file:checksum': 'science123abc' + } + ) + ) + + # 2. XML metadata file + stac_item.add_asset( + 'metadata.xml', + Asset( + href='s3://test-bucket/metadata/metadata.xml', + media_type='application/xml', + title='Granule Metadata', + description='size=8192;checksumType=sha1;checksum=meta456def', + roles=['metadata'], + extra_fields={ + 'file:size': 8192, + 'file:checksum': 'meta456def' + } + ) + ) + + # 3. Browse image + stac_item.add_asset( + 'quicklook.jpg', + Asset( + href='s3://test-bucket/browse/quicklook.jpg', + media_type='image/jpeg', + title='Browse Image', + description='size=204800;checksumType=md5;checksum=browse789ghi', + roles=['browse'], + extra_fields={ + 'file:size': 204800, + 'file:checksum': 'browse789ghi' + } + ) + ) + + # 4. Documentation file + stac_item.add_asset( + 'readme.txt', + Asset( + href='s3://test-bucket/docs/readme.txt', + media_type='text/plain', + title='Documentation', + description='size=2048;checksumType=sha256;checksum=docs123jkl', + roles=['documentation'], + extra_fields={ + 'file:size': 2048, + 'file:checksum': 'docs123jkl' + } + ) + ) + + # 5. Configuration JSON + stac_item.add_asset( + 'processing_params.json', + Asset( + href='s3://test-bucket/config/processing_params.json', + media_type='application/json', + title='Processing Parameters', + description='size=1024;checksumType=md5;checksum=config456mno', + roles=['data'], + extra_fields={ + 'file:size': 1024, + 'file:checksum': 'config456mno' + } + ) + ) + + # 6. 
Binary data file with unusual extension + stac_item.add_asset( + 'calibration.cal', + Asset( + href='s3://test-bucket/cal/calibration.cal', + media_type='application/octet-stream', + title='Calibration Data', + description='size=16384;checksumType=sha256;checksum=cal789pqr', + roles=['data'], + extra_fields={ + 'file:size': 16384, + 'file:checksum': 'cal789pqr' + } + ) + ) + + # Define DAAC config WITHOUT archiving_types field + daac_config = { + 'daac_collection_name': 'TEST_COLLECTION_ALL', + 'daac_data_version': '2.0', + 'daac_provider': 'test_provider_all' + # NOTE: No 'archiving_types' field - should extract all files + } + + # Mock dependencies + with patch('cumulus_lambda_functions.daac_archiver.daac_archiver_catalia.AwsS3') as mock_s3_class, \ + patch('cumulus_lambda_functions.daac_archiver.daac_archiver_catalia.AwsSns') as mock_sns_class, \ + patch('cumulus_lambda_functions.daac_archiver.daac_archiver_catalia.SFAClientFactory') as mock_sfa_factory: + + # Setup mocks + mock_s3 = Mock() + mock_s3_class.return_value = mock_s3 + mock_sns = Mock() + mock_sns_class.return_value = mock_sns + mock_sfa_client = Mock() + mock_sfa_factory.return_value.get_instance_from_env.return_value = mock_sfa_client + + # Create archiver instance + archiver = DaacArchiverCatalia() + + # Set the STAC item + archiver._DaacArchiverCatalia__archiving_granules_stac = stac_item + + # Call extract_files method + extracted_files = archiver.extract_files(daac_config) + + # Verify ALL files were extracted (no filtering) + expected_file_count = 6 # All 6 assets should be included + self.assertEqual(len(extracted_files), expected_file_count, + f"Should extract ALL {expected_file_count} files when no archiving_types specified, got {len(extracted_files)}") + + # Verify each extracted file has correct CNM format structure + for cnm_file in extracted_files: + # Check required CNM fields are present + self.assertIn('type', cnm_file, "CNM file should have 'type' field") + self.assertIn('name', cnm_file, "CNM file should have 'name' field") + self.assertIn('uri', cnm_file, "CNM file should have 'uri' field") + self.assertIn('checksumType', cnm_file, "CNM file should have 'checksumType' field") + self.assertIn('checksum', cnm_file, "CNM file should have 'checksum' field") + self.assertIn('size', cnm_file, "CNM file should have 'size' field") + + # Check field types + self.assertIsInstance(cnm_file['type'], str, "'type' should be string") + self.assertIsInstance(cnm_file['name'], str, "'name' should be string") + self.assertIsInstance(cnm_file['uri'], str, "'uri' should be string") + self.assertIsInstance(cnm_file['checksumType'], str, "'checksumType' should be string") + self.assertIsInstance(cnm_file['checksum'], str, "'checksum' should be string") + self.assertIsInstance(cnm_file['size'], int, "'size' should be integer") + + # Create a map of extracted files by name for easier verification + extracted_files_by_name = {cnm_file['name']: cnm_file for cnm_file in extracted_files} + + # Verify ALL assets were included with correct values + expected_files = { + 'science_data.nc', 'metadata.xml', 'quicklook.jpg', + 'readme.txt', 'processing_params.json', 'calibration.cal' + } + actual_files = set(extracted_files_by_name.keys()) + self.assertEqual(actual_files, expected_files, + f"All files should be extracted. Expected: {expected_files}, Got: {actual_files}") + + # Verify specific files with their expected properties + # 1. 
NetCDF file + nc_file = extracted_files_by_name['science_data.nc'] + self.assertEqual(nc_file['type'], 'data') + self.assertEqual(nc_file['uri'], 's3://test-bucket/data/science_data.nc') + self.assertEqual(nc_file['size'], 5242880) + self.assertEqual(nc_file['checksum'], 'science123abc') + self.assertEqual(nc_file['checksumType'], 'md5') + + # 2. XML metadata file + xml_file = extracted_files_by_name['metadata.xml'] + self.assertEqual(xml_file['type'], 'metadata') + self.assertEqual(xml_file['uri'], 's3://test-bucket/metadata/metadata.xml') + self.assertEqual(xml_file['size'], 8192) + self.assertEqual(xml_file['checksum'], 'meta456def') + self.assertEqual(xml_file['checksumType'], 'sha1') + + # 3. Browse image + browse_file = extracted_files_by_name['quicklook.jpg'] + self.assertEqual(browse_file['type'], 'browse') + self.assertEqual(browse_file['uri'], 's3://test-bucket/browse/quicklook.jpg') + self.assertEqual(browse_file['size'], 204800) + self.assertEqual(browse_file['checksum'], 'browse789ghi') + self.assertEqual(browse_file['checksumType'], 'md5') + + # 4. Documentation file + docs_file = extracted_files_by_name['readme.txt'] + self.assertEqual(docs_file['type'], 'documentation') + self.assertEqual(docs_file['uri'], 's3://test-bucket/docs/readme.txt') + self.assertEqual(docs_file['size'], 2048) + self.assertEqual(docs_file['checksum'], 'docs123jkl') + self.assertEqual(docs_file['checksumType'], 'sha256') + + # 5. JSON config file + json_file = extracted_files_by_name['processing_params.json'] + self.assertEqual(json_file['type'], 'data') + self.assertEqual(json_file['uri'], 's3://test-bucket/config/processing_params.json') + self.assertEqual(json_file['size'], 1024) + self.assertEqual(json_file['checksum'], 'config456mno') + self.assertEqual(json_file['checksumType'], 'md5') + + # 6. Binary calibration file + cal_file = extracted_files_by_name['calibration.cal'] + self.assertEqual(cal_file['type'], 'data') + self.assertEqual(cal_file['uri'], 's3://test-bucket/cal/calibration.cal') + self.assertEqual(cal_file['size'], 16384) + self.assertEqual(cal_file['checksum'], 'cal789pqr') + self.assertEqual(cal_file['checksumType'], 'sha256') + + # Verify variety of asset types were preserved + extracted_types = {cnm_file['type'] for cnm_file in extracted_files} + expected_types = {'data', 'metadata', 'browse', 'documentation'} + self.assertEqual(extracted_types, expected_types, + f"Should preserve all asset types. Expected: {expected_types}, Got: {extracted_types}") + + # Verify variety of checksum types were preserved + extracted_checksum_types = {cnm_file['checksumType'] for cnm_file in extracted_files} + expected_checksum_types = {'md5', 'sha1', 'sha256'} + self.assertEqual(extracted_checksum_types, expected_checksum_types, + f"Should preserve all checksum types. Expected: {expected_checksum_types}, Got: {extracted_checksum_types}") + + # Verify file extensions variety (no filtering applied) + extracted_extensions = {cnm_file['name'].split('.')[-1] for cnm_file in extracted_files} + expected_extensions = {'nc', 'xml', 'jpg', 'txt', 'json', 'cal'} + self.assertEqual(extracted_extensions, expected_extensions, + f"Should include all file extensions. 
Expected: {expected_extensions}, Got: {extracted_extensions}") + + # Verify URI format (should all be S3 URLs) + for cnm_file in extracted_files: + self.assertTrue(cnm_file['uri'].startswith('s3://'), + f"URI should be S3 URL: {cnm_file['uri']}") + + # Verify sizes are positive + for cnm_file in extracted_files: + self.assertGreater(cnm_file['size'], 0, + f"File size should be positive: {cnm_file['name']} has size {cnm_file['size']}") + + # Verify no 'unknown' checksums (all extracted from STAC) + for cnm_file in extracted_files: + self.assertNotEqual(cnm_file['checksum'], 'unknown', + f"Checksum should be extracted from STAC for: {cnm_file['name']}") + + print(f"✅ Test passed! Extracted ALL {len(extracted_files)} files (no filtering applied):") + for cnm_file in extracted_files: + print(f" - {cnm_file['name']} (type: {cnm_file['type']}, size: {cnm_file['size']}, checksum: {cnm_file['checksumType']})") + print(f"📂 No archiving_types filter: All asset types and extensions included") + print(f"🎯 Asset types found: {sorted(extracted_types)}") + print(f"🔐 Checksum types found: {sorted(extracted_checksum_types)}") + From ab3d8ecf0c05bcb29eaa43ecd2911417e1149699 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Mon, 24 Nov 2025 15:20:55 -0800 Subject: [PATCH 12/35] feat: ddb for authorizer --- .../daac_archiver/catalia_auth_db.py | 108 ++++++++++++++++++ requirements.txt | 2 +- .../daac_archiver/test_catalia_auth_db.py | 21 ++++ 3 files changed, 130 insertions(+), 1 deletion(-) create mode 100644 cumulus_lambda_functions/daac_archiver/catalia_auth_db.py create mode 100644 tests/cumulus_lambda_functions/daac_archiver/test_catalia_auth_db.py diff --git a/cumulus_lambda_functions/daac_archiver/catalia_auth_db.py b/cumulus_lambda_functions/daac_archiver/catalia_auth_db.py new file mode 100644 index 00000000..9714d0c8 --- /dev/null +++ b/cumulus_lambda_functions/daac_archiver/catalia_auth_db.py @@ -0,0 +1,108 @@ +import re +from mdps_ds_lib.lib.aws.no_sql_abstract import NoSqlProps +from mdps_ds_lib.lib.aws.no_sql_ddb import NoSqlDdb +from mdps_ds_lib.lib.aws.no_sql_factory import NoSqlFactory + + +class CataliaAuthDb: + def __init__(self, table_name: str): + ddb_props = NoSqlProps() + ddb_props.table = table_name + ddb_props.primary_key = 'userGroup' + ddb_props.secondary_key = 'projectMap' + + param = ddb_props.to_json() + param['file_repo'] = 'AWS_DDB' + + self.__ddb: NoSqlDdb = NoSqlFactory().get_instance(**param) + + def add(self, user_group, collection, daac_collection, access: bool): + item1 = { + 'userGroup': user_group, + 'projectMap': '', + 'sourceProject': collection, + 'targetProject': daac_collection, + 'access': access, + } + sk1 = f'{item1["sourceProject"]}->{item1["targetProject"]}' + self.__ddb.add(item1['userGroup'], sk1, item1, replace=True) + return + + def authorize(self, user_group, catalia_collection, daac_collection): + """ + This will retrieve entries from DDB with user_group as PK. (This is done). + If results is None or empty array, return False. Not Authorized. + For each result, check sourceProject REGEX against catalia_collection input name. + If results is None or empty array, return False. Not Authorized. + For each result, check targetProject REGEX against daac_collection input name. + If results is None or empty array, return False. Not Authorized. + If only 1 row exists, return its "access" key. + If two or more rows exist, pick the one closest to the daac_collection in the targetProject name. + If only 1 row exists, return its "access" key. 
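+ ("Closest" here means the longest case-insensitive common prefix. For example, for daac_collection 'M:N:L0_V1', + a stored targetProject of 'M:N:L0.*' beats '.*' because it shares the 6-character prefix 'M:N:L0'.)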
+ If two or more rows exist, pick the one closest to the catalia_collection in the sourceProject. + If only 1 row exists, return its "access" key. + :param user_group: + :param catalia_collection: + :param daac_collection: + :return: + """ + results = self.__ddb.get(user_group, secondary_key=None) + + if not results or len(results) == 0: + return False + + source_matches = [] + for result in results: + source_pattern = result.get('sourceProject', '') + try: + if re.match(source_pattern, catalia_collection): + source_matches.append(result) + except re.error: + if source_pattern == catalia_collection: + source_matches.append(result) + + if not source_matches or len(source_matches) == 0: + return False + + target_matches = [] + for result in source_matches: + target_pattern = result.get('targetProject', '') + try: + if re.match(target_pattern, daac_collection, re.IGNORECASE): + target_matches.append(result) + except re.error: + if target_pattern == daac_collection: + target_matches.append(result) + + if not target_matches or len(target_matches) == 0: + return False + + if len(target_matches) == 1: + return target_matches[0].get('access', False) + + closest_target = min(target_matches, key=lambda x: self._string_distance(x.get('targetProject', ''), daac_collection)) + + final_matches = [r for r in target_matches if r.get('targetProject') == closest_target.get('targetProject')] + + if len(final_matches) == 1: + return final_matches[0].get('access', False) + + closest_source = min(final_matches, key=lambda x: self._string_distance(x.get('sourceProject', ''), catalia_collection)) + + return closest_source.get('access', False) + + def _string_distance(self, s1, s2): + """ + Calculate the negative of the maximum common prefix length (case insensitive). + Returns negative value so that longer prefixes result in smaller distances for min() selection. 
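+ Example (hypothetical inputs): _string_distance('M:N:L0_V1', 'M:N:L0_V9') == -8, while + _string_distance('M:N:L1_V1', 'M:N:L0_V9') == -5, so min() with this key prefers the first pattern.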
+ """ + s1_lower = s1.lower() + s2_lower = s2.lower() + + common_prefix_length = 0 + for i in range(min(len(s1_lower), len(s2_lower))): + if s1_lower[i] == s2_lower[i]: + common_prefix_length += 1 + else: + break + return -common_prefix_length diff --git a/requirements.txt b/requirements.txt index 5222289d..efe26b00 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,7 +14,7 @@ jsonschema==4.23.0 jsonschema-specifications==2023.12.1 lark==0.12.0 mangum==0.18.0 -mdps-ds-lib==1.2.0.dev200 +mdps-ds-lib==1.2.0.dev300 pydantic==2.9.2 pydantic_core==2.23.4 pygeofilter==0.2.4 diff --git a/tests/cumulus_lambda_functions/daac_archiver/test_catalia_auth_db.py b/tests/cumulus_lambda_functions/daac_archiver/test_catalia_auth_db.py new file mode 100644 index 00000000..eef31829 --- /dev/null +++ b/tests/cumulus_lambda_functions/daac_archiver/test_catalia_auth_db.py @@ -0,0 +1,21 @@ +from unittest import TestCase + +from cumulus_lambda_functions.daac_archiver.catalia_auth_db import CataliaAuthDb + + +class TestCataliaAuthDb(TestCase): + def test_01(self): + cad = CataliaAuthDb('h5s_on_disk_william_local') + cad.add('A', 'X:Y:.*', '.*', False) + cad.add('A', 'X:Y:L0.*', 'M:N:L0.*', False) + cad.add('A', 'X:Y:L0_V1', 'M:N:L0.*', False) + cad.add('A', 'X:Y:L0.*', 'M:N:L0.*', True) + cad.add('A', 'X:Y:L1_V1', 'M:N:L1.*', True) + + self.assertFalse(cad.authorize('B', 'X', 'X')) + self.assertFalse(cad.authorize('A', 'X:Y:L0_V1', 'M:N:L0_V1')) + self.assertFalse(cad.authorize('A', 'X:Y:L1_V1', 'M:N:L0_V1')) + self.assertTrue(cad.authorize('A', 'X:Y:L1_V1', 'M:N:L1_V1')) + self.assertTrue(cad.authorize('A', 'X:Y:L0_V2', 'M:N:L0_V1')) + self.assertFalse(cad.authorize('A', 'X:Y:L0_V2', 'M:N:L1_V1')) + return From f3f82765bea9ad5145973ef7099cb3931231bf47 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Mon, 24 Nov 2025 15:24:53 -0800 Subject: [PATCH 13/35] fix: case insensitivity --- cumulus_lambda_functions/daac_archiver/catalia_auth_db.py | 2 +- .../daac_archiver/test_catalia_auth_db.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cumulus_lambda_functions/daac_archiver/catalia_auth_db.py b/cumulus_lambda_functions/daac_archiver/catalia_auth_db.py index 9714d0c8..826d7c8c 100644 --- a/cumulus_lambda_functions/daac_archiver/catalia_auth_db.py +++ b/cumulus_lambda_functions/daac_archiver/catalia_auth_db.py @@ -55,7 +55,7 @@ def authorize(self, user_group, catalia_collection, daac_collection): for result in results: source_pattern = result.get('sourceProject', '') try: - if re.match(source_pattern, catalia_collection): + if re.match(source_pattern, catalia_collection, re.IGNORECASE): source_matches.append(result) except re.error: if source_pattern == catalia_collection: diff --git a/tests/cumulus_lambda_functions/daac_archiver/test_catalia_auth_db.py b/tests/cumulus_lambda_functions/daac_archiver/test_catalia_auth_db.py index eef31829..8f570145 100644 --- a/tests/cumulus_lambda_functions/daac_archiver/test_catalia_auth_db.py +++ b/tests/cumulus_lambda_functions/daac_archiver/test_catalia_auth_db.py @@ -16,6 +16,6 @@ def test_01(self): self.assertFalse(cad.authorize('A', 'X:Y:L0_V1', 'M:N:L0_V1')) self.assertFalse(cad.authorize('A', 'X:Y:L1_V1', 'M:N:L0_V1')) self.assertTrue(cad.authorize('A', 'X:Y:L1_V1', 'M:N:L1_V1')) - self.assertTrue(cad.authorize('A', 'X:Y:L0_V2', 'M:N:L0_V1')) - self.assertFalse(cad.authorize('A', 'X:Y:L0_V2', 'M:N:L1_V1')) + self.assertTrue(cad.authorize('A', 'x:y:L0_V2', 'M:N:L0_V1')) + self.assertFalse(cad.authorize('A', 'x:y:L0_V2', 'M:N:L1_V1')) return From 
94086944aa60863a84a89405e8562d78d3cc061b Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Tue, 25 Nov 2025 08:08:42 -0800 Subject: [PATCH 14/35] feat: update authorize methods based on main business logic --- .../daac_archiver/catalia_auth_db.py | 106 +++++++++++++++--- .../catalia_daac_handshakes_db.py | 40 +++++++ .../daac_archiver/daac_archiver_catalia.py | 1 + .../daac_archiver/daac_archiver_logic.py | 14 +-- 4 files changed, 139 insertions(+), 22 deletions(-) create mode 100644 cumulus_lambda_functions/daac_archiver/catalia_daac_handshakes_db.py diff --git a/cumulus_lambda_functions/daac_archiver/catalia_auth_db.py b/cumulus_lambda_functions/daac_archiver/catalia_auth_db.py index 826d7c8c..f4020480 100644 --- a/cumulus_lambda_functions/daac_archiver/catalia_auth_db.py +++ b/cumulus_lambda_functions/daac_archiver/catalia_auth_db.py @@ -1,15 +1,25 @@ +import logging import re +from collections import defaultdict + from mdps_ds_lib.lib.aws.no_sql_abstract import NoSqlProps from mdps_ds_lib.lib.aws.no_sql_ddb import NoSqlDdb from mdps_ds_lib.lib.aws.no_sql_factory import NoSqlFactory +logger = logging.getLogger(__name__) class CataliaAuthDb: + user_group = 'userGroup' + access = 'access' + project_map = 'projectMap' + source_project = 'sourceProject' + target_project = 'targetProject' + def __init__(self, table_name: str): ddb_props = NoSqlProps() ddb_props.table = table_name - ddb_props.primary_key = 'userGroup' - ddb_props.secondary_key = 'projectMap' + ddb_props.primary_key = self.user_group + ddb_props.secondary_key = self.project_map param = ddb_props.to_json() param['file_repo'] = 'AWS_DDB' @@ -18,17 +28,78 @@ def __init__(self, table_name: str): def add(self, user_group, collection, daac_collection, access: bool): item1 = { - 'userGroup': user_group, - 'projectMap': '', - 'sourceProject': collection, - 'targetProject': daac_collection, - 'access': access, + # 'userGroup': user_group, + # 'projectMap': '', + self.source_project: collection, + self.target_project: daac_collection, + self.access: access, } - sk1 = f'{item1["sourceProject"]}->{item1["targetProject"]}' - self.__ddb.add(item1['userGroup'], sk1, item1, replace=True) + sk1 = f'{collection}->{daac_collection}' + self.__ddb.add(user_group, sk1, item1, replace=True) return - def authorize(self, user_group, catalia_collection, daac_collection): + def get_authorized_catalia(self, user_group: list[str], catalia_collection): + results = [] + for group in user_group: + group_results = self.__ddb.get(group, secondary_key=None) + if group_results: + results.extend(group_results) + + if not results or len(results) == 0: + return [] + + source_matches = [] + for result in results: + source_pattern = result.get('sourceProject', '') + try: + if re.match(source_pattern, catalia_collection, re.IGNORECASE): + source_matches.append(result) + except re.error: + if source_pattern == catalia_collection: + source_matches.append(result) + return source_matches + + def get_authorized_daac(self, source_matches: list, catalia_collection, daac_collections: list[str]): + target_matches = defaultdict(list) + for result in source_matches: + target_pattern = result.get('targetProject', '') + for daac_collection in daac_collections: + try: + if re.match(target_pattern, daac_collection, re.IGNORECASE): + target_matches[daac_collection].append(result) + except re.error: + if target_pattern == daac_collection: + target_matches[daac_collection].append(result) + authorized_daac_collections = [] + for k, v in target_matches.items(): + if len(v) < 1: + continue + 
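# Tie-break order for each candidate DAAC collection k, mirroring authorize() below: + # 1) a single matching rule decides via its 'access' flag; + # 2) otherwise keep the rules whose targetProject shares the longest common prefix with k; + # 3) then keep the rules whose sourceProject shares the longest common prefix with catalia_collection. +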
if len(v) == 1 and v[0].get(self.access, False): + authorized_daac_collections.append(k) + continue + closest_target = min(v, key=lambda x: self._string_distance(x.get(self.target_project, ''), k)) + final_matches = [r for r in v if r.get(self.target_project) == closest_target.get(self.target_project)] + if len(final_matches) < 1: + continue + if len(final_matches) == 1 and final_matches[0].get(self.access, False): + authorized_daac_collections.append(k) + continue + closest_target_1 = min(final_matches, key=lambda x: self._string_distance(x.get(self.source_project, ''), catalia_collection)) + final_matches_1 = [r for r in final_matches if r.get(self.source_project) == closest_target_1.get(self.source_project)] + if len(final_matches_1) < 1: + continue + if len(final_matches_1) > 1: + logger.warning(f'duplicated Auth rows ? :{k} = {v}') + if final_matches_1[0].get(self.access, False): + authorized_daac_collections.append(k) + continue + return authorized_daac_collections + + def get_authorized_daac_full(self, user_group: list[str], catalia_collection, daac_collections: list[str]): + source_matches = self.get_authorized_catalia(user_group, catalia_collection) + return self.get_authorized_daac(source_matches, catalia_collection, daac_collections) + + def authorize(self, user_group: list[str], catalia_collection, daac_collection): """ This will retrieve entries from DDB with user_group as PK. (This is done). If results is None or empty array, return False. Not Authorized. @@ -46,7 +117,11 @@ def authorize(self, user_group, catalia_collection, daac_collection): :param daac_collection: :return: """ - results = self.__ddb.get(user_group, secondary_key=None) + results = [] + for group in user_group: + group_results = self.__ddb.get(group, secondary_key=None) + if group_results: + results.extend(group_results) if not results or len(results) == 0: return False @@ -80,16 +155,17 @@ def authorize(self, user_group, catalia_collection, daac_collection): if len(target_matches) == 1: return target_matches[0].get('access', False) - closest_target = min(target_matches, key=lambda x: self._string_distance(x.get('targetProject', ''), daac_collection)) + closest_target = min(target_matches, key=lambda x: self._string_distance(x.get(self.target_project, ''), daac_collection)) - final_matches = [r for r in target_matches if r.get('targetProject') == closest_target.get('targetProject')] + final_matches = [r for r in target_matches if r.get(self.target_project) == closest_target.get(self.target_project)] if len(final_matches) == 1: return final_matches[0].get('access', False) - closest_source = min(final_matches, key=lambda x: self._string_distance(x.get('sourceProject', ''), catalia_collection)) + closest_source = min(final_matches, key=lambda x: self._string_distance(x.get(self.source_project, ''), catalia_collection)) + final_matches = [r for r in final_matches if r.get(self.source_project) == closest_source.get(self.source_project)] - return closest_source.get('access', False) + return final_matches[0].get('access', False) def _string_distance(self, s1, s2): """ diff --git a/cumulus_lambda_functions/daac_archiver/catalia_daac_handshakes_db.py b/cumulus_lambda_functions/daac_archiver/catalia_daac_handshakes_db.py new file mode 100644 index 00000000..81c8c29a --- /dev/null +++ b/cumulus_lambda_functions/daac_archiver/catalia_daac_handshakes_db.py @@ -0,0 +1,40 @@ +from mdps_ds_lib.lib.aws.no_sql_abstract import NoSqlProps +from mdps_ds_lib.lib.aws.no_sql_ddb import NoSqlDdb +from
mdps_ds_lib.lib.aws.no_sql_factory import NoSqlFactory + + +class CataliaDaacHandshakesDb: + user_group = 'userGroup' + user = 'user' + source_project = 'sourceProject' + target_project = 'targetProject' + + def __init__(self, table_name: str): + ddb_props = NoSqlProps() + ddb_props.table = table_name + ddb_props.primary_key = self.source_project + ddb_props.secondary_key = self.target_project + + param = ddb_props.to_json() + param['file_repo'] = 'AWS_DDB' + + self.__ddb: NoSqlDdb = NoSqlFactory().get_instance(**param) + + def add(self, catalia_collection, daac_collection, api_key, provider, data_version, sns_topic_arn, role_arn, role_session_name, archiving_types, user, user_group): + item1 = { + self.user_group: user_group, + self.user: user, + 'provider': provider, + 'data_version': data_version, + 'sns_topic_arn': sns_topic_arn, + 'role_arn': role_arn, + 'role_session_name': role_session_name, + 'archiving_types': archiving_types, + 'api_key': api_key, + } + self.__ddb.add(catalia_collection, daac_collection, item1, replace=True) + return + + def search(self, catalia_collection): + results = self.__ddb.get(catalia_collection, secondary_key=None) + return results diff --git a/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py b/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py index c42aed10..b7a1de89 100644 --- a/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py +++ b/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py @@ -467,6 +467,7 @@ def send_daac_sns(self, daac_config): 'version': daac_config['daac_data_version'], }, "identifier": self.__archiving_granules_stac.id, # Seems like it's the same granule IDuds_cnm_json['identifier'], + # From DAAC: Unique identifier for the message as a whole. It is the senders responsibility to ensure uniqueness. This identifier can be used in response messages to provide tracability. "submissionTime": f'{TimeUtils.get_current_time()}Z', "provider": daac_config['daac_provider'], # NOTE: we can't use tenant as provider anymore coz we aren't sure tennt will be there in CATALIA. if 'daac_provider' in daac_config else granule_identifier.tenant "version": self.__cnm_msg_version, diff --git a/cumulus_lambda_functions/daac_archiver/daac_archiver_logic.py b/cumulus_lambda_functions/daac_archiver/daac_archiver_logic.py index ab1a5c2f..01cf55e7 100644 --- a/cumulus_lambda_functions/daac_archiver/daac_archiver_logic.py +++ b/cumulus_lambda_functions/daac_archiver/daac_archiver_logic.py @@ -158,23 +158,23 @@ def update_stac(self, cnm_notification_msg): def update_stac_fast_api(self, cnm_notification_msg): sfa_client = SFAClientFactory().get_instance_from_env() + # TODO: update this part ? how to get collection and granule id? collection_id, granule_id = ':'.join(cnm_notification_msg['identifier'].split(':')[:-1]), cnm_notification_msg['identifier'] # TODO assuming granule ID is URN:NASA:VENUE:TENANT:VENUE:COLLECTION_ID:COLLECTION_ID existing_item = sfa_client.get_item(collection_id, granule_id) # TODO handle error when no existing_item. Currently, it is requests.HTTPError with 404 if cnm_notification_msg['response']['status'] == 'SUCCESS': latest_daac_status = { - 'archive_status': 'cnm_r_success', - 'archive_error_message': '', - 'archive_error_code': '', + 'status': 'cnm-receive-success', } + # TODO ask DAAC if they pass HREF? 
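+            # Any response.status other than SUCCESS is recorded as a failed receipt below; + # errorMessage/errorCode default to 'unknown' when the DAAC response omits them.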
else: latest_daac_status = { - 'archive_status': 'cnm_r_failed', - 'archive_error_message': cnm_notification_msg['response']['errorMessage'] if 'errorMessage' in cnm_notification_msg['response'] else 'unknown', - 'archive_error_code': cnm_notification_msg['response']['errorCode'] if 'errorCode' in cnm_notification_msg['response'] else 'unknown', + 'status': 'cnm-receive-failed', + 'errorMessage': cnm_notification_msg['response']['errorMessage'] if 'errorMessage' in cnm_notification_msg['response'] else 'unknown', + 'errorCode': cnm_notification_msg['response']['errorCode'] if 'errorCode' in cnm_notification_msg['response'] else 'unknown', } - latest_daac_status['event_time'] = TimeUtils.get_current_time() + latest_daac_status['datetime'] = TimeUtils.get_current_time() existing_item['properties']['archival_statuses'] = existing_item['properties']['archival_statuses'] + [latest_daac_status] if 'archival_statuses' in existing_item['properties'] else [latest_daac_status] updated_item = sfa_client.update_item(collection_id, granule_id, existing_item, update_whole=True) # TODO partial update via patch is not working at this moment. return From 9a3e7381e282e1aea078c007c7615a52261586d9 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Tue, 25 Nov 2025 15:06:58 -0800 Subject: [PATCH 15/35] feat: re-creating vpc --- tf-module/old_vpc/README.md | 94 ++ tf-module/old_vpc/main.tf | 290 +++++ tf-module/old_vpc/outputs.tf | 84 ++ tf-module/old_vpc/terraform.tfstate | 1163 ++++++++++++++++++++ tf-module/old_vpc/terraform.tfvars.example | 46 + tf-module/old_vpc/variables.tf | 72 ++ 6 files changed, 1749 insertions(+) create mode 100644 tf-module/old_vpc/README.md create mode 100644 tf-module/old_vpc/main.tf create mode 100644 tf-module/old_vpc/outputs.tf create mode 100644 tf-module/old_vpc/terraform.tfstate create mode 100644 tf-module/old_vpc/terraform.tfvars.example create mode 100644 tf-module/old_vpc/variables.tf diff --git a/tf-module/old_vpc/README.md b/tf-module/old_vpc/README.md new file mode 100644 index 00000000..80f94577 --- /dev/null +++ b/tf-module/old_vpc/README.md @@ -0,0 +1,94 @@ +# VPC Terraform Module + +This Terraform configuration recreates a VPC based on the exported configuration from an existing AWS account. + +## Architecture + +The VPC includes: + +- **VPC**: Primary CIDR block (10.52.8.0/22) and secondary CIDR block (10.0.0.0/16) +- **Internet Gateway**: For public internet access +- **NAT Gateway**: Single NAT Gateway for private subnet internet access +- **Subnets**: + - 1 NAT Gateway subnet (10.52.11.0/28) + - 4 Public subnets across 4 availability zones + - 4 Private subnets across 4 availability zones +- **Route Tables**: Separate route tables for public and private subnets +- **VPC Endpoints**: + - S3 Gateway endpoint (always created) + - Execute API interface endpoint (optional) + - EFS interface endpoint (optional) + +## Prerequisites + +- Terraform >= 1.0 +- AWS CLI configured with appropriate credentials +- Appropriate AWS permissions to create VPC resources + +## Usage + +1. Copy the example variables file: + ```bash + cp terraform.tfvars.example terraform.tfvars + ``` + +2. Edit `terraform.tfvars` with your desired configuration + +3. Initialize Terraform: + ```bash + terraform init + ``` + +4. Review the plan: + ```bash + terraform plan + ``` + +5. 
Apply the configuration: + ```bash + terraform apply + ``` + +## Variables + +| Name | Description | Type | Default | +|------|-------------|------|---------| +| aws_region | AWS region | string | us-west-2 | +| name_prefix | Prefix for resource names | string | Unity-VPC | +| vpc_cidr_block | Primary VPC CIDR | string | 10.52.8.0/22 | +| secondary_cidr_block | Secondary VPC CIDR | string | 10.0.0.0/16 | +| availability_zones | List of AZs | list(string) | ["us-west-2a", "us-west-2b", "us-west-2c", "us-west-2d"] | +| public_subnet_cidrs | Public subnet CIDRs | list(string) | See variables.tf | +| private_subnet_cidrs | Private subnet CIDRs | list(string) | See variables.tf | +| enable_interface_endpoints | Enable VPC interface endpoints | bool | false | +| common_tags | Tags for all resources | map(string) | {Project = "Unity"} | + +## Outputs + +The module exports various outputs including VPC ID, subnet IDs, route table IDs, and VPC endpoint IDs. See `outputs.tf` for the complete list. + +## Customization + +To use different CIDR ranges or availability zones for a different AWS account: + +1. Modify the `vpc_cidr_block` and `secondary_cidr_block` if needed +2. Update `availability_zones` to match your target region +3. Adjust subnet CIDRs in `public_subnet_cidrs` and `private_subnet_cidrs` +4. Update `aws_region` to your target region +5. Add or modify tags in `common_tags` + +## Cost Considerations + +- **NAT Gateway**: Charges per hour and per GB of data processed +- **VPC Endpoints**: Interface endpoints charge per hour and per GB processed +- **Elastic IP**: No charge when associated with a running resource + +To reduce costs: +- Set `enable_interface_endpoints = false` if not needed +- Consider using a single availability zone for testing + +## Notes + +- Security groups are not included in this base configuration and should be created separately +- VPC peering connections from the original configuration are not included +- Some managed VPC endpoints (e.g., RabbitMQ, OpenSearch) are not included as they are service-specific diff --git a/tf-module/old_vpc/main.tf b/tf-module/old_vpc/main.tf new file mode 100644 index 00000000..ba215f6f --- /dev/null +++ b/tf-module/old_vpc/main.tf @@ -0,0 +1,290 @@ +terraform { + required_version = ">= 1.0" + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + } +} + +provider "aws" { + region = var.aws_region +} + +# VPC +resource "aws_vpc" "main" { + cidr_block = var.vpc_cidr_block + enable_dns_hostnames = true + enable_dns_support = true + instance_tenancy = "default" + + tags = merge( + var.common_tags, + { + Name = "${var.name_prefix}-VPC" + } + ) +} + +# Secondary CIDR Block +resource "aws_vpc_ipv4_cidr_block_association" "secondary" { + vpc_id = aws_vpc.main.id + cidr_block = var.secondary_cidr_block +} + +# Internet Gateway +resource "aws_internet_gateway" "main" { + vpc_id = aws_vpc.main.id + + tags = merge( + var.common_tags, + { + Name = "${var.name_prefix}-IGW" + } + ) +} + +# Elastic IP for NAT Gateway +resource "aws_eip" "nat" { + domain = "vpc" + + tags = merge( + var.common_tags, + { + Name = "${var.name_prefix}-NAT-EIP" + } + ) + + depends_on = [aws_internet_gateway.main] +} + +# NAT Gateway Subnet (small subnet for NAT Gateway) +resource "aws_subnet" "nat_gateway" { + vpc_id = aws_vpc.main.id + cidr_block = var.nat_gateway_subnet_cidr + availability_zone = var.availability_zones[0] + map_public_ip_on_launch = true + + tags = merge( + var.common_tags, + { + Name = 
"${var.name_prefix}-NAT-Gateway-Subnet" + } + ) +} + +# NAT Gateway +resource "aws_nat_gateway" "main" { + allocation_id = aws_eip.nat.id + subnet_id = aws_subnet.nat_gateway.id + + tags = merge( + var.common_tags, + { + Name = "${var.name_prefix}-NAT" + } + ) + + depends_on = [aws_internet_gateway.main] +} + +# Public Subnets +resource "aws_subnet" "public" { + count = length(var.public_subnet_cidrs) + + vpc_id = aws_vpc.main.id + cidr_block = var.public_subnet_cidrs[count.index] + availability_zone = var.availability_zones[count.index] + map_public_ip_on_launch = true + + tags = merge( + var.common_tags, + { + Name = "${var.name_prefix}-Pub-Subnet-${count.index + 1}" + } + ) +} + +# Private Subnets +resource "aws_subnet" "private" { + count = length(var.private_subnet_cidrs) + + vpc_id = aws_vpc.main.id + cidr_block = var.private_subnet_cidrs[count.index] + availability_zone = var.availability_zones[count.index] + + tags = merge( + var.common_tags, + { + Name = "${var.name_prefix}-Priv-Subnet-${count.index + 1}" + } + ) +} + +# Route Table for NAT Gateway Subnet +resource "aws_route_table" "nat_gateway" { + vpc_id = aws_vpc.main.id + + route { + cidr_block = "0.0.0.0/0" + gateway_id = aws_internet_gateway.main.id + } + + tags = merge( + var.common_tags, + { + Name = "${var.name_prefix}-NAT-Gateway-RT" + Network = "Public" + } + ) +} + +resource "aws_route_table_association" "nat_gateway" { + subnet_id = aws_subnet.nat_gateway.id + route_table_id = aws_route_table.nat_gateway.id +} + +# Public Route Tables (one per AZ) +resource "aws_route_table" "public" { + count = length(var.public_subnet_cidrs) + + vpc_id = aws_vpc.main.id + + route { + cidr_block = "0.0.0.0/0" + gateway_id = aws_internet_gateway.main.id + } + + tags = merge( + var.common_tags, + { + Name = "${var.name_prefix}-Public-RT-AZ${count.index + 1}" + Network = "Public AZ${count.index + 1}" + } + ) +} + +resource "aws_route_table_association" "public" { + count = length(var.public_subnet_cidrs) + + subnet_id = aws_subnet.public[count.index].id + route_table_id = aws_route_table.public[count.index].id +} + +# Private Route Tables (one per AZ) +resource "aws_route_table" "private" { + count = length(var.private_subnet_cidrs) + + vpc_id = aws_vpc.main.id + + route { + cidr_block = "0.0.0.0/0" + nat_gateway_id = aws_nat_gateway.main.id + } + + tags = merge( + var.common_tags, + { + Name = "${var.name_prefix}-Private-RT-AZ${count.index + 1}" + Network = "Private AZ${count.index + 1}" + } + ) +} + +resource "aws_route_table_association" "private" { + count = length(var.private_subnet_cidrs) + + subnet_id = aws_subnet.private[count.index].id + route_table_id = aws_route_table.private[count.index].id +} + +# S3 VPC Endpoint (Gateway) +resource "aws_vpc_endpoint" "s3" { + vpc_id = aws_vpc.main.id + service_name = "com.amazonaws.${var.aws_region}.s3" + + route_table_ids = concat( + aws_route_table.public[*].id, + aws_route_table.private[*].id + ) + + tags = merge( + var.common_tags, + { + Name = "${var.name_prefix}-s3-endpoint" + } + ) +} + +# VPC Endpoints for Interface services (optional, can be enabled via variable) +resource "aws_security_group" "vpc_endpoints" { + count = var.enable_interface_endpoints ? 
1 : 0 + + name = "${var.name_prefix}-vpc-endpoints-sg" + description = "Security group for VPC endpoints" + vpc_id = aws_vpc.main.id + + ingress { + from_port = 443 + to_port = 443 + protocol = "tcp" + cidr_blocks = [var.vpc_cidr_block, var.secondary_cidr_block] + } + + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } + + tags = merge( + var.common_tags, + { + Name = "${var.name_prefix}-vpc-endpoints-sg" + } + ) +} + +# Execute API VPC Endpoint +resource "aws_vpc_endpoint" "execute_api" { + count = var.enable_interface_endpoints ? 1 : 0 + + vpc_id = aws_vpc.main.id + service_name = "com.amazonaws.${var.aws_region}.execute-api" + vpc_endpoint_type = "Interface" + private_dns_enabled = false + + subnet_ids = slice(aws_subnet.private[*].id, 0, min(2, length(aws_subnet.private))) + + security_group_ids = [aws_security_group.vpc_endpoints[0].id] + + tags = merge( + var.common_tags, + { + Name = "${var.name_prefix}-execute-api-endpoint" + } + ) +} + +# EFS VPC Endpoint +resource "aws_vpc_endpoint" "efs" { + count = var.enable_interface_endpoints ? 1 : 0 + + vpc_id = aws_vpc.main.id + service_name = "com.amazonaws.${var.aws_region}.elasticfilesystem-fips" + vpc_endpoint_type = "Interface" + private_dns_enabled = false + + subnet_ids = aws_subnet.private[*].id + + security_group_ids = [aws_security_group.vpc_endpoints[0].id] + + tags = merge( + var.common_tags, + { + Name = "${var.name_prefix}-efs-endpoint" + } + ) +} diff --git a/tf-module/old_vpc/outputs.tf b/tf-module/old_vpc/outputs.tf new file mode 100644 index 00000000..1afc6445 --- /dev/null +++ b/tf-module/old_vpc/outputs.tf @@ -0,0 +1,84 @@ +output "vpc_id" { + description = "ID of the VPC" + value = aws_vpc.main.id +} + +output "vpc_cidr_block" { + description = "Primary CIDR block of the VPC" + value = aws_vpc.main.cidr_block +} + +output "vpc_secondary_cidr_block" { + description = "Secondary CIDR block of the VPC" + value = aws_vpc_ipv4_cidr_block_association.secondary.cidr_block +} + +output "internet_gateway_id" { + description = "ID of the Internet Gateway" + value = aws_internet_gateway.main.id +} + +output "nat_gateway_id" { + description = "ID of the NAT Gateway" + value = aws_nat_gateway.main.id +} + +output "nat_gateway_public_ip" { + description = "Public IP address of the NAT Gateway" + value = aws_eip.nat.public_ip +} + +output "nat_gateway_subnet_id" { + description = "ID of the NAT Gateway subnet" + value = aws_subnet.nat_gateway.id +} + +output "public_subnet_ids" { + description = "List of IDs of public subnets" + value = aws_subnet.public[*].id +} + +output "public_subnet_cidrs" { + description = "List of CIDR blocks of public subnets" + value = aws_subnet.public[*].cidr_block +} + +output "private_subnet_ids" { + description = "List of IDs of private subnets" + value = aws_subnet.private[*].id +} + +output "private_subnet_cidrs" { + description = "List of CIDR blocks of private subnets" + value = aws_subnet.private[*].cidr_block +} + +output "public_route_table_ids" { + description = "List of IDs of public route tables" + value = aws_route_table.public[*].id +} + +output "private_route_table_ids" { + description = "List of IDs of private route tables" + value = aws_route_table.private[*].id +} + +output "s3_vpc_endpoint_id" { + description = "ID of the S3 VPC endpoint" + value = aws_vpc_endpoint.s3.id +} + +output "vpc_endpoints_security_group_id" { + description = "ID of the security group for VPC endpoints" + value = var.enable_interface_endpoints ? 
aws_security_group.vpc_endpoints[0].id : null +} + +output "execute_api_vpc_endpoint_id" { + description = "ID of the Execute API VPC endpoint" + value = var.enable_interface_endpoints ? aws_vpc_endpoint.execute_api[0].id : null +} + +output "efs_vpc_endpoint_id" { + description = "ID of the EFS VPC endpoint" + value = var.enable_interface_endpoints ? aws_vpc_endpoint.efs[0].id : null +} diff --git a/tf-module/old_vpc/terraform.tfstate b/tf-module/old_vpc/terraform.tfstate new file mode 100644 index 00000000..0ba83288 --- /dev/null +++ b/tf-module/old_vpc/terraform.tfstate @@ -0,0 +1,1163 @@ +{ + "version": 4, + "terraform_version": "1.2.6", + "serial": 26, + "lineage": "f4d5029b-85b5-b410-34df-b5a1580553e7", + "outputs": { + "internet_gateway_id": { + "value": "igw-0d487785c1302d5a6", + "type": "string" + }, + "nat_gateway_id": { + "value": "nat-011f2d082b4a65666", + "type": "string" + }, + "nat_gateway_public_ip": { + "value": "35.165.209.70", + "type": "string" + }, + "nat_gateway_subnet_id": { + "value": "subnet-0ba09be444f1a3be4", + "type": "string" + }, + "private_route_table_ids": { + "value": [ + "rtb-00e8ba8f033871d37", + "rtb-040833f470d742920", + "rtb-07f21adf632394e67", + "rtb-0c12576c2588c9c27" + ], + "type": [ + "tuple", + [ + "string", + "string", + "string", + "string" + ] + ] + }, + "private_subnet_cidrs": { + "value": [ + "10.52.10.0/25", + "10.52.10.128/25", + "10.0.0.0/19", + "10.0.32.0/19" + ], + "type": [ + "tuple", + [ + "string", + "string", + "string", + "string" + ] + ] + }, + "public_route_table_ids": { + "value": [ + "rtb-04a445b289c2e2e56", + "rtb-0b663425eeb701116", + "rtb-079bca8d907ff6723", + "rtb-077cc6b30507aaa90" + ], + "type": [ + "tuple", + [ + "string", + "string", + "string", + "string" + ] + ] + }, + "public_subnet_cidrs": { + "value": [ + "10.52.8.0/24", + "10.52.9.0/24", + "10.0.64.0/19", + "10.0.96.0/19" + ], + "type": [ + "tuple", + [ + "string", + "string", + "string", + "string" + ] + ] + }, + "s3_vpc_endpoint_id": { + "value": "vpce-0052fce3ffb2e63f9", + "type": "string" + }, + "vpc_cidr_block": { + "value": "10.52.8.0/22", + "type": "string" + }, + "vpc_id": { + "value": "vpc-0c241db231a406be9", + "type": "string" + }, + "vpc_secondary_cidr_block": { + "value": "10.0.0.0/16", + "type": "string" + } + }, + "resources": [ + { + "mode": "managed", + "type": "aws_eip", + "name": "nat", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "address": null, + "allocation_id": "eipalloc-0b4a6e4c21a868fb4", + "arn": "arn:aws:ec2:us-west-2:979188186972:elastic-ip/eipalloc-0b4a6e4c21a868fb4", + "associate_with_private_ip": null, + "association_id": "", + "carrier_ip": "", + "customer_owned_ip": "", + "customer_owned_ipv4_pool": "", + "domain": "vpc", + "id": "eipalloc-0b4a6e4c21a868fb4", + "instance": "", + "ipam_pool_id": null, + "network_border_group": "us-west-2", + "network_interface": "", + "private_dns": null, + "private_ip": "", + "ptr_record": "", + "public_dns": "ec2-35-165-209-70.us-west-2.compute.amazonaws.com", + "public_ip": "35.165.209.70", + "public_ipv4_pool": "amazon", + "tags": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-NAT-EIP", + "Project": "Unity" + }, + "tags_all": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-NAT-EIP", + "Project": "Unity" + }, + "timeouts": null, + "vpc": true + }, + "sensitive_attributes": [], + "private": 
"eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiZGVsZXRlIjoxODAwMDAwMDAwMDAsInJlYWQiOjkwMDAwMDAwMDAwMCwidXBkYXRlIjozMDAwMDAwMDAwMDB9fQ==", + "dependencies": [ + "aws_internet_gateway.main", + "aws_vpc.main" + ] + } + ] + }, + { + "mode": "managed", + "type": "aws_internet_gateway", + "name": "main", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "arn": "arn:aws:ec2:us-west-2:979188186972:internet-gateway/igw-0d487785c1302d5a6", + "id": "igw-0d487785c1302d5a6", + "owner_id": "979188186972", + "tags": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-IGW", + "Project": "Unity" + }, + "tags_all": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-IGW", + "Project": "Unity" + }, + "timeouts": null, + "vpc_id": "vpc-0c241db231a406be9" + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjoxMjAwMDAwMDAwMDAwLCJkZWxldGUiOjEyMDAwMDAwMDAwMDAsInVwZGF0ZSI6MTIwMDAwMDAwMDAwMH19", + "dependencies": [ + "aws_vpc.main" + ] + } + ] + }, + { + "mode": "managed", + "type": "aws_nat_gateway", + "name": "main", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "allocation_id": "eipalloc-0b4a6e4c21a868fb4", + "association_id": "eipassoc-0238dd27fabdd8031", + "connectivity_type": "public", + "id": "nat-011f2d082b4a65666", + "network_interface_id": "eni-076b2efeb61790323", + "private_ip": "10.52.11.12", + "public_ip": "35.165.209.70", + "secondary_allocation_ids": null, + "secondary_private_ip_address_count": 0, + "secondary_private_ip_addresses": [], + "subnet_id": "subnet-0ba09be444f1a3be4", + "tags": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-NAT", + "Project": "Unity" + }, + "tags_all": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-NAT", + "Project": "Unity" + }, + "timeouts": null + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjo2MDAwMDAwMDAwMDAsImRlbGV0ZSI6MTgwMDAwMDAwMDAwMCwidXBkYXRlIjo2MDAwMDAwMDAwMDB9fQ==", + "dependencies": [ + "aws_eip.nat", + "aws_internet_gateway.main", + "aws_subnet.nat_gateway", + "aws_vpc.main" + ] + } + ] + }, + { + "mode": "managed", + "type": "aws_route_table", + "name": "nat_gateway", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "arn": "arn:aws:ec2:us-west-2:979188186972:route-table/rtb-07dcad2dfd64fd463", + "id": "rtb-07dcad2dfd64fd463", + "owner_id": "979188186972", + "propagating_vgws": [], + "route": [ + { + "carrier_gateway_id": "", + "cidr_block": "0.0.0.0/0", + "core_network_arn": "", + "destination_prefix_list_id": "", + "egress_only_gateway_id": "", + "gateway_id": "igw-0d487785c1302d5a6", + "ipv6_cidr_block": "", + "local_gateway_id": "", + "nat_gateway_id": "", + "network_interface_id": "", + "transit_gateway_id": "", + "vpc_endpoint_id": "", + "vpc_peering_connection_id": "" + } + ], + "tags": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-NAT-Gateway-RT", + "Network": "Public", + "Project": "Unity" + }, + "tags_all": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-NAT-Gateway-RT", + "Network": "Public", + "Project": "Unity" + }, + "timeouts": null, + "vpc_id": "vpc-0c241db231a406be9" + }, + "sensitive_attributes": [], + "private": 
"eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjozMDAwMDAwMDAwMDAsImRlbGV0ZSI6MzAwMDAwMDAwMDAwLCJ1cGRhdGUiOjEyMDAwMDAwMDAwMH19", + "dependencies": [ + "aws_internet_gateway.main", + "aws_vpc.main" + ] + } + ] + }, + { + "mode": "managed", + "type": "aws_route_table", + "name": "private", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "index_key": 0, + "schema_version": 0, + "attributes": { + "arn": "arn:aws:ec2:us-west-2:979188186972:route-table/rtb-00e8ba8f033871d37", + "id": "rtb-00e8ba8f033871d37", + "owner_id": "979188186972", + "propagating_vgws": [], + "route": [ + { + "carrier_gateway_id": "", + "cidr_block": "0.0.0.0/0", + "core_network_arn": "", + "destination_prefix_list_id": "", + "egress_only_gateway_id": "", + "gateway_id": "", + "ipv6_cidr_block": "", + "local_gateway_id": "", + "nat_gateway_id": "nat-011f2d082b4a65666", + "network_interface_id": "", + "transit_gateway_id": "", + "vpc_endpoint_id": "", + "vpc_peering_connection_id": "" + } + ], + "tags": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Private-RT-AZ1", + "Network": "Private AZ1", + "Project": "Unity" + }, + "tags_all": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Private-RT-AZ1", + "Network": "Private AZ1", + "Project": "Unity" + }, + "timeouts": null, + "vpc_id": "vpc-0c241db231a406be9" + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjozMDAwMDAwMDAwMDAsImRlbGV0ZSI6MzAwMDAwMDAwMDAwLCJ1cGRhdGUiOjEyMDAwMDAwMDAwMH19", + "dependencies": [ + "aws_eip.nat", + "aws_internet_gateway.main", + "aws_nat_gateway.main", + "aws_subnet.nat_gateway", + "aws_vpc.main" + ] + }, + { + "index_key": 1, + "schema_version": 0, + "attributes": { + "arn": "arn:aws:ec2:us-west-2:979188186972:route-table/rtb-040833f470d742920", + "id": "rtb-040833f470d742920", + "owner_id": "979188186972", + "propagating_vgws": [], + "route": [ + { + "carrier_gateway_id": "", + "cidr_block": "0.0.0.0/0", + "core_network_arn": "", + "destination_prefix_list_id": "", + "egress_only_gateway_id": "", + "gateway_id": "", + "ipv6_cidr_block": "", + "local_gateway_id": "", + "nat_gateway_id": "nat-011f2d082b4a65666", + "network_interface_id": "", + "transit_gateway_id": "", + "vpc_endpoint_id": "", + "vpc_peering_connection_id": "" + } + ], + "tags": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Private-RT-AZ2", + "Network": "Private AZ2", + "Project": "Unity" + }, + "tags_all": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Private-RT-AZ2", + "Network": "Private AZ2", + "Project": "Unity" + }, + "timeouts": null, + "vpc_id": "vpc-0c241db231a406be9" + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjozMDAwMDAwMDAwMDAsImRlbGV0ZSI6MzAwMDAwMDAwMDAwLCJ1cGRhdGUiOjEyMDAwMDAwMDAwMH19", + "dependencies": [ + "aws_eip.nat", + "aws_internet_gateway.main", + "aws_nat_gateway.main", + "aws_subnet.nat_gateway", + "aws_vpc.main" + ] + }, + { + "index_key": 2, + "schema_version": 0, + "attributes": { + "arn": "arn:aws:ec2:us-west-2:979188186972:route-table/rtb-07f21adf632394e67", + "id": "rtb-07f21adf632394e67", + "owner_id": "979188186972", + "propagating_vgws": [], + "route": [ + { + "carrier_gateway_id": "", + "cidr_block": "0.0.0.0/0", + "core_network_arn": "", + "destination_prefix_list_id": "", + "egress_only_gateway_id": "", + "gateway_id": "", + "ipv6_cidr_block": "", + "local_gateway_id": "", + "nat_gateway_id": "nat-011f2d082b4a65666", + 
"network_interface_id": "", + "transit_gateway_id": "", + "vpc_endpoint_id": "", + "vpc_peering_connection_id": "" + } + ], + "tags": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Private-RT-AZ3", + "Network": "Private AZ3", + "Project": "Unity" + }, + "tags_all": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Private-RT-AZ3", + "Network": "Private AZ3", + "Project": "Unity" + }, + "timeouts": null, + "vpc_id": "vpc-0c241db231a406be9" + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjozMDAwMDAwMDAwMDAsImRlbGV0ZSI6MzAwMDAwMDAwMDAwLCJ1cGRhdGUiOjEyMDAwMDAwMDAwMH19", + "dependencies": [ + "aws_eip.nat", + "aws_internet_gateway.main", + "aws_nat_gateway.main", + "aws_subnet.nat_gateway", + "aws_vpc.main" + ] + }, + { + "index_key": 3, + "schema_version": 0, + "attributes": { + "arn": "arn:aws:ec2:us-west-2:979188186972:route-table/rtb-0c12576c2588c9c27", + "id": "rtb-0c12576c2588c9c27", + "owner_id": "979188186972", + "propagating_vgws": [], + "route": [ + { + "carrier_gateway_id": "", + "cidr_block": "0.0.0.0/0", + "core_network_arn": "", + "destination_prefix_list_id": "", + "egress_only_gateway_id": "", + "gateway_id": "", + "ipv6_cidr_block": "", + "local_gateway_id": "", + "nat_gateway_id": "nat-011f2d082b4a65666", + "network_interface_id": "", + "transit_gateway_id": "", + "vpc_endpoint_id": "", + "vpc_peering_connection_id": "" + } + ], + "tags": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Private-RT-AZ4", + "Network": "Private AZ4", + "Project": "Unity" + }, + "tags_all": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Private-RT-AZ4", + "Network": "Private AZ4", + "Project": "Unity" + }, + "timeouts": null, + "vpc_id": "vpc-0c241db231a406be9" + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjozMDAwMDAwMDAwMDAsImRlbGV0ZSI6MzAwMDAwMDAwMDAwLCJ1cGRhdGUiOjEyMDAwMDAwMDAwMH19", + "dependencies": [ + "aws_eip.nat", + "aws_internet_gateway.main", + "aws_nat_gateway.main", + "aws_subnet.nat_gateway", + "aws_vpc.main" + ] + } + ] + }, + { + "mode": "managed", + "type": "aws_route_table", + "name": "public", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "index_key": 0, + "schema_version": 0, + "attributes": { + "arn": "arn:aws:ec2:us-west-2:979188186972:route-table/rtb-04a445b289c2e2e56", + "id": "rtb-04a445b289c2e2e56", + "owner_id": "979188186972", + "propagating_vgws": [], + "route": [ + { + "carrier_gateway_id": "", + "cidr_block": "0.0.0.0/0", + "core_network_arn": "", + "destination_prefix_list_id": "", + "egress_only_gateway_id": "", + "gateway_id": "igw-0d487785c1302d5a6", + "ipv6_cidr_block": "", + "local_gateway_id": "", + "nat_gateway_id": "", + "network_interface_id": "", + "transit_gateway_id": "", + "vpc_endpoint_id": "", + "vpc_peering_connection_id": "" + } + ], + "tags": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Public-RT-AZ1", + "Network": "Public AZ1", + "Project": "Unity" + }, + "tags_all": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Public-RT-AZ1", + "Network": "Public AZ1", + "Project": "Unity" + }, + "timeouts": null, + "vpc_id": "vpc-0c241db231a406be9" + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjozMDAwMDAwMDAwMDAsImRlbGV0ZSI6MzAwMDAwMDAwMDAwLCJ1cGRhdGUiOjEyMDAwMDAwMDAwMH19", + "dependencies": [ + "aws_internet_gateway.main", + "aws_vpc.main" + ] + }, + { + "index_key": 1, + "schema_version": 0, + 
"attributes": { + "arn": "arn:aws:ec2:us-west-2:979188186972:route-table/rtb-0b663425eeb701116", + "id": "rtb-0b663425eeb701116", + "owner_id": "979188186972", + "propagating_vgws": [], + "route": [ + { + "carrier_gateway_id": "", + "cidr_block": "0.0.0.0/0", + "core_network_arn": "", + "destination_prefix_list_id": "", + "egress_only_gateway_id": "", + "gateway_id": "igw-0d487785c1302d5a6", + "ipv6_cidr_block": "", + "local_gateway_id": "", + "nat_gateway_id": "", + "network_interface_id": "", + "transit_gateway_id": "", + "vpc_endpoint_id": "", + "vpc_peering_connection_id": "" + } + ], + "tags": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Public-RT-AZ2", + "Network": "Public AZ2", + "Project": "Unity" + }, + "tags_all": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Public-RT-AZ2", + "Network": "Public AZ2", + "Project": "Unity" + }, + "timeouts": null, + "vpc_id": "vpc-0c241db231a406be9" + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjozMDAwMDAwMDAwMDAsImRlbGV0ZSI6MzAwMDAwMDAwMDAwLCJ1cGRhdGUiOjEyMDAwMDAwMDAwMH19", + "dependencies": [ + "aws_internet_gateway.main", + "aws_vpc.main" + ] + }, + { + "index_key": 2, + "schema_version": 0, + "attributes": { + "arn": "arn:aws:ec2:us-west-2:979188186972:route-table/rtb-079bca8d907ff6723", + "id": "rtb-079bca8d907ff6723", + "owner_id": "979188186972", + "propagating_vgws": [], + "route": [ + { + "carrier_gateway_id": "", + "cidr_block": "0.0.0.0/0", + "core_network_arn": "", + "destination_prefix_list_id": "", + "egress_only_gateway_id": "", + "gateway_id": "igw-0d487785c1302d5a6", + "ipv6_cidr_block": "", + "local_gateway_id": "", + "nat_gateway_id": "", + "network_interface_id": "", + "transit_gateway_id": "", + "vpc_endpoint_id": "", + "vpc_peering_connection_id": "" + } + ], + "tags": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Public-RT-AZ3", + "Network": "Public AZ3", + "Project": "Unity" + }, + "tags_all": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Public-RT-AZ3", + "Network": "Public AZ3", + "Project": "Unity" + }, + "timeouts": null, + "vpc_id": "vpc-0c241db231a406be9" + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjozMDAwMDAwMDAwMDAsImRlbGV0ZSI6MzAwMDAwMDAwMDAwLCJ1cGRhdGUiOjEyMDAwMDAwMDAwMH19", + "dependencies": [ + "aws_internet_gateway.main", + "aws_vpc.main" + ] + }, + { + "index_key": 3, + "schema_version": 0, + "attributes": { + "arn": "arn:aws:ec2:us-west-2:979188186972:route-table/rtb-077cc6b30507aaa90", + "id": "rtb-077cc6b30507aaa90", + "owner_id": "979188186972", + "propagating_vgws": [], + "route": [ + { + "carrier_gateway_id": "", + "cidr_block": "0.0.0.0/0", + "core_network_arn": "", + "destination_prefix_list_id": "", + "egress_only_gateway_id": "", + "gateway_id": "igw-0d487785c1302d5a6", + "ipv6_cidr_block": "", + "local_gateway_id": "", + "nat_gateway_id": "", + "network_interface_id": "", + "transit_gateway_id": "", + "vpc_endpoint_id": "", + "vpc_peering_connection_id": "" + } + ], + "tags": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Public-RT-AZ4", + "Network": "Public AZ4", + "Project": "Unity" + }, + "tags_all": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Public-RT-AZ4", + "Network": "Public AZ4", + "Project": "Unity" + }, + "timeouts": null, + "vpc_id": "vpc-0c241db231a406be9" + }, + "sensitive_attributes": [], + "private": 
"eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjozMDAwMDAwMDAwMDAsImRlbGV0ZSI6MzAwMDAwMDAwMDAwLCJ1cGRhdGUiOjEyMDAwMDAwMDAwMH19", + "dependencies": [ + "aws_internet_gateway.main", + "aws_vpc.main" + ] + } + ] + }, + { + "mode": "managed", + "type": "aws_route_table_association", + "name": "nat_gateway", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "gateway_id": "", + "id": "rtbassoc-0cd827f97147f894f", + "route_table_id": "rtb-07dcad2dfd64fd463", + "subnet_id": "subnet-0ba09be444f1a3be4", + "timeouts": null + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjozMDAwMDAwMDAwMDAsImRlbGV0ZSI6MzAwMDAwMDAwMDAwLCJ1cGRhdGUiOjEyMDAwMDAwMDAwMH19", + "dependencies": [ + "aws_internet_gateway.main", + "aws_route_table.nat_gateway", + "aws_subnet.nat_gateway", + "aws_vpc.main" + ] + } + ] + }, + { + "mode": "managed", + "type": "aws_route_table_association", + "name": "private", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [] + }, + { + "mode": "managed", + "type": "aws_route_table_association", + "name": "public", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [] + }, + { + "mode": "managed", + "type": "aws_subnet", + "name": "nat_gateway", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 1, + "attributes": { + "arn": "arn:aws:ec2:us-west-2:979188186972:subnet/subnet-0ba09be444f1a3be4", + "assign_ipv6_address_on_creation": false, + "availability_zone": "us-west-2a", + "availability_zone_id": "usw2-az1", + "cidr_block": "10.52.11.0/28", + "customer_owned_ipv4_pool": "", + "enable_dns64": false, + "enable_lni_at_device_index": 0, + "enable_resource_name_dns_a_record_on_launch": false, + "enable_resource_name_dns_aaaa_record_on_launch": false, + "id": "subnet-0ba09be444f1a3be4", + "ipv6_cidr_block": "", + "ipv6_cidr_block_association_id": "", + "ipv6_native": false, + "map_customer_owned_ip_on_launch": false, + "map_public_ip_on_launch": true, + "outpost_arn": "", + "owner_id": "979188186972", + "private_dns_hostname_type_on_launch": "ip-name", + "tags": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-NAT-Gateway-Subnet", + "Project": "Unity" + }, + "tags_all": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-NAT-Gateway-Subnet", + "Project": "Unity" + }, + "timeouts": null, + "vpc_id": "vpc-0c241db231a406be9" + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjo2MDAwMDAwMDAwMDAsImRlbGV0ZSI6MTIwMDAwMDAwMDAwMH0sInNjaGVtYV92ZXJzaW9uIjoiMSJ9", + "dependencies": [ + "aws_vpc.main" + ] + } + ] + }, + { + "mode": "managed", + "type": "aws_subnet", + "name": "private", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "index_key": 0, + "schema_version": 1, + "attributes": { + "arn": "arn:aws:ec2:us-west-2:979188186972:subnet/subnet-0b6bb87de750f3241", + "assign_ipv6_address_on_creation": false, + "availability_zone": "us-west-2a", + "availability_zone_id": "usw2-az1", + "cidr_block": "10.52.10.0/25", + "customer_owned_ipv4_pool": "", + "enable_dns64": false, + "enable_lni_at_device_index": 0, + "enable_resource_name_dns_a_record_on_launch": false, + "enable_resource_name_dns_aaaa_record_on_launch": false, + "id": "subnet-0b6bb87de750f3241", + "ipv6_cidr_block": "", + "ipv6_cidr_block_association_id": "", + 
"ipv6_native": false, + "map_customer_owned_ip_on_launch": false, + "map_public_ip_on_launch": false, + "outpost_arn": "", + "owner_id": "979188186972", + "private_dns_hostname_type_on_launch": "ip-name", + "tags": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Priv-Subnet-1", + "Project": "Unity" + }, + "tags_all": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Priv-Subnet-1", + "Project": "Unity" + }, + "timeouts": null, + "vpc_id": "vpc-0c241db231a406be9" + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjo2MDAwMDAwMDAwMDAsImRlbGV0ZSI6MTIwMDAwMDAwMDAwMH0sInNjaGVtYV92ZXJzaW9uIjoiMSJ9", + "dependencies": [ + "aws_vpc.main" + ] + }, + { + "index_key": 1, + "schema_version": 1, + "attributes": { + "arn": "arn:aws:ec2:us-west-2:979188186972:subnet/subnet-0776bff97794b5782", + "assign_ipv6_address_on_creation": false, + "availability_zone": "us-west-2b", + "availability_zone_id": "usw2-az2", + "cidr_block": "10.52.10.128/25", + "customer_owned_ipv4_pool": "", + "enable_dns64": false, + "enable_lni_at_device_index": 0, + "enable_resource_name_dns_a_record_on_launch": false, + "enable_resource_name_dns_aaaa_record_on_launch": false, + "id": "subnet-0776bff97794b5782", + "ipv6_cidr_block": "", + "ipv6_cidr_block_association_id": "", + "ipv6_native": false, + "map_customer_owned_ip_on_launch": false, + "map_public_ip_on_launch": false, + "outpost_arn": "", + "owner_id": "979188186972", + "private_dns_hostname_type_on_launch": "ip-name", + "tags": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Priv-Subnet-2", + "Project": "Unity" + }, + "tags_all": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Priv-Subnet-2", + "Project": "Unity" + }, + "timeouts": null, + "vpc_id": "vpc-0c241db231a406be9" + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjo2MDAwMDAwMDAwMDAsImRlbGV0ZSI6MTIwMDAwMDAwMDAwMH0sInNjaGVtYV92ZXJzaW9uIjoiMSJ9", + "dependencies": [ + "aws_vpc.main" + ] + } + ] + }, + { + "mode": "managed", + "type": "aws_subnet", + "name": "public", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "index_key": 0, + "schema_version": 1, + "attributes": { + "arn": "arn:aws:ec2:us-west-2:979188186972:subnet/subnet-006ed82a83395edb2", + "assign_ipv6_address_on_creation": false, + "availability_zone": "us-west-2a", + "availability_zone_id": "usw2-az1", + "cidr_block": "10.52.8.0/24", + "customer_owned_ipv4_pool": "", + "enable_dns64": false, + "enable_lni_at_device_index": 0, + "enable_resource_name_dns_a_record_on_launch": false, + "enable_resource_name_dns_aaaa_record_on_launch": false, + "id": "subnet-006ed82a83395edb2", + "ipv6_cidr_block": "", + "ipv6_cidr_block_association_id": "", + "ipv6_native": false, + "map_customer_owned_ip_on_launch": false, + "map_public_ip_on_launch": true, + "outpost_arn": "", + "owner_id": "979188186972", + "private_dns_hostname_type_on_launch": "ip-name", + "tags": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Pub-Subnet-1", + "Project": "Unity" + }, + "tags_all": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Pub-Subnet-1", + "Project": "Unity" + }, + "timeouts": null, + "vpc_id": "vpc-0c241db231a406be9" + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjo2MDAwMDAwMDAwMDAsImRlbGV0ZSI6MTIwMDAwMDAwMDAwMH0sInNjaGVtYV92ZXJzaW9uIjoiMSJ9", + "dependencies": [ + "aws_vpc.main" + ] + }, + { + "index_key": 1, + "schema_version": 1, 
+ "attributes": { + "arn": "arn:aws:ec2:us-west-2:979188186972:subnet/subnet-0ccc52d248d1ccecf", + "assign_ipv6_address_on_creation": false, + "availability_zone": "us-west-2b", + "availability_zone_id": "usw2-az2", + "cidr_block": "10.52.9.0/24", + "customer_owned_ipv4_pool": "", + "enable_dns64": false, + "enable_lni_at_device_index": 0, + "enable_resource_name_dns_a_record_on_launch": false, + "enable_resource_name_dns_aaaa_record_on_launch": false, + "id": "subnet-0ccc52d248d1ccecf", + "ipv6_cidr_block": "", + "ipv6_cidr_block_association_id": "", + "ipv6_native": false, + "map_customer_owned_ip_on_launch": false, + "map_public_ip_on_launch": true, + "outpost_arn": "", + "owner_id": "979188186972", + "private_dns_hostname_type_on_launch": "ip-name", + "tags": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Pub-Subnet-2", + "Project": "Unity" + }, + "tags_all": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Pub-Subnet-2", + "Project": "Unity" + }, + "timeouts": null, + "vpc_id": "vpc-0c241db231a406be9" + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjo2MDAwMDAwMDAwMDAsImRlbGV0ZSI6MTIwMDAwMDAwMDAwMH0sInNjaGVtYV92ZXJzaW9uIjoiMSJ9", + "dependencies": [ + "aws_vpc.main" + ] + }, + { + "index_key": 3, + "schema_version": 1, + "attributes": { + "arn": "arn:aws:ec2:us-west-2:979188186972:subnet/subnet-0bcc85818ce5a7659", + "assign_ipv6_address_on_creation": false, + "availability_zone": "us-west-2d", + "availability_zone_id": "usw2-az4", + "cidr_block": "10.0.96.0/19", + "customer_owned_ipv4_pool": "", + "enable_dns64": false, + "enable_lni_at_device_index": 0, + "enable_resource_name_dns_a_record_on_launch": false, + "enable_resource_name_dns_aaaa_record_on_launch": false, + "id": "subnet-0bcc85818ce5a7659", + "ipv6_cidr_block": "", + "ipv6_cidr_block_association_id": "", + "ipv6_native": false, + "map_customer_owned_ip_on_launch": false, + "map_public_ip_on_launch": true, + "outpost_arn": "", + "owner_id": "979188186972", + "private_dns_hostname_type_on_launch": "ip-name", + "tags": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Pub-Subnet-4", + "Project": "Unity" + }, + "tags_all": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Pub-Subnet-4", + "Project": "Unity" + }, + "timeouts": null, + "vpc_id": "vpc-0c241db231a406be9" + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjo2MDAwMDAwMDAwMDAsImRlbGV0ZSI6MTIwMDAwMDAwMDAwMH0sInNjaGVtYV92ZXJzaW9uIjoiMSJ9", + "dependencies": [ + "aws_vpc.main" + ] + } + ] + }, + { + "mode": "managed", + "type": "aws_vpc", + "name": "main", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 1, + "attributes": { + "arn": "arn:aws:ec2:us-west-2:979188186972:vpc/vpc-0c241db231a406be9", + "assign_generated_ipv6_cidr_block": false, + "cidr_block": "10.52.8.0/22", + "default_network_acl_id": "acl-038915b54db57bb6a", + "default_route_table_id": "rtb-0eaffdb90c177fd3d", + "default_security_group_id": "sg-0a6db41101bbcc8aa", + "dhcp_options_id": "dopt-004c5fb057641a75a", + "enable_dns_hostnames": true, + "enable_dns_support": true, + "enable_network_address_usage_metrics": false, + "id": "vpc-0c241db231a406be9", + "instance_tenancy": "default", + "ipv4_ipam_pool_id": null, + "ipv4_netmask_length": null, + "ipv6_association_id": "", + "ipv6_cidr_block": "", + "ipv6_cidr_block_network_border_group": "", + "ipv6_ipam_pool_id": "", + "ipv6_netmask_length": 0, + 
"main_route_table_id": "rtb-0eaffdb90c177fd3d", + "owner_id": "979188186972", + "tags": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-VPC", + "Project": "Unity" + }, + "tags_all": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-VPC", + "Project": "Unity" + } + }, + "sensitive_attributes": [], + "private": "eyJzY2hlbWFfdmVyc2lvbiI6IjEifQ==" + } + ] + }, + { + "mode": "managed", + "type": "aws_vpc_endpoint", + "name": "s3", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "arn": "arn:aws:ec2:us-west-2:979188186972:vpc-endpoint/vpce-0052fce3ffb2e63f9", + "auto_accept": null, + "cidr_blocks": [ + "3.5.76.0/22", + "16.12.96.0/20", + "16.12.112.0/21", + "3.5.80.0/21", + "18.34.48.0/20", + "18.34.244.0/22", + "16.12.88.0/21", + "52.92.128.0/17", + "52.218.128.0/17" + ], + "dns_entry": [], + "dns_options": [ + { + "dns_record_ip_type": "service-defined", + "private_dns_only_for_inbound_resolver_endpoint": false + } + ], + "id": "vpce-0052fce3ffb2e63f9", + "ip_address_type": "ipv4", + "network_interface_ids": [], + "owner_id": "979188186972", + "policy": "{\"Statement\":[{\"Action\":\"*\",\"Effect\":\"Allow\",\"Principal\":\"*\",\"Resource\":\"*\"}],\"Version\":\"2008-10-17\"}", + "prefix_list_id": "pl-68a54001", + "private_dns_enabled": false, + "requester_managed": false, + "resource_configuration_arn": "", + "route_table_ids": [ + "rtb-00e8ba8f033871d37", + "rtb-040833f470d742920", + "rtb-04a445b289c2e2e56", + "rtb-077cc6b30507aaa90", + "rtb-079bca8d907ff6723", + "rtb-07f21adf632394e67", + "rtb-0b663425eeb701116", + "rtb-0c12576c2588c9c27" + ], + "security_group_ids": [], + "service_name": "com.amazonaws.us-west-2.s3", + "service_network_arn": "", + "service_region": "us-west-2", + "state": "available", + "subnet_configuration": [], + "subnet_ids": [], + "tags": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-s3-endpoint", + "Project": "Unity" + }, + "tags_all": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-s3-endpoint", + "Project": "Unity" + }, + "timeouts": null, + "vpc_endpoint_type": "Gateway", + "vpc_id": "vpc-0c241db231a406be9" + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjo2MDAwMDAwMDAwMDAsImRlbGV0ZSI6NjAwMDAwMDAwMDAwLCJ1cGRhdGUiOjYwMDAwMDAwMDAwMH19", + "dependencies": [ + "aws_eip.nat", + "aws_internet_gateway.main", + "aws_nat_gateway.main", + "aws_route_table.private", + "aws_route_table.public", + "aws_subnet.nat_gateway", + "aws_vpc.main" + ] + } + ] + }, + { + "mode": "managed", + "type": "aws_vpc_ipv4_cidr_block_association", + "name": "secondary", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "cidr_block": "10.0.0.0/16", + "id": "vpc-cidr-assoc-0f11f66d89be8be8a", + "ipv4_ipam_pool_id": null, + "ipv4_netmask_length": null, + "timeouts": null, + "vpc_id": "vpc-0c241db231a406be9" + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjo2MDAwMDAwMDAwMDAsImRlbGV0ZSI6NjAwMDAwMDAwMDAwfX0=", + "dependencies": [ + "aws_vpc.main" + ] + } + ] + } + ] +} diff --git a/tf-module/old_vpc/terraform.tfvars.example b/tf-module/old_vpc/terraform.tfvars.example new file mode 100644 index 00000000..0348bfff --- /dev/null +++ b/tf-module/old_vpc/terraform.tfvars.example @@ -0,0 +1,46 @@ +# AWS Region +aws_region = "us-west-2" + +# Resource naming prefix +name_prefix = "Unity-VPC" + +# VPC CIDR 
blocks +vpc_cidr_block = "10.52.8.0/22" +secondary_cidr_block = "10.0.0.0/16" + +# Availability zones (adjust based on your region) +availability_zones = [ + "us-west-2a", + "us-west-2b", + "us-west-2c", + "us-west-2d" +] + +# NAT Gateway subnet +nat_gateway_subnet_cidr = "10.52.11.0/28" + +# Public subnets (one per AZ) +public_subnet_cidrs = [ + "10.52.8.0/24", # AZ1 + "10.52.9.0/24", # AZ2 + "10.0.64.0/19", # AZ3 + "10.0.96.0/19" # AZ4 +] + +# Private subnets (one per AZ) +private_subnet_cidrs = [ + "10.52.10.0/25", # AZ1 + "10.52.10.128/25", # AZ2 + "10.0.0.0/19", # AZ3 + "10.0.32.0/19" # AZ4 +] + +# Enable VPC interface endpoints (optional, adds cost) +enable_interface_endpoints = false + +# Common tags applied to all resources +common_tags = { + Project = "Unity" + Environment = "Dev" + ManagedBy = "Terraform" +} diff --git a/tf-module/old_vpc/variables.tf b/tf-module/old_vpc/variables.tf new file mode 100644 index 00000000..25021d45 --- /dev/null +++ b/tf-module/old_vpc/variables.tf @@ -0,0 +1,72 @@ +variable "aws_region" { + description = "AWS region where the VPC will be created" + type = string + default = "us-west-2" +} + +variable "name_prefix" { + description = "Prefix for naming resources" + type = string + default = "Unity-VPC" +} + +variable "vpc_cidr_block" { + description = "Primary CIDR block for the VPC" + type = string + default = "10.52.8.0/22" +} + +variable "secondary_cidr_block" { + description = "Secondary CIDR block for the VPC" + type = string + default = "10.0.0.0/16" +} + +variable "availability_zones" { + description = "List of availability zones to use for subnets" + type = list(string) + default = ["us-west-2a", "us-west-2b", "us-west-2c", "us-west-2d"] +} + +variable "nat_gateway_subnet_cidr" { + description = "CIDR block for NAT Gateway subnet" + type = string + default = "10.52.11.0/28" +} + +variable "public_subnet_cidrs" { + description = "List of CIDR blocks for public subnets" + type = list(string) + default = [ + "10.52.8.0/24", # Public Subnet 01 - AZ1 + "10.52.9.0/24", # Public Subnet 02 - AZ2 + "10.0.64.0/19", # Public Subnet 03 - AZ3 + "10.0.96.0/19" # Public Subnet 04 - AZ4 + ] +} + +variable "private_subnet_cidrs" { + description = "List of CIDR blocks for private subnets" + type = list(string) + default = [ + "10.52.10.0/25", # Private Subnet 01 - AZ1 + "10.52.10.128/25", # Private Subnet 02 - AZ2 + "10.0.0.0/19", # Private Subnet 03 - AZ3 + "10.0.32.0/19" # Private Subnet 04 - AZ4 + ] +} + +variable "enable_interface_endpoints" { + description = "Enable VPC interface endpoints for AWS services" + type = bool + default = false +} + +variable "common_tags" { + description = "Common tags to apply to all resources" + type = map(string) + default = { + Project = "Unity" + ManagedBy = "Terraform" + } +} From daac3b0b1e79ccc56d92de3be165608416715db6 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Tue, 25 Nov 2025 15:12:03 -0800 Subject: [PATCH 16/35] fix: add dependency for cidr blk in subnet --- tf-module/old_vpc/main.tf | 4 + tf-module/old_vpc/terraform.tfstate | 348 ++++++++++++++++++++++++++-- 2 files changed, 339 insertions(+), 13 deletions(-) diff --git a/tf-module/old_vpc/main.tf b/tf-module/old_vpc/main.tf index ba215f6f..1b9a8afa 100644 --- a/tf-module/old_vpc/main.tf +++ b/tf-module/old_vpc/main.tf @@ -104,6 +104,8 @@ resource "aws_subnet" "public" { Name = "${var.name_prefix}-Pub-Subnet-${count.index + 1}" } ) + + depends_on = [aws_vpc_ipv4_cidr_block_association.secondary] } # Private Subnets @@ -120,6 +122,8 @@ resource "aws_subnet" 
"private" { Name = "${var.name_prefix}-Priv-Subnet-${count.index + 1}" } ) + + depends_on = [aws_vpc_ipv4_cidr_block_association.secondary] } # Route Table for NAT Gateway Subnet diff --git a/tf-module/old_vpc/terraform.tfstate b/tf-module/old_vpc/terraform.tfstate index 0ba83288..940b3c21 100644 --- a/tf-module/old_vpc/terraform.tfstate +++ b/tf-module/old_vpc/terraform.tfstate @@ -1,7 +1,7 @@ { "version": 4, "terraform_version": "1.2.6", - "serial": 26, + "serial": 38, "lineage": "f4d5029b-85b5-b410-34df-b5a1580553e7", "outputs": { "internet_gateway_id": { @@ -54,6 +54,23 @@ ] ] }, + "private_subnet_ids": { + "value": [ + "subnet-0b6bb87de750f3241", + "subnet-0776bff97794b5782", + "subnet-0b71ecd458767f275", + "subnet-04528e22de2ee583d" + ], + "type": [ + "tuple", + [ + "string", + "string", + "string", + "string" + ] + ] + }, "public_route_table_ids": { "value": [ "rtb-04a445b289c2e2e56", @@ -88,6 +105,23 @@ ] ] }, + "public_subnet_ids": { + "value": [ + "subnet-006ed82a83395edb2", + "subnet-0ccc52d248d1ccecf", + "subnet-0606ca3056eec4ec8", + "subnet-0bcc85818ce5a7659" + ], + "type": [ + "tuple", + [ + "string", + "string", + "string", + "string" + ] + ] + }, "s3_vpc_endpoint_id": { "value": "vpce-0052fce3ffb2e63f9", "type": "string" @@ -119,7 +153,7 @@ "allocation_id": "eipalloc-0b4a6e4c21a868fb4", "arn": "arn:aws:ec2:us-west-2:979188186972:elastic-ip/eipalloc-0b4a6e4c21a868fb4", "associate_with_private_ip": null, - "association_id": "", + "association_id": "eipassoc-0238dd27fabdd8031", "carrier_ip": "", "customer_owned_ip": "", "customer_owned_ipv4_pool": "", @@ -128,9 +162,9 @@ "instance": "", "ipam_pool_id": null, "network_border_group": "us-west-2", - "network_interface": "", - "private_dns": null, - "private_ip": "", + "network_interface": "eni-076b2efeb61790323", + "private_dns": "ip-10-52-11-12.us-west-2.compute.internal", + "private_ip": "10.52.11.12", "ptr_record": "", "public_dns": "ec2-35-165-209-70.us-west-2.compute.amazonaws.com", "public_ip": "35.165.209.70", @@ -206,7 +240,7 @@ "network_interface_id": "eni-076b2efeb61790323", "private_ip": "10.52.11.12", "public_ip": "35.165.209.70", - "secondary_allocation_ids": null, + "secondary_allocation_ids": [], "secondary_private_ip_address_count": 0, "secondary_private_ip_addresses": [], "subnet_id": "subnet-0ba09be444f1a3be4", @@ -722,14 +756,168 @@ "type": "aws_route_table_association", "name": "private", "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", - "instances": [] + "instances": [ + { + "index_key": 0, + "schema_version": 0, + "attributes": { + "gateway_id": "", + "id": "rtbassoc-0556374411fd3dcb7", + "route_table_id": "rtb-00e8ba8f033871d37", + "subnet_id": "subnet-0b6bb87de750f3241", + "timeouts": null + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjozMDAwMDAwMDAwMDAsImRlbGV0ZSI6MzAwMDAwMDAwMDAwLCJ1cGRhdGUiOjEyMDAwMDAwMDAwMH19", + "dependencies": [ + "aws_route_table.private", + "aws_subnet.private", + "aws_vpc.main", + "aws_vpc_ipv4_cidr_block_association.secondary" + ] + }, + { + "index_key": 1, + "schema_version": 0, + "attributes": { + "gateway_id": "", + "id": "rtbassoc-030e2b89b3feee64e", + "route_table_id": "rtb-040833f470d742920", + "subnet_id": "subnet-0776bff97794b5782", + "timeouts": null + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjozMDAwMDAwMDAwMDAsImRlbGV0ZSI6MzAwMDAwMDAwMDAwLCJ1cGRhdGUiOjEyMDAwMDAwMDAwMH19", + "dependencies": [ + 
"aws_route_table.private", + "aws_subnet.private", + "aws_vpc.main", + "aws_vpc_ipv4_cidr_block_association.secondary" + ] + }, + { + "index_key": 2, + "schema_version": 0, + "attributes": { + "gateway_id": "", + "id": "rtbassoc-055afff2cc653dcce", + "route_table_id": "rtb-07f21adf632394e67", + "subnet_id": "subnet-0b71ecd458767f275", + "timeouts": null + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjozMDAwMDAwMDAwMDAsImRlbGV0ZSI6MzAwMDAwMDAwMDAwLCJ1cGRhdGUiOjEyMDAwMDAwMDAwMH19", + "dependencies": [ + "aws_route_table.private", + "aws_subnet.private", + "aws_vpc.main", + "aws_vpc_ipv4_cidr_block_association.secondary" + ] + }, + { + "index_key": 3, + "schema_version": 0, + "attributes": { + "gateway_id": "", + "id": "rtbassoc-0579d7caf10b39f9c", + "route_table_id": "rtb-0c12576c2588c9c27", + "subnet_id": "subnet-04528e22de2ee583d", + "timeouts": null + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjozMDAwMDAwMDAwMDAsImRlbGV0ZSI6MzAwMDAwMDAwMDAwLCJ1cGRhdGUiOjEyMDAwMDAwMDAwMH19", + "dependencies": [ + "aws_route_table.private", + "aws_subnet.private", + "aws_vpc.main", + "aws_vpc_ipv4_cidr_block_association.secondary" + ] + } + ] }, { "mode": "managed", "type": "aws_route_table_association", "name": "public", "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", - "instances": [] + "instances": [ + { + "index_key": 0, + "schema_version": 0, + "attributes": { + "gateway_id": "", + "id": "rtbassoc-01022900521940f4b", + "route_table_id": "rtb-04a445b289c2e2e56", + "subnet_id": "subnet-006ed82a83395edb2", + "timeouts": null + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjozMDAwMDAwMDAwMDAsImRlbGV0ZSI6MzAwMDAwMDAwMDAwLCJ1cGRhdGUiOjEyMDAwMDAwMDAwMH19", + "dependencies": [ + "aws_route_table.public", + "aws_subnet.public", + "aws_vpc.main", + "aws_vpc_ipv4_cidr_block_association.secondary" + ] + }, + { + "index_key": 1, + "schema_version": 0, + "attributes": { + "gateway_id": "", + "id": "rtbassoc-017202068ff0a0b89", + "route_table_id": "rtb-0b663425eeb701116", + "subnet_id": "subnet-0ccc52d248d1ccecf", + "timeouts": null + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjozMDAwMDAwMDAwMDAsImRlbGV0ZSI6MzAwMDAwMDAwMDAwLCJ1cGRhdGUiOjEyMDAwMDAwMDAwMH19", + "dependencies": [ + "aws_route_table.public", + "aws_subnet.public", + "aws_vpc.main", + "aws_vpc_ipv4_cidr_block_association.secondary" + ] + }, + { + "index_key": 2, + "schema_version": 0, + "attributes": { + "gateway_id": "", + "id": "rtbassoc-0fbb58262d715af47", + "route_table_id": "rtb-079bca8d907ff6723", + "subnet_id": "subnet-0606ca3056eec4ec8", + "timeouts": null + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjozMDAwMDAwMDAwMDAsImRlbGV0ZSI6MzAwMDAwMDAwMDAwLCJ1cGRhdGUiOjEyMDAwMDAwMDAwMH19", + "dependencies": [ + "aws_route_table.public", + "aws_subnet.public", + "aws_vpc.main", + "aws_vpc_ipv4_cidr_block_association.secondary" + ] + }, + { + "index_key": 3, + "schema_version": 0, + "attributes": { + "gateway_id": "", + "id": "rtbassoc-03fb9eca505205bb4", + "route_table_id": "rtb-077cc6b30507aaa90", + "subnet_id": "subnet-0bcc85818ce5a7659", + "timeouts": null + }, + "sensitive_attributes": [], + "private": 
"eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjozMDAwMDAwMDAwMDAsImRlbGV0ZSI6MzAwMDAwMDAwMDAwLCJ1cGRhdGUiOjEyMDAwMDAwMDAwMH19", + "dependencies": [ + "aws_route_table.public", + "aws_subnet.public", + "aws_vpc.main", + "aws_vpc_ipv4_cidr_block_association.secondary" + ] + } + ] }, { "mode": "managed", @@ -825,7 +1013,8 @@ "sensitive_attributes": [], "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjo2MDAwMDAwMDAwMDAsImRlbGV0ZSI6MTIwMDAwMDAwMDAwMH0sInNjaGVtYV92ZXJzaW9uIjoiMSJ9", "dependencies": [ - "aws_vpc.main" + "aws_vpc.main", + "aws_vpc_ipv4_cidr_block_association.secondary" ] }, { @@ -867,7 +1056,94 @@ "sensitive_attributes": [], "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjo2MDAwMDAwMDAwMDAsImRlbGV0ZSI6MTIwMDAwMDAwMDAwMH0sInNjaGVtYV92ZXJzaW9uIjoiMSJ9", "dependencies": [ - "aws_vpc.main" + "aws_vpc.main", + "aws_vpc_ipv4_cidr_block_association.secondary" + ] + }, + { + "index_key": 2, + "schema_version": 1, + "attributes": { + "arn": "arn:aws:ec2:us-west-2:979188186972:subnet/subnet-0b71ecd458767f275", + "assign_ipv6_address_on_creation": false, + "availability_zone": "us-west-2c", + "availability_zone_id": "usw2-az3", + "cidr_block": "10.0.0.0/19", + "customer_owned_ipv4_pool": "", + "enable_dns64": false, + "enable_lni_at_device_index": 0, + "enable_resource_name_dns_a_record_on_launch": false, + "enable_resource_name_dns_aaaa_record_on_launch": false, + "id": "subnet-0b71ecd458767f275", + "ipv6_cidr_block": "", + "ipv6_cidr_block_association_id": "", + "ipv6_native": false, + "map_customer_owned_ip_on_launch": false, + "map_public_ip_on_launch": false, + "outpost_arn": "", + "owner_id": "979188186972", + "private_dns_hostname_type_on_launch": "ip-name", + "tags": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Priv-Subnet-3", + "Project": "Unity" + }, + "tags_all": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Priv-Subnet-3", + "Project": "Unity" + }, + "timeouts": null, + "vpc_id": "vpc-0c241db231a406be9" + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjo2MDAwMDAwMDAwMDAsImRlbGV0ZSI6MTIwMDAwMDAwMDAwMH0sInNjaGVtYV92ZXJzaW9uIjoiMSJ9", + "dependencies": [ + "aws_vpc.main", + "aws_vpc_ipv4_cidr_block_association.secondary" + ] + }, + { + "index_key": 3, + "schema_version": 1, + "attributes": { + "arn": "arn:aws:ec2:us-west-2:979188186972:subnet/subnet-04528e22de2ee583d", + "assign_ipv6_address_on_creation": false, + "availability_zone": "us-west-2d", + "availability_zone_id": "usw2-az4", + "cidr_block": "10.0.32.0/19", + "customer_owned_ipv4_pool": "", + "enable_dns64": false, + "enable_lni_at_device_index": 0, + "enable_resource_name_dns_a_record_on_launch": false, + "enable_resource_name_dns_aaaa_record_on_launch": false, + "id": "subnet-04528e22de2ee583d", + "ipv6_cidr_block": "", + "ipv6_cidr_block_association_id": "", + "ipv6_native": false, + "map_customer_owned_ip_on_launch": false, + "map_public_ip_on_launch": false, + "outpost_arn": "", + "owner_id": "979188186972", + "private_dns_hostname_type_on_launch": "ip-name", + "tags": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Priv-Subnet-4", + "Project": "Unity" + }, + "tags_all": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Priv-Subnet-4", + "Project": "Unity" + }, + "timeouts": null, + "vpc_id": "vpc-0c241db231a406be9" + }, + "sensitive_attributes": [], + "private": 
"eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjo2MDAwMDAwMDAwMDAsImRlbGV0ZSI6MTIwMDAwMDAwMDAwMH0sInNjaGVtYV92ZXJzaW9uIjoiMSJ9", + "dependencies": [ + "aws_vpc.main", + "aws_vpc_ipv4_cidr_block_association.secondary" ] } ] @@ -917,7 +1193,8 @@ "sensitive_attributes": [], "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjo2MDAwMDAwMDAwMDAsImRlbGV0ZSI6MTIwMDAwMDAwMDAwMH0sInNjaGVtYV92ZXJzaW9uIjoiMSJ9", "dependencies": [ - "aws_vpc.main" + "aws_vpc.main", + "aws_vpc_ipv4_cidr_block_association.secondary" ] }, { @@ -959,7 +1236,51 @@ "sensitive_attributes": [], "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjo2MDAwMDAwMDAwMDAsImRlbGV0ZSI6MTIwMDAwMDAwMDAwMH0sInNjaGVtYV92ZXJzaW9uIjoiMSJ9", "dependencies": [ - "aws_vpc.main" + "aws_vpc.main", + "aws_vpc_ipv4_cidr_block_association.secondary" + ] + }, + { + "index_key": 2, + "schema_version": 1, + "attributes": { + "arn": "arn:aws:ec2:us-west-2:979188186972:subnet/subnet-0606ca3056eec4ec8", + "assign_ipv6_address_on_creation": false, + "availability_zone": "us-west-2c", + "availability_zone_id": "usw2-az3", + "cidr_block": "10.0.64.0/19", + "customer_owned_ipv4_pool": "", + "enable_dns64": false, + "enable_lni_at_device_index": 0, + "enable_resource_name_dns_a_record_on_launch": false, + "enable_resource_name_dns_aaaa_record_on_launch": false, + "id": "subnet-0606ca3056eec4ec8", + "ipv6_cidr_block": "", + "ipv6_cidr_block_association_id": "", + "ipv6_native": false, + "map_customer_owned_ip_on_launch": false, + "map_public_ip_on_launch": true, + "outpost_arn": "", + "owner_id": "979188186972", + "private_dns_hostname_type_on_launch": "ip-name", + "tags": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Pub-Subnet-3", + "Project": "Unity" + }, + "tags_all": { + "ManagedBy": "Terraform", + "Name": "Unity-VPC-Pub-Subnet-3", + "Project": "Unity" + }, + "timeouts": null, + "vpc_id": "vpc-0c241db231a406be9" + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjo2MDAwMDAwMDAwMDAsImRlbGV0ZSI6MTIwMDAwMDAwMDAwMH0sInNjaGVtYV92ZXJzaW9uIjoiMSJ9", + "dependencies": [ + "aws_vpc.main", + "aws_vpc_ipv4_cidr_block_association.secondary" ] }, { @@ -1001,7 +1322,8 @@ "sensitive_attributes": [], "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjo2MDAwMDAwMDAwMDAsImRlbGV0ZSI6MTIwMDAwMDAwMDAwMH0sInNjaGVtYV92ZXJzaW9uIjoiMSJ9", "dependencies": [ - "aws_vpc.main" + "aws_vpc.main", + "aws_vpc_ipv4_cidr_block_association.secondary" ] } ] From f3e1aa362da9d758afa3604ff8d62e9c9d5be87b Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Tue, 25 Nov 2025 15:22:11 -0800 Subject: [PATCH 17/35] chore: rename folder --- tf-module/{old_vpc => unity_vpc}/README.md | 0 tf-module/{old_vpc => unity_vpc}/main.tf | 0 tf-module/{old_vpc => unity_vpc}/outputs.tf | 0 tf-module/{old_vpc => unity_vpc}/terraform.tfstate | 0 tf-module/{old_vpc => unity_vpc}/terraform.tfvars.example | 0 tf-module/{old_vpc => unity_vpc}/variables.tf | 0 6 files changed, 0 insertions(+), 0 deletions(-) rename tf-module/{old_vpc => unity_vpc}/README.md (100%) rename tf-module/{old_vpc => unity_vpc}/main.tf (100%) rename tf-module/{old_vpc => unity_vpc}/outputs.tf (100%) rename tf-module/{old_vpc => unity_vpc}/terraform.tfstate (100%) rename tf-module/{old_vpc => unity_vpc}/terraform.tfvars.example (100%) rename tf-module/{old_vpc => unity_vpc}/variables.tf (100%) diff --git a/tf-module/old_vpc/README.md b/tf-module/unity_vpc/README.md similarity index 100% 
rename from tf-module/old_vpc/README.md rename to tf-module/unity_vpc/README.md diff --git a/tf-module/old_vpc/main.tf b/tf-module/unity_vpc/main.tf similarity index 100% rename from tf-module/old_vpc/main.tf rename to tf-module/unity_vpc/main.tf diff --git a/tf-module/old_vpc/outputs.tf b/tf-module/unity_vpc/outputs.tf similarity index 100% rename from tf-module/old_vpc/outputs.tf rename to tf-module/unity_vpc/outputs.tf diff --git a/tf-module/old_vpc/terraform.tfstate b/tf-module/unity_vpc/terraform.tfstate similarity index 100% rename from tf-module/old_vpc/terraform.tfstate rename to tf-module/unity_vpc/terraform.tfstate diff --git a/tf-module/old_vpc/terraform.tfvars.example b/tf-module/unity_vpc/terraform.tfvars.example similarity index 100% rename from tf-module/old_vpc/terraform.tfvars.example rename to tf-module/unity_vpc/terraform.tfvars.example diff --git a/tf-module/old_vpc/variables.tf b/tf-module/unity_vpc/variables.tf similarity index 100% rename from tf-module/old_vpc/variables.tf rename to tf-module/unity_vpc/variables.tf From 305f3b602533bd73ef7eaf046a85e17ca439bfa8 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Thu, 4 Dec 2025 14:09:01 -0800 Subject: [PATCH 18/35] feat: adding terraform code for catalya --- tf-module/uds_catalia/catalia_ddb.tf | 63 +++++ tf-module/uds_catalia/daac_archiver.tf | 87 +++++++ .../uds_catalia/daac_archiver_sns_policy.json | 35 +++ tf-module/uds_catalia/main.tf | 33 +++ .../smce_keys_assume_deployment.sh | 140 ++++++++++ tf-module/uds_catalia/uds_api_lambda.tf | 33 +++ tf-module/uds_catalia/variables.tf | 243 ++++++++++++++++++ tf-module/uds_catalia/versions.tf | 3 + tf-module/uds_catalia_iam/main.tf | 52 ++++ .../smce_keys_assume_deployment.sh | 140 ++++++++++ .../uds_lambda_processing_role.tf | 186 ++++++++++++++ tf-module/uds_catalia_iam/variables.tf | 243 ++++++++++++++++++ tf-module/uds_catalia_iam/versions.tf | 3 + 13 files changed, 1261 insertions(+) create mode 100644 tf-module/uds_catalia/catalia_ddb.tf create mode 100644 tf-module/uds_catalia/daac_archiver.tf create mode 100644 tf-module/uds_catalia/daac_archiver_sns_policy.json create mode 100644 tf-module/uds_catalia/main.tf create mode 100755 tf-module/uds_catalia/smce_keys_assume_deployment.sh create mode 100644 tf-module/uds_catalia/uds_api_lambda.tf create mode 100644 tf-module/uds_catalia/variables.tf create mode 100644 tf-module/uds_catalia/versions.tf create mode 100644 tf-module/uds_catalia_iam/main.tf create mode 100755 tf-module/uds_catalia_iam/smce_keys_assume_deployment.sh create mode 100644 tf-module/uds_catalia_iam/uds_lambda_processing_role.tf create mode 100644 tf-module/uds_catalia_iam/variables.tf create mode 100644 tf-module/uds_catalia_iam/versions.tf diff --git a/tf-module/uds_catalia/catalia_ddb.tf b/tf-module/uds_catalia/catalia_ddb.tf new file mode 100644 index 00000000..3ee05311 --- /dev/null +++ b/tf-module/uds_catalia/catalia_ddb.tf @@ -0,0 +1,63 @@ +resource "aws_dynamodb_table" "uds_ctla_auth_ddb" { + name = "${var.prefix}-uds_ctla_auth_ddb" + billing_mode = "PAY_PER_REQUEST" + hash_key = "userGroup" + range_key = "projectMap" + + attribute { + name = "userGroup" + type = "S" + } + + attribute { + name = "projectMap" + type = "S" + } +# attribute { +# name = "access" +# type = "B" +# } + + global_secondary_index { + name = "${var.prefix}-uds_ctla_auth_ddb_gsi_user_group" + hash_key = "userGroup" + projection_type = "KEYS_ONLY" + } +# +# global_secondary_index { +# name = "GSI2_Project_Venue" +# hash_key = "Project" +# range_key = "Venue" +# 
projection_type = "KEYS_ONLY" +# } +} + +resource "aws_dynamodb_table" "uds_ctla_daac_handshake" { + name = "${var.prefix}-uds_ctla_daac_handshake" + billing_mode = "PAY_PER_REQUEST" + hash_key = "sourceProject" + range_key = "targetProject" + + attribute { + name = "sourceProject" + type = "S" + } + + attribute { + name = "targetProject" + type = "S" + } + +# global_secondary_index { +# name = "${var.prefix}-uds_auth_ddb_gsi_" +# hash_key = "userGroup" +# projection_type = "KEYS_ONLY" +# } +# +# global_secondary_index { +# name = "GSI2_Project_Venue" +# hash_key = "Project" +# range_key = "Venue" +# projection_type = "KEYS_ONLY" +# } +} \ No newline at end of file diff --git a/tf-module/uds_catalia/daac_archiver.tf b/tf-module/uds_catalia/daac_archiver.tf new file mode 100644 index 00000000..cb0a5e00 --- /dev/null +++ b/tf-module/uds_catalia/daac_archiver.tf @@ -0,0 +1,87 @@ +#resource "aws_lambda_function" "daac_archiver_request" { +# filename = local.lambda_file_name +# source_code_hash = filebase64sha256(local.lambda_file_name) +# function_name = "${var.prefix}-daac_archiver_request" +# role = var.lambda_processing_role_arn +# handler = "cumulus_lambda_functions.daac_archiver.lambda_function.lambda_handler_request" +# runtime = "python3.9" +# timeout = 300 +# environment { +# variables = { +# LOG_LEVEL = var.log_level +# ES_URL = aws_elasticsearch_domain.uds-es.endpoint +# ES_PORT = 443 +# } +# } +# +# vpc_config { +# subnet_ids = var.cumulus_lambda_subnet_ids +# security_group_ids = local.security_group_ids_set ? var.security_group_ids : [aws_security_group.unity_cumulus_lambda_sg[0].id] +# } +# tags = var.tags +#} + +#resource "aws_lambda_event_source_mapping" "daac_archiver_request_lambda_trigger" { // https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_event_source_mapping#sqs +# event_source_arn = aws_sqs_queue.granules_cnm_response_writer.arn +# function_name = aws_lambda_function.daac_archiver_request.arn +# batch_size = 1 +# enabled = true +#} + +######################################### + +resource "aws_lambda_function" "uds_daac_archiver_response" { + filename = local.lambda_file_name + source_code_hash = filebase64sha256(local.lambda_file_name) + function_name = "${var.prefix}-uds_daac_archiver_response" + role = local.lambda_role_arn + handler = "cumulus_lambda_functions.daac_archiver.lambda_function.lambda_handler_response" + runtime = "python3.9" + timeout = 300 + memory_size = 256 + environment { + variables = { + LOG_LEVEL = var.log_level + ARCHIVAL_STATUS_MECHANISM = "UDS" # UDS or FAST_STAC + DS_URL = "TODO" + SFA_USERNAME = "TODO" + SFA_PASSWORD = "TODO" + SFA_AUTH_KEY = "TODO" + SFA_AUTH_VALUE = "TODO" + SFA_BEARER_TOKEN = "TODO" + } + } + + vpc_config { + subnet_ids = var.cumulus_lambda_subnet_ids + security_group_ids = local.security_group_ids_set ? 
var.security_group_ids : [data.aws_security_group.uds_lambda_sg_no_ingress_all_egress.id]
+  }
+  tags = var.tags
+}
+
+
+resource "aws_sns_topic" "uds_daac_archiver_response" {
+  name = "${var.prefix}-uds_daac_archiver_response"
+  tags = var.tags
+  // TODO add access policy to be pushed from DAAC / other AWS account
+}
+
+resource "aws_sns_topic_policy" "daac_archiver_response_policy" {
+  arn = aws_sns_topic.uds_daac_archiver_response.arn
+  policy = templatefile("${path.module}/daac_archiver_sns_policy.json", {
+    region: var.aws_region,
+    accountId: local.account_id,
+    snsName: "${var.prefix}-uds_daac_archiver_response",
+  })
+}
+
+module "daac_archiver_response" {
+  source = "../sqs--sns-lambda-connector"
+
+  account_id = local.account_id
+  lambda_arn = aws_lambda_function.uds_daac_archiver_response.arn
+  lambda_processing_role_arn = local.lambda_role_arn
+  name = "daac_archiver_response"
+  prefix = var.prefix
+  sns_arn = aws_sns_topic.uds_daac_archiver_response.arn
+}
\ No newline at end of file
diff --git a/tf-module/uds_catalia/daac_archiver_sns_policy.json b/tf-module/uds_catalia/daac_archiver_sns_policy.json
new file mode 100644
index 00000000..5845a9f6
--- /dev/null
+++ b/tf-module/uds_catalia/daac_archiver_sns_policy.json
@@ -0,0 +1,35 @@
+{
+  "Version": "2008-10-17",
+  "Id": "__default_policy_ID",
+  "Statement": [
+    {
+      "Sid": "1",
+      "Effect": "Allow",
+      "Principal": {
+        "Service": ["lambda.amazonaws.com"]
+      },
+      "Action": [
+        "SNS:GetTopicAttributes",
+        "SNS:SetTopicAttributes",
+        "SNS:AddPermission",
+        "SNS:RemovePermission",
+        "SNS:DeleteTopic",
+        "SNS:Subscribe",
+        "SNS:ListSubscriptionsByTopic",
+        "SNS:Publish"
+      ],
+      "Resource": "arn:aws:sns:${region}:${accountId}:${snsName}"
+    },
+    {
+      "Sid": "2",
+      "Effect": "Allow",
+      "Principal": {
+        "AWS": "*"
+      },
+      "Action": [
+        "SNS:Publish"
+      ],
+      "Resource": "arn:aws:sns:${region}:${accountId}:${snsName}"
+    }
+  ]
+}
diff --git a/tf-module/uds_catalia/main.tf b/tf-module/uds_catalia/main.tf
new file mode 100644
index 00000000..cfcf0693
--- /dev/null
+++ b/tf-module/uds_catalia/main.tf
@@ -0,0 +1,33 @@
+ provider "aws" {
+  region = var.aws_region
+  ignore_tags {
+    key_prefixes = ["gsfc-ngap"]
+  }
+}
+data "aws_caller_identity" "current" {}
+
+locals {
+  account_id = data.aws_caller_identity.current.account_id
+  lambda_file_name = "${path.module}/build/cumulus_lambda_functions_deployment.zip"
+  security_group_ids_set = var.security_group_ids != null
+  lambda_role_arn = data.aws_iam_role.lambda_processing.arn
+}
+
+variable "buckets" {
+  description = "Map identifying the buckets for the deployment"
+  type = map(object({ name = string, type = string }))
+  default = {}
+}
+## resources = [for k, v in var.dynamo_tables : "${v.arn}/stream/*"]
+#variable "dynamo_tables" {
+#  type = map(object({ name = string, arn = string }))
+#}
+
+data "aws_security_group" "uds_lambda_sg_no_ingress_all_egress" {
+  name = "${var.prefix}-uds_lambda_sg_no_ingress_all_egress"
+}
+
+data "aws_iam_role" "lambda_processing" {
+ # count = var.create_lambda_role ?
1 : 0 + name = "${var.prefix}-lambda-processing" +} \ No newline at end of file diff --git a/tf-module/uds_catalia/smce_keys_assume_deployment.sh b/tf-module/uds_catalia/smce_keys_assume_deployment.sh new file mode 100755 index 00000000..fa804b22 --- /dev/null +++ b/tf-module/uds_catalia/smce_keys_assume_deployment.sh @@ -0,0 +1,140 @@ +#!/bin/bash + +# This script fetches initial temporary credentials from an AWS SSO profile, +# then assumes a specified IAM role, and finally exports the assumed role's +# credentials as environment variables. It also configures a separate +# legacy profile with these assumed role credentials. + +# --- MANUALLY EDIT THIS SECTION --- +# +# The name of the AWS profile you configured for SSO login. +SSO_PROFILE_NAME="saml-pub" +# +# The name of the new, separate profile that will be created/updated +# with the temporary ASSUMED ROLE credentials. +TARGET_PROFILE_NAME="mdps-temp-creds-assumed" # Renamed for clarity +# +# The AWS Account ID where the target role exists. +# YOU MUST FIND AND ADD THIS 12-DIGIT ID. +TARGET_ACCOUNT_ID="979188186972" +# +# The name of the IAM role you want to assume. +TARGET_ROLE_NAME="smce_deployment" +# +# --- END OF MANUAL EDIT SECTION --- + + +# --- Script Logic (Do not edit below) --- + +# Check if jq is installed, as it's required for parsing JSON. +if ! command -v jq &> /dev/null +then + echo "❌ Error: 'jq' is not installed. Please install it to proceed." + echo "In CloudShell, you can install it with: sudo yum install -y jq" + exit 1 +fi + +# Validate placeholder Account ID +if [[ "$TARGET_ACCOUNT_ID" == "YOUR_ACCOUNT_ID_HERE" || -z "$TARGET_ACCOUNT_ID" ]]; then + echo "❌ Error: Please edit the script and replace 'YOUR_ACCOUNT_ID_HERE' with the correct AWS Account ID for the target role." + exit 1 +fi + +echo "🔄 Logging in via SSO profile '$SSO_PROFILE_NAME'..." + +# First, ensure the user has a valid SSO session by logging in. +# This may open a browser for authentication if your session has expired. +aws sso login --profile "$SSO_PROFILE_NAME" +if [ $? -ne 0 ]; then + echo "❌ AWS SSO login failed. Please complete the browser authentication and try again." + exit 1 +fi + +echo "✅ SSO login successful." +echo "🔄 Fetching initial credentials for SSO profile '$SSO_PROFILE_NAME'..." + +# Use the 'export-credentials' command with the 'process' format to get initial temporary credentials as JSON. +initial_credentials_json=$(aws configure export-credentials \ + --profile "$SSO_PROFILE_NAME" \ + --format process) + +if [ $? -ne 0 ]; then + echo "❌ Error: Failed to export initial credentials from AWS SSO profile." + exit 1 +fi + +# Use jq to parse the initial JSON and extract the credentials. +initial_access_key_id=$(echo "$initial_credentials_json" | jq -r '.AccessKeyId') +initial_secret_access_key=$(echo "$initial_credentials_json" | jq -r '.SecretAccessKey') +initial_session_token=$(echo "$initial_credentials_json" | jq -r '.SessionToken') + + +echo "what I need to verify exists ${initial_access_key_id}" +# if [ -z "$initial_access_key_id" ] || [ "$initial_access_key_id" == "null" ]; then +# echo "❌ Error: Could not parse initial credentials from the SSO response." +# exit 1 +# fi + +echo "✅ Successfully fetched initial temporary credentials." +echo "🔄 Assuming role '$TARGET_ROLE_NAME' in account '$TARGET_ACCOUNT_ID'..." + +# Temporarily set environment variables with the INITIAL credentials +# so the assume-role command can authenticate. 
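+# With these exported, the next AWS CLI call runs as the SSO principal,
+# which must have sts:AssumeRole permission on the target role.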
+export AWS_ACCESS_KEY_ID="$initial_access_key_id"
+export AWS_SECRET_ACCESS_KEY="$initial_secret_access_key"
+export AWS_SESSION_TOKEN="$initial_session_token"
+
+# Construct the Role ARN
+role_arn="arn:aws:iam::${TARGET_ACCOUNT_ID}:role/${TARGET_ROLE_NAME}"
+# Create a unique session name including the username and date
+session_name="${USER:-$(whoami)}-$(date +%Y%m%d%H%M%S)"
+
+# Call assume-role using the initial credentials
+assumed_role_json=$(aws sts assume-role \
+    --role-arn "$role_arn" \
+    --role-session-name "$session_name" \
+    --output json)
+# Capture the exit status now; the 'unset' calls below would overwrite $?.
+assume_status=$?
+
+# Unset the temporary initial credentials immediately for security
+unset AWS_ACCESS_KEY_ID
+unset AWS_SECRET_ACCESS_KEY
+unset AWS_SESSION_TOKEN
+
+if [ "$assume_status" -ne 0 ]; then
+    echo "❌ Error: Failed to assume role '$TARGET_ROLE_NAME'."
+    exit 1
+fi
+
+# Parse the credentials for the ASSUMED role
+access_key_id=$(echo "$assumed_role_json" | jq -r '.Credentials.AccessKeyId')
+secret_access_key=$(echo "$assumed_role_json" | jq -r '.Credentials.SecretAccessKey')
+session_token=$(echo "$assumed_role_json" | jq -r '.Credentials.SessionToken')
+
+echo "✅ Successfully assumed role '$TARGET_ROLE_NAME'."
+echo ""
+echo "--- Configuring profile: '$TARGET_PROFILE_NAME' with ASSUMED role credentials ---"
+
+# Configure the new/target profile with the ASSUMED ROLE temporary credentials.
+aws configure set aws_access_key_id "$access_key_id" --profile "$TARGET_PROFILE_NAME"
+aws configure set aws_secret_access_key "$secret_access_key" --profile "$TARGET_PROFILE_NAME"
+aws configure set aws_session_token "$session_token" --profile "$TARGET_PROFILE_NAME"
+aws configure set region "us-west-2" --profile "$TARGET_PROFILE_NAME"
+
+echo "✅ Profile '$TARGET_PROFILE_NAME' has been configured with assumed role keys."
+echo ""
+echo "--- Exporting ASSUMED role environment variables ---"
+echo "To set these in your current shell, source this script instead of executing it:"
+echo "source ./smce_keys_assume_deployment.sh"
+echo ""
+
+# Export the FINAL assumed role credentials. These persist in the caller's
+# shell only when this script is sourced rather than run as a subprocess.
+export AWS_ACCESS_KEY_ID=$access_key_id
+export AWS_SECRET_ACCESS_KEY=$secret_access_key
+export AWS_SESSION_TOKEN=$session_token
+export AWS_REGION=us-west-2
+export AWS_DEFAULT_REGION=us-west-2
+
+echo ""
+echo "🚀 Environment variables are set for the assumed role '$TARGET_ROLE_NAME'."
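+
+# Example usage (illustrative; adjust the names in the manual-edit section first):
+#   source ./smce_keys_assume_deployment.sh
+#   aws sts get-caller-identity --query Arn   # should report the assumed role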
+ diff --git a/tf-module/uds_catalia/uds_api_lambda.tf b/tf-module/uds_catalia/uds_api_lambda.tf new file mode 100644 index 00000000..de34c91b --- /dev/null +++ b/tf-module/uds_catalia/uds_api_lambda.tf @@ -0,0 +1,33 @@ +resource "aws_lambda_function" "uds_api_1" { + filename = local.lambda_file_name + source_code_hash = filebase64sha256(local.lambda_file_name) + function_name = "${var.prefix}-uds_api_1" + role = local.lambda_role_arn + handler = "cumulus_lambda_functions.uds_api.web_service.handler" + runtime = "python3.9" + timeout = 300 + memory_size = 512 + environment { + variables = { + LOG_LEVEL = var.log_level +# UNITY_DEFAULT_PROVIDER = var.unity_default_provider + COLLECTION_CREATION_LAMBDA_NAME = "arn:aws:lambda:${var.aws_region}:${local.account_id}:function:${var.prefix}-uds_api_1" +# SNS_TOPIC_ARN = var.cnm_sns_topic_arn + DAAC_SNS_TOPIC_ARN = aws_sns_topic.uds_daac_archiver_response.arn + DAPA_API_PREIFX_KEY = var.dapa_api_prefix + CORS_ORIGINS = var.cors_origins + UDS_BASE_URL = var.uds_base_url +# ES_URL = aws_elasticsearch_domain.uds-es.endpoint +# ES_PORT = 443 +# REPORT_TO_EMS = var.report_to_ems + ADMIN_COMMA_SEP_GROUPS = var.comma_separated_admin_groups + DAPA_API_URL_BASE = "${var.uds_base_url}/${var.dapa_api_prefix}" + } + } + + vpc_config { + subnet_ids = var.cumulus_lambda_subnet_ids + security_group_ids = local.security_group_ids_set ? var.security_group_ids : [data.aws_security_group.uds_lambda_sg_no_ingress_all_egress.id] + } + tags = var.tags +} diff --git a/tf-module/uds_catalia/variables.tf b/tf-module/uds_catalia/variables.tf new file mode 100644 index 00000000..08e144e2 --- /dev/null +++ b/tf-module/uds_catalia/variables.tf @@ -0,0 +1,243 @@ +variable "log_level" { + type = string + default = "20" + description = "Lambda Log Level. Follow Python3 log level numbers info=20, warning=30, etc..." 
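+  # Standard Python numeric levels: 10=DEBUG, 20=INFO, 30=WARNING, 40=ERROR, 50=CRITICAL.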
+} +variable "prefix" { + type = string +} +variable "aws_region" { + type = string + default = "us-west-2" +} + +variable "account_id" { + type = string + description = "AWS Account ID" +} + +variable "tags" { + description = "Tags to be applied to Cumulus resources that support tags" + type = map(string) + default = {} +} +variable "cumulus_lambda_vpc_id" { + type = string +} +variable "security_group_ids" { + description = "Security Group IDs for Lambdas" + type = list(string) + default = null +} +variable "cumulus_lambda_subnet_ids" { + description = "Subnet IDs for Lambdas" + type = list(string) + default = null +} +variable "permissions_boundary_arn" { + type = string + default = null +} + +variable "dapa_api_prefix" { + type = string + description = "An API Gateway resource to identify the Project Name that this specific resource is integrated with" + default = "am-uds-dapa" +} + +variable "uds_base_url" { + type = string +} +variable "cors_origins" { + default = "" + type = string + description = "Comma separated origins for CORS" +} +variable "comma_separated_admin_groups" { + type = string + description = "comma separated cognito groups which will be authorized as ADMIN group" +} + +#variable "valid_file_type" { +# type = string +# description = "metadata type name which is used to check if a file should be read as JSON metadata file" +# default = "metadata" +#} +#variable "metadata_stac_file_postfix" { +# type = string +# description = "Comma separated File Postfix for STAC JSON metadata files" +# default = "STAC.JSON" +#} +#variable "metadata_s4pa_file_postfix" { +# type = string +# description = "Comma separated File Postfix for PDS XML metadata files" +#} + + + + + +# + +# +#variable "cnm_sns_topic_arn" { +# description = "SNS ARN of CNM submission topic" +# type = string +#} +# +#variable "unity_default_provider" { +# type = string +# description = "default provider name" +# +#} +# + +# +# +#variable "unity_ui_base_url" { +# type = string +# description = "Example: https://www.dev.mdps.mcp.nasa.gov:4443. Make sure it does NOT end with `/`" +#} +# +#variable "report_to_ems" { +# type = string +# default = "TRUE" +#} +# +#variable "register_custom_metadata" { +# type = string +# default = "TRUE" +# description = "flag to decide if custom metadata will be added. 
" +#} +# +#variable "lambda_processing_role_arn" { +# type = string +#} +# +#variable "uds_es_cluster_instance_count" { +# type = number +# default = 2 +# description = "How many EC2 instances for Opensearch" +#} +# +#variable "uds_es_cluster_instance_type" { +# type = string +# default = "r5.large.elasticsearch" +# description = "EC2 instance type for Opensearch" +#} +# + +# +#variable "report_granules_topic" { +# type = string +# description = "SNS name" +#} +# +#variable "shared_services_rest_api_name" { +# type = string +# description = "Shared services REST API name" +# default = "Unity Shared Services REST API Gateway" +#} +# +#variable "rest_api_stage" { +# type = string +# description = "REST API Stage Name" +# default = "dev" +#} +# +#variable "unity_cognito_authorizer__authorizer_id" { +# type = string +# description = "Example: 0h9egs" +#} +# +#variable "cors_200_response_parameters" { +# type = map(bool) +# default = { +# "method.response.header.Access-Control-Allow-Credentials" = true +# "method.response.header.Access-Control-Allow-Headers" = true +# "method.response.header.Access-Control-Allow-Methods" = true +# "method.response.header.Access-Control-Allow-Origin" = true +# "method.response.header.Access-Control-Expose-Headers" = true +# "method.response.header.Access-Control-Max-Age" = true +# } +#} +# +#variable "cors_integration_response" { +# type = map(string) +# default = { +# "method.response.header.Access-Control-Allow-Credentials" = "'true'", +# "method.response.header.Access-Control-Allow-Headers" = "'Content-Type,X-Amz-Date,Authorization,X-Api-Key,X-Amz-Security-Token'", +# "method.response.header.Access-Control-Allow-Methods" = "'DELETE,GET,HEAD,OPTIONS,PATCH,POST,PUT'", +# "method.response.header.Access-Control-Allow-Origin" = "'*'" +# "method.response.header.Access-Control-Expose-Headers" = "'Access-Control-Allow-Methods,Access-Control-Expose-Headers,Access-Control-Max-Age'" +# "method.response.header.Access-Control-Max-Age" = "'300'" +# } +#} + + +variable "health_check_marketplace_item" { + type = string + default = "shared-services" + description = "name of the portion of Marketplace item as path of SSM token" +} + +variable "health_check_component_name" { + type = string + default = "data-catalog" + description = "name of the portion of Marketplace item as path of SSM token" +} + +variable "is_deploying_healthcheck" { + type = bool + default = true + description = "flag to specify if deploying health check" +} + +variable "health_check_base_path" { + type = string + default = "/unity" + description = "base path for healthcheck which should start with, but not end with `/`" +} + +// << Variables for granules_cnm_ingester >> +variable "granules_cnm_ingester__sqs_visibility_timeout_seconds" { + type = number + default = 300 + description = "when a lambda ends in error, how much sqs should wait till it is retried again. (in seconds). defaulted to 5 min" +} + +variable "granules_cnm_ingester__sqs_retried_count" { + type = number + default = 3 + description = "How many times it is retried before pushing it to DLQ. 
defaulted to 3 times" +} + +variable "granules_cnm_ingester__lambda_concurrency" { + type = number + default = 20 + description = "How many Lambdas can be executed for CNM ingester concurrently" +} + +variable "granules_cnm_response_writer__lambda_concurrency" { + type = number + default = 20 + description = "How many Lambdas can be executed for CNM Response Writer concurrently" +} + +variable "granules_cnm_ingester__bucket_notification_prefix" { + type = string + default = "stage_out" + description = "path to the directory where catalogs.json will be written" +} + +variable "granules_cnm_ingester__s3_glob" { + type = string + default = "*unity*" + description = "GLOB expression that has all s3 buckets connecting to SNS topic" +} +#variable "granules_cnm_ingester__is_deploying_bucket" { +# type = bool +# default = false +# description = "flag to specify if deploying example bucket" +#} +// << Variables for granules_cnm_ingester END >> diff --git a/tf-module/uds_catalia/versions.tf b/tf-module/uds_catalia/versions.tf new file mode 100644 index 00000000..684b3650 --- /dev/null +++ b/tf-module/uds_catalia/versions.tf @@ -0,0 +1,3 @@ +terraform { + required_version = ">= 0.13" +} \ No newline at end of file diff --git a/tf-module/uds_catalia_iam/main.tf b/tf-module/uds_catalia_iam/main.tf new file mode 100644 index 00000000..29b50bca --- /dev/null +++ b/tf-module/uds_catalia_iam/main.tf @@ -0,0 +1,52 @@ + provider "aws" { + region = var.aws_region + ignore_tags { + key_prefixes = ["gsfc-ngap"] + } +} +data "aws_caller_identity" "current" {} + +locals { + account_id = data.aws_caller_identity.current.account_id + lambda_file_name = "${path.module}/build/cumulus_lambda_functions_deployment.zip" + security_group_ids_set = var.security_group_ids != null +} + +variable "buckets" { + description = "Map identifying the buckets for the deployment" + type = map(object({ name = string, type = string })) + default = {} +} +## resources = [for k, v in var.dynamo_tables : "${v.arn}/stream/*"] +#variable "dynamo_tables" { +# type = map(object({ name = string, arn = string })) +#} + +resource "aws_security_group" "uds_lambda_sg_no_ingress_all_egress" { + name = "${var.prefix}-uds_lambda_sg_no_ingress_all_egress" + vpc_id = var.cumulus_lambda_vpc_id + + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } + ingress { + from_port = 0 + to_port = 0 + protocol = "-1" + self = true + } + tags = var.tags +} + +data "aws_iam_policy_document" "lambda_assume_role_policy" { + statement { + actions = ["sts:AssumeRole"] + principals { + type = "Service" + identifiers = ["lambda.amazonaws.com"] + } + } +} diff --git a/tf-module/uds_catalia_iam/smce_keys_assume_deployment.sh b/tf-module/uds_catalia_iam/smce_keys_assume_deployment.sh new file mode 100755 index 00000000..fa804b22 --- /dev/null +++ b/tf-module/uds_catalia_iam/smce_keys_assume_deployment.sh @@ -0,0 +1,140 @@ +#!/bin/bash + +# This script fetches initial temporary credentials from an AWS SSO profile, +# then assumes a specified IAM role, and finally exports the assumed role's +# credentials as environment variables. It also configures a separate +# legacy profile with these assumed role credentials. + +# --- MANUALLY EDIT THIS SECTION --- +# +# The name of the AWS profile you configured for SSO login. +SSO_PROFILE_NAME="saml-pub" +# +# The name of the new, separate profile that will be created/updated +# with the temporary ASSUMED ROLE credentials. 
+TARGET_PROFILE_NAME="mdps-temp-creds-assumed" # Renamed for clarity
+#
+# The AWS Account ID where the target role exists.
+# YOU MUST FIND AND ADD THIS 12-DIGIT ID.
+TARGET_ACCOUNT_ID="979188186972"
+#
+# The name of the IAM role you want to assume.
+TARGET_ROLE_NAME="smce_deployment"
+#
+# --- END OF MANUAL EDIT SECTION ---
+
+
+# --- Script Logic (Do not edit below) ---
+
+# Check if jq is installed, as it's required for parsing JSON.
+if ! command -v jq &> /dev/null
+then
+    echo "❌ Error: 'jq' is not installed. Please install it to proceed."
+    echo "In CloudShell, you can install it with: sudo yum install -y jq"
+    exit 1
+fi
+
+# Validate placeholder Account ID
+if [[ "$TARGET_ACCOUNT_ID" == "YOUR_ACCOUNT_ID_HERE" || -z "$TARGET_ACCOUNT_ID" ]]; then
+    echo "❌ Error: Please edit the script and replace 'YOUR_ACCOUNT_ID_HERE' with the correct AWS Account ID for the target role."
+    exit 1
+fi
+
+echo "🔄 Logging in via SSO profile '$SSO_PROFILE_NAME'..."
+
+# First, ensure the user has a valid SSO session by logging in.
+# This may open a browser for authentication if your session has expired.
+aws sso login --profile "$SSO_PROFILE_NAME"
+if [ $? -ne 0 ]; then
+    echo "❌ AWS SSO login failed. Please complete the browser authentication and try again."
+    exit 1
+fi
+
+echo "✅ SSO login successful."
+echo "🔄 Fetching initial credentials for SSO profile '$SSO_PROFILE_NAME'..."
+
+# Use the 'export-credentials' command with the 'process' format to get initial temporary credentials as JSON.
+initial_credentials_json=$(aws configure export-credentials \
+    --profile "$SSO_PROFILE_NAME" \
+    --format process)
+
+if [ $? -ne 0 ]; then
+    echo "❌ Error: Failed to export initial credentials from AWS SSO profile."
+    exit 1
+fi
+
+# Use jq to parse the initial JSON and extract the credentials.
+initial_access_key_id=$(echo "$initial_credentials_json" | jq -r '.AccessKeyId')
+initial_secret_access_key=$(echo "$initial_credentials_json" | jq -r '.SecretAccessKey')
+initial_session_token=$(echo "$initial_credentials_json" | jq -r '.SessionToken')
+
+
+echo "what I need to verify exists ${initial_access_key_id}"
+# if [ -z "$initial_access_key_id" ] || [ "$initial_access_key_id" == "null" ]; then
+#     echo "❌ Error: Could not parse initial credentials from the SSO response."
+#     exit 1
+# fi
+
+echo "✅ Successfully fetched initial temporary credentials."
+echo "🔄 Assuming role '$TARGET_ROLE_NAME' in account '$TARGET_ACCOUNT_ID'..."
+
+# Temporarily set environment variables with the INITIAL credentials
+# so the assume-role command can authenticate.
+export AWS_ACCESS_KEY_ID="$initial_access_key_id"
+export AWS_SECRET_ACCESS_KEY="$initial_secret_access_key"
+export AWS_SESSION_TOKEN="$initial_session_token"
+
+# Construct the Role ARN
+role_arn="arn:aws:iam::${TARGET_ACCOUNT_ID}:role/${TARGET_ROLE_NAME}"
+# Create a unique session name including the username and date
+session_name="${USER:-$(whoami)}-$(date +%Y%m%d%H%M%S)"
+
+# Call assume-role using the initial credentials
+assumed_role_json=$(aws sts assume-role \
+    --role-arn "$role_arn" \
+    --role-session-name "$session_name" \
+    --output json)
+# Capture the exit status now; the 'unset' calls below would overwrite $?.
+assume_status=$?
+
+# Unset the temporary initial credentials immediately for security
+unset AWS_ACCESS_KEY_ID
+unset AWS_SECRET_ACCESS_KEY
+unset AWS_SESSION_TOKEN
+
+if [ "$assume_status" -ne 0 ]; then
+    echo "❌ Error: Failed to assume role '$TARGET_ROLE_NAME'."
+    exit 1
+fi
+
+# Parse the credentials for the ASSUMED role
+access_key_id=$(echo "$assumed_role_json" | jq -r '.Credentials.AccessKeyId')
+secret_access_key=$(echo "$assumed_role_json" | jq -r '.Credentials.SecretAccessKey')
+session_token=$(echo "$assumed_role_json" | jq -r '.Credentials.SessionToken')
+
+echo "✅ Successfully assumed role '$TARGET_ROLE_NAME'."
+echo ""
+echo "--- Configuring profile: '$TARGET_PROFILE_NAME' with ASSUMED role credentials ---"
+
+# Configure the new/target profile with the ASSUMED ROLE temporary credentials.
+aws configure set aws_access_key_id "$access_key_id" --profile "$TARGET_PROFILE_NAME"
+aws configure set aws_secret_access_key "$secret_access_key" --profile "$TARGET_PROFILE_NAME"
+aws configure set aws_session_token "$session_token" --profile "$TARGET_PROFILE_NAME"
+aws configure set region "us-west-2" --profile "$TARGET_PROFILE_NAME"
+
+echo "✅ Profile '$TARGET_PROFILE_NAME' has been configured with assumed role keys."
+echo ""
+echo "--- Exporting ASSUMED role environment variables ---"
+echo "To set these in your current shell, source this script instead of executing it:"
+echo "source ./smce_keys_assume_deployment.sh"
+echo ""
+
+# Export the FINAL assumed role credentials. These persist in the caller's
+# shell only when this script is sourced rather than run as a subprocess.
+export AWS_ACCESS_KEY_ID=$access_key_id
+export AWS_SECRET_ACCESS_KEY=$secret_access_key
+export AWS_SESSION_TOKEN=$session_token
+export AWS_REGION=us-west-2
+export AWS_DEFAULT_REGION=us-west-2
+
+echo ""
+echo "🚀 Environment variables are set for the assumed role '$TARGET_ROLE_NAME'."
+
diff --git a/tf-module/uds_catalia_iam/uds_lambda_processing_role.tf b/tf-module/uds_catalia_iam/uds_lambda_processing_role.tf
new file mode 100644
index 00000000..4dbbc0fb
--- /dev/null
+++ b/tf-module/uds_catalia_iam/uds_lambda_processing_role.tf
@@ -0,0 +1,186 @@
+variable "lambda_role_arn" {
+  description = "Optional pre-existing Lambda role ARN"
+  type = string
+  default = null
+}
+
+variable "create_lambda_role" {
+  description = "Whether Terraform should create the IAM role"
+  type = bool
+  default = false
+}
+
+locals {
+  all_bucket_names = [for k, v in var.buckets : v.name]
+  ddb_tbl_arns = ["arn:aws:dynamodb:${var.aws_region}:${var.account_id}:table/${var.prefix}*"]
+  lambda_role_arn = aws_iam_role.lambda_processing.arn
+# lambda_role_arn = var.create_lambda_role ? aws_iam_role.lambda_processing.arn : var.lambda_role_arn
+}
+
+resource "aws_iam_role" "lambda_processing" {
+# count = var.create_lambda_role ?
1 : 0 + name = "${var.prefix}-lambda-processing" + assume_role_policy = data.aws_iam_policy_document.lambda_assume_role_policy.json + permissions_boundary = var.permissions_boundary_arn + tags = var.tags +} +data "aws_iam_policy_document" "lambda_processing_policy" { + statement { + actions = [ + "ec2:CreateNetworkInterface", + "sns:publish", + "cloudformation:DescribeStacks", + "dynamodb:ListTables", + "ec2:DeleteNetworkInterface", + "ec2:DescribeNetworkInterfaces", + "events:DeleteRule", + "events:DescribeRule", + "events:DisableRule", + "events:EnableRule", + "events:ListRules", + "events:PutRule", + "kinesis:DescribeStream", + "kinesis:GetRecords", + "kinesis:GetShardIterator", + "kinesis:ListStreams", + "kinesis:PutRecord", + "lambda:GetFunction", + "lambda:invokeFunction", + "logs:CreateLogGroup", + "logs:CreateLogStream", + "logs:DescribeLogStreams", + "logs:PutLogEvents", + "s3:ListAllMyBuckets", + "sns:List*", + "states:DescribeActivity", + "states:DescribeExecution", + "states:GetActivityTask", + "states:GetExecutionHistory", + "states:ListStateMachines", + "states:SendTaskFailure", + "states:SendTaskSuccess", + "states:StartExecution", + "states:StopExecution" + ] + resources = ["*"] + } + + statement { + actions = [ + "s3:GetAccelerateConfiguration", + "s3:GetLifecycleConfiguration", + "s3:GetReplicationConfiguration", + "s3:GetBucket*", + "s3:PutAccelerateConfiguration", + "s3:PutLifecycleConfiguration", + "s3:PutReplicationConfiguration", + "s3:PutBucket*", + "s3:ListBucket*", + ] + resources = [for b in local.all_bucket_names : "arn:aws:s3:::${b}"] + } + + statement { + actions = [ + "s3:AbortMultipartUpload", + "s3:GetObject*", + "s3:PutObject*", + "s3:ListMultipartUploadParts", + "s3:DeleteObject", + "s3:DeleteObjectVersion", + ] + resources = [for b in local.all_bucket_names : "arn:aws:s3:::${b}/*"] + } + + statement { + actions = [ + "dynamodb:DeleteItem", + "dynamodb:GetItem", + "dynamodb:PutItem", + "dynamodb:Scan", + "dynamodb:UpdateItem", + "dynamodb:BatchWriteItem", + "dynamodb:UpdateContinuousBackups", + "dynamodb:DescribeContinuousBackups", + ] + resources = local.ddb_tbl_arns + } + + statement { + actions = ["dynamodb:Query"] + resources = local.ddb_tbl_arns + } + + statement { + actions = [ + "dynamodb:GetRecords", + "dynamodb:GetShardIterator", + "dynamodb:DescribeStream", + "dynamodb:ListStreams", + ] + resources = local.ddb_tbl_arns + } + + statement { + actions = [ + "sqs:SendMessage", + "sqs:ReceiveMessage", + "sqs:ChangeMessageVisibility", + "sqs:DeleteMessage", + "sqs:GetQueueUrl", + "sqs:GetQueueAttributes", + ] + resources = ["arn:aws:sqs:${var.aws_region}:${data.aws_caller_identity.current.account_id}:*"] + } + +# statement { +# actions = ["kms:Decrypt"] +# resources = [module.archive.provider_kms_key_arn] +# } +# +# statement { +# actions = ["secretsmanager:GetSecretValue"] +# resources = [ +# module.archive.cmr_password_secret_arn, +# module.archive.launchpad_passphrase_secret_arn, +# ] +# } +} + +resource "aws_iam_role_policy" "lambda_processing" { + name = "${var.prefix}_lambda_processing_policy" + role = aws_iam_role.lambda_processing.id + policy = data.aws_iam_policy_document.lambda_processing_policy.json +} +# +#data "aws_iam_policy_document" "lambda_assume_role_policy" { +# statement { +# actions = ["sts:AssumeRole"] +# principals { +# type = "Service" +# identifiers = ["lambda.amazonaws.com"] +# } +# } +#} + +resource "aws_iam_policy" "uds_lambda_processing_policy" { + name = "${var.prefix}-uds_lambda_processing_policy" + description = "IAM 
policy for Lambda to access S3 bucket and publish to SNS topic in another account" + policy = jsonencode({ + Version = "2012-10-17", + Statement = [ + { + Effect = "Allow", + Action = [ + "sts:AssumeRole", + ], + "Resource": "arn:aws:iam::*:role/*" + }, + ] + }) +} + +resource "aws_iam_role_policy_attachment" "uds_lambda_processing_policy_attachment" { + role = aws_iam_role.lambda_processing.name + policy_arn = aws_iam_policy.uds_lambda_processing_policy.arn +} diff --git a/tf-module/uds_catalia_iam/variables.tf b/tf-module/uds_catalia_iam/variables.tf new file mode 100644 index 00000000..08e144e2 --- /dev/null +++ b/tf-module/uds_catalia_iam/variables.tf @@ -0,0 +1,243 @@ +variable "log_level" { + type = string + default = "20" + description = "Lambda Log Level. Follow Python3 log level numbers info=20, warning=30, etc..." +} +variable "prefix" { + type = string +} +variable "aws_region" { + type = string + default = "us-west-2" +} + +variable "account_id" { + type = string + description = "AWS Account ID" +} + +variable "tags" { + description = "Tags to be applied to Cumulus resources that support tags" + type = map(string) + default = {} +} +variable "cumulus_lambda_vpc_id" { + type = string +} +variable "security_group_ids" { + description = "Security Group IDs for Lambdas" + type = list(string) + default = null +} +variable "cumulus_lambda_subnet_ids" { + description = "Subnet IDs for Lambdas" + type = list(string) + default = null +} +variable "permissions_boundary_arn" { + type = string + default = null +} + +variable "dapa_api_prefix" { + type = string + description = "An API Gateway resource to identify the Project Name that this specific resource is integrated with" + default = "am-uds-dapa" +} + +variable "uds_base_url" { + type = string +} +variable "cors_origins" { + default = "" + type = string + description = "Comma separated origins for CORS" +} +variable "comma_separated_admin_groups" { + type = string + description = "comma separated cognito groups which will be authorized as ADMIN group" +} + +#variable "valid_file_type" { +# type = string +# description = "metadata type name which is used to check if a file should be read as JSON metadata file" +# default = "metadata" +#} +#variable "metadata_stac_file_postfix" { +# type = string +# description = "Comma separated File Postfix for STAC JSON metadata files" +# default = "STAC.JSON" +#} +#variable "metadata_s4pa_file_postfix" { +# type = string +# description = "Comma separated File Postfix for PDS XML metadata files" +#} + + + + + +# + +# +#variable "cnm_sns_topic_arn" { +# description = "SNS ARN of CNM submission topic" +# type = string +#} +# +#variable "unity_default_provider" { +# type = string +# description = "default provider name" +# +#} +# + +# +# +#variable "unity_ui_base_url" { +# type = string +# description = "Example: https://www.dev.mdps.mcp.nasa.gov:4443. Make sure it does NOT end with `/`" +#} +# +#variable "report_to_ems" { +# type = string +# default = "TRUE" +#} +# +#variable "register_custom_metadata" { +# type = string +# default = "TRUE" +# description = "flag to decide if custom metadata will be added. 
" +#} +# +#variable "lambda_processing_role_arn" { +# type = string +#} +# +#variable "uds_es_cluster_instance_count" { +# type = number +# default = 2 +# description = "How many EC2 instances for Opensearch" +#} +# +#variable "uds_es_cluster_instance_type" { +# type = string +# default = "r5.large.elasticsearch" +# description = "EC2 instance type for Opensearch" +#} +# + +# +#variable "report_granules_topic" { +# type = string +# description = "SNS name" +#} +# +#variable "shared_services_rest_api_name" { +# type = string +# description = "Shared services REST API name" +# default = "Unity Shared Services REST API Gateway" +#} +# +#variable "rest_api_stage" { +# type = string +# description = "REST API Stage Name" +# default = "dev" +#} +# +#variable "unity_cognito_authorizer__authorizer_id" { +# type = string +# description = "Example: 0h9egs" +#} +# +#variable "cors_200_response_parameters" { +# type = map(bool) +# default = { +# "method.response.header.Access-Control-Allow-Credentials" = true +# "method.response.header.Access-Control-Allow-Headers" = true +# "method.response.header.Access-Control-Allow-Methods" = true +# "method.response.header.Access-Control-Allow-Origin" = true +# "method.response.header.Access-Control-Expose-Headers" = true +# "method.response.header.Access-Control-Max-Age" = true +# } +#} +# +#variable "cors_integration_response" { +# type = map(string) +# default = { +# "method.response.header.Access-Control-Allow-Credentials" = "'true'", +# "method.response.header.Access-Control-Allow-Headers" = "'Content-Type,X-Amz-Date,Authorization,X-Api-Key,X-Amz-Security-Token'", +# "method.response.header.Access-Control-Allow-Methods" = "'DELETE,GET,HEAD,OPTIONS,PATCH,POST,PUT'", +# "method.response.header.Access-Control-Allow-Origin" = "'*'" +# "method.response.header.Access-Control-Expose-Headers" = "'Access-Control-Allow-Methods,Access-Control-Expose-Headers,Access-Control-Max-Age'" +# "method.response.header.Access-Control-Max-Age" = "'300'" +# } +#} + + +variable "health_check_marketplace_item" { + type = string + default = "shared-services" + description = "name of the portion of Marketplace item as path of SSM token" +} + +variable "health_check_component_name" { + type = string + default = "data-catalog" + description = "name of the portion of Marketplace item as path of SSM token" +} + +variable "is_deploying_healthcheck" { + type = bool + default = true + description = "flag to specify if deploying health check" +} + +variable "health_check_base_path" { + type = string + default = "/unity" + description = "base path for healthcheck which should start with, but not end with `/`" +} + +// << Variables for granules_cnm_ingester >> +variable "granules_cnm_ingester__sqs_visibility_timeout_seconds" { + type = number + default = 300 + description = "when a lambda ends in error, how much sqs should wait till it is retried again. (in seconds). defaulted to 5 min" +} + +variable "granules_cnm_ingester__sqs_retried_count" { + type = number + default = 3 + description = "How many times it is retried before pushing it to DLQ. 
defaulted to 3 times" +} + +variable "granules_cnm_ingester__lambda_concurrency" { + type = number + default = 20 + description = "How many Lambdas can be executed for CNM ingester concurrently" +} + +variable "granules_cnm_response_writer__lambda_concurrency" { + type = number + default = 20 + description = "How many Lambdas can be executed for CNM Response Writer concurrently" +} + +variable "granules_cnm_ingester__bucket_notification_prefix" { + type = string + default = "stage_out" + description = "path to the directory where catalogs.json will be written" +} + +variable "granules_cnm_ingester__s3_glob" { + type = string + default = "*unity*" + description = "GLOB expression that has all s3 buckets connecting to SNS topic" +} +#variable "granules_cnm_ingester__is_deploying_bucket" { +# type = bool +# default = false +# description = "flag to specify if deploying example bucket" +#} +// << Variables for granules_cnm_ingester END >> diff --git a/tf-module/uds_catalia_iam/versions.tf b/tf-module/uds_catalia_iam/versions.tf new file mode 100644 index 00000000..684b3650 --- /dev/null +++ b/tf-module/uds_catalia_iam/versions.tf @@ -0,0 +1,3 @@ +terraform { + required_version = ">= 0.13" +} \ No newline at end of file From 23381a05dbd99e0faaad4a94662e535566df3a3a Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 10 Dec 2025 10:30:09 -0800 Subject: [PATCH 19/35] feat: adding code for catalya use case --- .../catalya_uds_api/__init__.py | 0 .../catalya_uds_api/auth_admin_api.py | 204 ++++++++++++++++++ .../catalya_uds_api/granules_archive_api.py | 132 ++++++++++++ .../catalya_uds_api/web_service.py | 69 ++++++ .../daac_archiver/catalia_auth_db.py | 5 + .../catalia_daac_handshakes_db.py | 7 + .../daac_archiver/daac_archiver_catalia.py | 39 +++- .../daac_archiver/test_catalia_auth_db.py | 27 ++- tf-module/uds_catalia/uds_api_lambda.tf | 2 +- 9 files changed, 474 insertions(+), 11 deletions(-) create mode 100644 cumulus_lambda_functions/catalya_uds_api/__init__.py create mode 100644 cumulus_lambda_functions/catalya_uds_api/auth_admin_api.py create mode 100644 cumulus_lambda_functions/catalya_uds_api/granules_archive_api.py create mode 100644 cumulus_lambda_functions/catalya_uds_api/web_service.py diff --git a/cumulus_lambda_functions/catalya_uds_api/__init__.py b/cumulus_lambda_functions/catalya_uds_api/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cumulus_lambda_functions/catalya_uds_api/auth_admin_api.py b/cumulus_lambda_functions/catalya_uds_api/auth_admin_api.py new file mode 100644 index 00000000..50cbe37e --- /dev/null +++ b/cumulus_lambda_functions/catalya_uds_api/auth_admin_api.py @@ -0,0 +1,204 @@ +from typing import Union + +from cumulus_lambda_functions.daac_archiver.catalia_auth_db import CataliaAuthDb +from cumulus_lambda_functions.lib.lambda_logger_generator import LambdaLoggerGenerator +from cumulus_lambda_functions.uds_api.fast_api_utils import FastApiUtils +from cumulus_lambda_functions.uds_api.web_service_constants import WebServiceConstants +from fastapi import APIRouter, HTTPException, Request, Response + +LOGGER = LambdaLoggerGenerator.get_logger(__name__, LambdaLoggerGenerator.get_level_from_env()) + + +import json +import os + +from pydantic import BaseModel + +from cumulus_lambda_functions.lib.authorization.uds_authorizer_abstract import UDSAuthorizorAbstract +from cumulus_lambda_functions.lib.authorization.uds_authorizer_factory import UDSAuthorizerFactory +from mdps_ds_lib.lib.utils.json_validator import JsonValidator + +from 
cumulus_lambda_functions.lib.lambda_logger_generator import LambdaLoggerGenerator + +LOGGER = LambdaLoggerGenerator.get_logger(__name__, LambdaLoggerGenerator.get_level_from_env()) + + +class AuthDeleteModel(BaseModel): + source: str + target: str + group_name: str + + +delete_schema = { + 'type': 'object', + 'required': ['source', 'target', 'group_name'], + 'properties': { + 'source': {'type': 'string'}, + 'target': {'type': 'string'}, + 'group_name': {'type': 'string'}, + } +} + + +class AuthListModel(BaseModel): + group_name: list[str] + + +list_schema = { + 'type': 'object', + 'properties': { + 'tenant': {'type': 'string'}, + 'venue': {'type': 'string'}, + 'group_names': { + 'type': 'array', + 'items': {'type': 'string'}, + 'minItems': 1, + }, + } +} + + +class AuthAddModel(BaseModel): + source: str + target: str + group_name: str + access: bool + + +add_schema = { + 'type': 'object', + 'required': ['source', 'target', 'group_name', 'access'], + 'properties': { + 'source': {'type': 'string'}, + 'target': {'type': 'string'}, + 'group_name': {'type': 'string'}, + 'access': {'type': 'boolean'}, + } +} + + +class AuthCrud: + def __init__(self, authorization_info, request_body): + required_env = ['ADMIN_COMMA_SEP_GROUPS', 'CATALYA_DB_NAME'] + if not all([k in os.environ for k in required_env]): + raise EnvironmentError(f'one or more missing env: {required_env}') + self.__request_body = request_body + self.__authorization_info = authorization_info + self.__admin_groups = [k.strip() for k in os.getenv('ADMIN_COMMA_SEP_GROUPS').split(',')] + self.__cad = CataliaAuthDb(os.getenv('CATALYA_DB_NAME')) + + def is_admin(self): + belonged_admin_groups = list(set(self.__admin_groups) & set(self.__authorization_info['ldap_groups'])) + if len(belonged_admin_groups) < 1: + LOGGER.warn(f'unauthorized attempt to admin function: {self.__authorization_info}') + return { + 'statusCode': 403, + 'body': {'message': f'user is not in admin groups: {self.__admin_groups}'} + } + return { + 'statusCode': 200, + 'body': {} + } + + def list_all_record(self): + return { + 'statusCode': 501, + 'body': {'message': 'Not Implemented Yet'} + } + # return { + # 'statusCode': 200, + # 'body': all_records + # } + + def add_new_record(self): + body_validator_result = JsonValidator(add_schema).validate(self.__request_body) + if body_validator_result is not None: + LOGGER.error(f'invalid add body: {body_validator_result}. request_body: {self.__request_body}') + return { + 'statusCode': 500, + 'body': f'invalid add body: {body_validator_result}. request_body: {self.__request_body}' + } + self.__cad.add(self.__request_body['group_name'], self.__request_body['source'], self.__request_body['target'], self.__request_body['access']) + return { + 'statusCode': 200, + 'body': {'message': 'inserted'} + } + + def delete_record(self): + body_validator_result = JsonValidator(delete_schema).validate(self.__request_body) + if body_validator_result is not None: + LOGGER.error(f'invalid delete body: {body_validator_result}. request_body: {self.__request_body}') + return { + 'statusCode': 500, + 'body': f'invalid delete body: {body_validator_result}. 
request_body: {self.__request_body}'
+            }
+        self.__cad.delete(self.__request_body['group_name'], self.__request_body['source'], self.__request_body['target'])
+        return {
+            'statusCode': 200,
+            'body': {'message': 'deleted'}
+        }
+
+
+router = APIRouter(
+    prefix=f'/{WebServiceConstants.ADMIN}/auth',
+    tags=["Admin Records CRUD (Admins-Only)"],
+    responses={404: {"description": "Not found"}},
+)
+
+@router.delete("")
+@router.delete("/")
+async def delete_auth_mapping(request: Request, delete_body: AuthDeleteModel):
+    """
+    Deleting one authorization mapping
+    """
+    LOGGER.debug(f'started delete_auth_mapping')
+    auth_info = FastApiUtils.get_authorization_info(request)
+    auth_crud = AuthCrud(auth_info, delete_body.model_dump())
+    is_admin_result = auth_crud.is_admin()
+    if is_admin_result['statusCode'] != 200:
+        raise HTTPException(status_code=is_admin_result['statusCode'], detail=is_admin_result['body'])
+    delete_result = auth_crud.delete_record()
+    if delete_result['statusCode'] == 200:
+        return delete_result['body']
+    raise HTTPException(status_code=delete_result['statusCode'], detail=delete_result['body'])
+
+@router.post("")
+@router.post("/")
+async def add_auth_mapping(request: Request, new_body: AuthAddModel):
+    """
+    Adding a new Authorization mapping
+    """
+    LOGGER.debug(f'started add_auth_mapping. {new_body.model_dump()}')
+    auth_info = FastApiUtils.get_authorization_info(request)
+    auth_crud = AuthCrud(auth_info, new_body.model_dump())
+    is_admin_result = auth_crud.is_admin()
+    if is_admin_result['statusCode'] != 200:
+        raise HTTPException(status_code=is_admin_result['statusCode'], detail=is_admin_result['body'])
+    add_result = auth_crud.add_new_record()
+    if add_result['statusCode'] == 200:
+        return add_result['body']
+    raise HTTPException(status_code=add_result['statusCode'], detail=add_result['body'])
+
+
+@router.get("")
+@router.get("/")
+async def list_auth_mappings(request: Request, tenant: Union[str, None]=None, venue: Union[str, None]=None, group_names: Union[str, None]=None):
+    """
+    Listing all existing Authorization Mappings.
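+
+    tenant, venue, and the comma separated group_names are folded into a single DDB
+    query body here; note that the backing list_all_record() is still a 501 Not-Implemented stub.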
+ + """ + LOGGER.debug(f'started list_auth_mappings') + auth_info = FastApiUtils.get_authorization_info(request) + query_body = { + 'tenant': tenant, + 'venue': venue, + 'ldap_group_names': group_names if group_names is None else [k.strip() for k in group_names.split(',')], + } + auth_crud = AuthCrud(auth_info, query_body) + is_admin_result = auth_crud.is_admin() + if is_admin_result['statusCode'] != 200: + raise HTTPException(status_code=is_admin_result['statusCode'], detail=is_admin_result['body']) + query_result = auth_crud.list_all_record() + if query_result['statusCode'] == 200: + return query_result['body'] + raise HTTPException(status_code=query_result['statusCode'], detail=query_result['body']) diff --git a/cumulus_lambda_functions/catalya_uds_api/granules_archive_api.py b/cumulus_lambda_functions/catalya_uds_api/granules_archive_api.py new file mode 100644 index 00000000..16b9c2c6 --- /dev/null +++ b/cumulus_lambda_functions/catalya_uds_api/granules_archive_api.py @@ -0,0 +1,132 @@ +import json +import os +from typing import Optional + +from cumulus_lambda_functions.daac_archiver.catalia_auth_db import CataliaAuthDb +from cumulus_lambda_functions.daac_archiver.catalia_daac_handshakes_db import CataliaDaacHandshakesDb +from cumulus_lambda_functions.daac_archiver.daac_archiver_catalia import DaacArchiverCatalia +from cumulus_lambda_functions.lib.lambda_logger_generator import LambdaLoggerGenerator +from cumulus_lambda_functions.uds_api.web_service_constants import WebServiceConstants +from cumulus_lambda_functions.uds_api.fast_api_utils import FastApiUtils +from fastapi import APIRouter, HTTPException, Request +from pydantic import BaseModel + +LOGGER = LambdaLoggerGenerator.get_logger(__name__, LambdaLoggerGenerator.get_level_from_env()) + +router = APIRouter( + prefix=f'/{WebServiceConstants.COLLECTIONS}', + tags=["Granules Archive CRUD API"], + responses={404: {"description": "Not found"}}, +) + +class ArchivingTypesModel(BaseModel): + data_type: str + file_extension: Optional[list[str]] = [] + +class DaacUpdateModel(BaseModel): + daac_collection_id: str + api_key: str + daac_provider: Optional[str] = None + daac_data_version: Optional[str] = None + daac_sns_topic_arn: Optional[str] = None + daac_role_arn: Optional[str] = None + daac_role_session_name: Optional[str] = None + archiving_types: Optional[list[ArchivingTypesModel]] = None + +class InternalDDBConnector: + def __init__(self): + required_env = ['CATALYA_DAAC_AGREEMENT_DB_NAME', 'CATALYA_DB_NAME'] + if not all([k in os.environ for k in required_env]): + raise EnvironmentError(f'one or more missing env: {required_env}') + self.cad = CataliaAuthDb(os.getenv('CATALYA_DB_NAME')) + self.cdhsd = CataliaDaacHandshakesDb(os.getenv('CATALYA_DAAC_AGREEMENT_DB_NAME')) + self.auth_info = {} + self.configured_daac_configs = [] + + def archive_methods_initiator(self, request, collection_id, daac_collection_id): + LOGGER.debug(f'started archive_methods_initiator.') + self.auth_info = FastApiUtils.get_authorization_info(request) + if daac_collection_id is None: + self.configured_daac_configs = self.cdhsd.search(collection_id) + configured_daac_ids = [k[self.cdhsd.target_project] for k in self.configured_daac_configs] + else: + configured_daac_ids = [daac_collection_id] + authorized_daacs = self.cad.get_authorized_daac_full(self.auth_info.get('ldap_groups'), collection_id, configured_daac_ids) + if len(authorized_daacs) < 1: + LOGGER.debug(f'user: {self.auth_info["username"]} is not authorized for {collection_id}') + raise 
HTTPException(status_code=403, detail=json.dumps({ + 'message': 'not authorized to execute this action' + })) + return authorized_daacs + +@router.post("/{collection_id}/{daac_collection_id}/archive") +@router.post("/{collection_id}/{daac_collection_id}/archive/") +async def add_daac_archive_config(request: Request, collection_id: str, daac_collection_id: str, new_body: DaacUpdateModel): + LOGGER.debug(f'started add_daac_archive_config. {new_body.model_dump()}') + i1 = InternalDDBConnector() + authorized_daacs = i1.archive_methods_initiator(request, collection_id, daac_collection_id) + authorized_ldaps = [k['userGroup'] for k in authorized_daacs] + b1 = new_body.model_dump() + try: + # def add(self, catalia_collection, daac_collection, api_key, provider, data_version, sns_topic_arn, role_arn, role_session_name, archiving_types, user, user_group): + i1.cdhsd.add(collection_id, daac_collection_id, b1['api_key'], b1['daac_provider'], b1['daac_data_version'], + b1['daac_sns_topic_arn'], b1['daac_role_arn'], b1['daac_role_session_name'], b1['archiving_types'], i1.auth_info['username'], authorized_ldaps) + except Exception as e: + LOGGER.exception(f'error while add_daac_archive_config: {b1}') + raise HTTPException(status_code=500, detail=e) + return {'message': 'archive config added'} + +@router.delete("/{collection_id}/{daac_collection_id}/archive") +@router.delete("/{collection_id}/{daac_collection_id}/archive/") +async def delete_daac_archive_config(request: Request, collection_id: str, daac_collection_id: str): + LOGGER.debug(f'started delete_daac_archive_config.') + i1 = InternalDDBConnector() + authorized_daacs = i1.archive_methods_initiator(request, collection_id, daac_collection_id) + try: + i1.cdhsd.delete(collection_id, daac_collection_id) + except Exception as e: + LOGGER.exception(f'error while delete_daac_archive_config: {collection_id}, {daac_collection_id}') + raise HTTPException(status_code=500, detail=e) + return {'message': 'archive config deleted'} + +@router.get("/{collection_id}/{daac_collection_id}/archive") +@router.get("/{collection_id}/{daac_collection_id}/archive/") +async def get_daac_archive_config(request: Request, collection_id: str, daac_collection_id: str): + LOGGER.debug(f'started get_daac_archive_config.') + i1 = InternalDDBConnector() + authorized_daacs = i1.archive_methods_initiator(request, collection_id, daac_collection_id) + try: + result = i1.cdhsd.get_single(collection_id, daac_collection_id) + except Exception as e: + LOGGER.exception(f'error while get_daac_archive_config: {collection_id}, {daac_collection_id}') + raise HTTPException(status_code=500, detail=e) + return {'result': result} + +@router.put("/{collection_id}/archive/{granule_id}") +@router.put("/{collection_id}/archive/{granule_id}/") +async def archive_single_granule(request: Request, collection_id: str, granule_id: str): + LOGGER.debug(f'started archive_single_granule.') + i1 = InternalDDBConnector() + authorized_daacs = i1.archive_methods_initiator(request, collection_id, None) + authorized_ldaps = set([k['userGroup'] for k in authorized_daacs]) + authorized_configured_daac_configs = [k for k in i1.configured_daac_configs if k[i1.cdhsd.target_project] in authorized_ldaps] + dac = DaacArchiverCatalia() + dac.staged_s3_bucket = os.getenv('CATALYA_UDS_STAGING_BUCKET') + dac.daac_agreements = authorized_configured_daac_configs + dac.archive_granule(collection_id, granule_id) + return {'message': 'archive initiated'} + +@router.put("/{collection_id}/archive") 
+@router.put("/{collection_id}/archive/") +async def archive_entire_collection(request: Request, collection_id: str): + LOGGER.debug(f'started archive_entire_collection.') + i1 = InternalDDBConnector() + authorized_daacs = i1.archive_methods_initiator(request, collection_id, None) + authorized_ldaps = set([k['userGroup'] for k in authorized_daacs]) + authorized_configured_daac_configs = [k for k in i1.configured_daac_configs if k[i1.cdhsd.target_project] in authorized_ldaps] + dac = DaacArchiverCatalia() + dac.staged_s3_bucket = os.getenv('CATALYA_UDS_STAGING_BUCKET') + dac.daac_agreements = authorized_configured_daac_configs + dac.archive_granule(collection_id, granule_id) + return {'message': 'archive initiated'} + diff --git a/cumulus_lambda_functions/catalya_uds_api/web_service.py b/cumulus_lambda_functions/catalya_uds_api/web_service.py new file mode 100644 index 00000000..ce375ba4 --- /dev/null +++ b/cumulus_lambda_functions/catalya_uds_api/web_service.py @@ -0,0 +1,69 @@ +from fastapi.staticfiles import StaticFiles + +from cumulus_lambda_functions.uds_api.fast_api_utils import FastApiUtils +from cumulus_lambda_functions.lib.lambda_logger_generator import LambdaLoggerGenerator +from dotenv import load_dotenv + +load_dotenv() + +import uvicorn +from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware + +from mangum import Mangum +from starlette.requests import Request + +from cumulus_lambda_functions.uds_api.routes_api import main_router +LOGGER = LambdaLoggerGenerator.get_logger(__name__, LambdaLoggerGenerator.get_level_from_env()) + +api_base_prefix = FastApiUtils.get_api_base_prefix() +app = FastAPI(title='Unity UDS API', + description='API to interact with UDS services', + docs_url=f'/{api_base_prefix}/docs', + redoc_url=f'/{api_base_prefix}/redoc', + openapi_url=f'/{api_base_prefix}/openapi', + ) +app.add_middleware( + CORSMiddleware, + allow_origins=FastApiUtils.get_cors_origins(), + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) +app.include_router(main_router, prefix=f'/{api_base_prefix}') + + +""" +Accept-Ranges: +bytes +Access-Control-Allow-Methods: +HEAD, GET +Access-Control-Allow-Origin: +* +Access-Control-Expose-Headers: +ETag, x-amz-meta-custom-header +Access-Control-Max-Age: +3000 +""" + +# https://fastapi.tiangolo.com/tutorial/cors/ + +@app.get("/") +async def root(request: Request): + return {"message": "Hello World", "root_path": request.scope.get("root_path")} + +@app.get(f'/{api_base_prefix}/openapi') +@app.get(f'/{api_base_prefix}/openapi/') +async def get_open_api(request: Request): + default_open_api_doc = app.openapi() + dropping_keys = [k for k in default_open_api_doc['paths'].keys() if not k.endswith('/')] + for k in dropping_keys: + default_open_api_doc['paths'].pop(k) + return app.openapi() + +# to make it work with Amazon Lambda, we create a handler object +handler = Mangum(app=app) + +if __name__ == '__main__': + uvicorn.run("web_service:app", port=8005, log_level="info", reload=True) + print("running") diff --git a/cumulus_lambda_functions/daac_archiver/catalia_auth_db.py b/cumulus_lambda_functions/daac_archiver/catalia_auth_db.py index f4020480..abd02455 100644 --- a/cumulus_lambda_functions/daac_archiver/catalia_auth_db.py +++ b/cumulus_lambda_functions/daac_archiver/catalia_auth_db.py @@ -38,6 +38,11 @@ def add(self, user_group, collection, daac_collection, access: bool): self.__ddb.add(user_group, sk1, item1, replace=True) return + def delete(self, user_group, collection, daac_collection): + sk1 = 
f'{collection}->{daac_collection}' + self.__ddb.delete(user_group, sk1) + return + def get_authorized_catalia(self, user_group: list[str], catalia_collection): results = [] for group in user_group: diff --git a/cumulus_lambda_functions/daac_archiver/catalia_daac_handshakes_db.py b/cumulus_lambda_functions/daac_archiver/catalia_daac_handshakes_db.py index 81c8c29a..88c6785f 100644 --- a/cumulus_lambda_functions/daac_archiver/catalia_daac_handshakes_db.py +++ b/cumulus_lambda_functions/daac_archiver/catalia_daac_handshakes_db.py @@ -35,6 +35,13 @@ def add(self, catalia_collection, daac_collection, api_key, provider, data_versi self.__ddb.add(catalia_collection, daac_collection, item1, replace=True) return + def delete(self, catalia_collection, daac_collection): + self.__ddb.delete(catalia_collection, daac_collection) + return + + def get_single (self, catalia_collection, daac_collection): + return self.__ddb.get(catalia_collection, daac_collection) + def search(self, catalia_collection): results = self.__ddb.get(catalia_collection, secondary_key=None) return results diff --git a/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py b/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py index b7a1de89..16b2887c 100644 --- a/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py +++ b/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py @@ -53,12 +53,38 @@ class DaacArchiverCatalia: def __init__(self): self.__sns = AwsSns() self.__s3 = AwsS3() - self.__staged_s3_bucket = 'TODO' # TODO + self.__staged_s3_bucket = 'SET_ME_UP' # TODO + self.__daac_agreements = [] self.__sfa_client = SFAClientFactory().get_instance_from_env() self.__archiving_granules_stac = None self.__archiving_status_extension_url = "https://stac-extensions.github.io/archival_statuses/v1.0.0/schema.json" self.__cnm_msg_version = "1.6.0" - self.__daac_agreements = [] + + @property + def staged_s3_bucket(self): + return self.__staged_s3_bucket + + @staged_s3_bucket.setter + def staged_s3_bucket(self, val): + """ + :param val: + :return: None + """ + self.__staged_s3_bucket = val + return + + @property + def daac_agreements(self): + return self.__daac_agreements + + @daac_agreements.setter + def daac_agreements(self, val): + """ + :param val: + :return: None + """ + self.__daac_agreements = val + return def archive_granule(self, collection_id, granule_id): # TODO look up granule details @@ -80,7 +106,6 @@ def archive_granule_json(self): if self.__archiving_granules_stac is None: raise ValueError(f'NULL archiving granule. Pls retrieve it first.') self.add_archival_extension() - self.get_daac_configs() if len(self.__daac_agreements) < 1: LOGGER.debug(f'this collection does not have any daac. {self.__archiving_granules_stac}') return @@ -122,11 +147,6 @@ def add_archival_extension(self): LOGGER.debug(f'Initialized archival:status property for STAC item') return self - def get_daac_configs(self): - # TODO - # update self.__daac_agreements - return - def stage_files(self): """ 1. Check directory s3://// @@ -142,7 +162,7 @@ def stage_files(self): if self.__archiving_granules_stac is None: raise ValueError(f'NULL archiving granule. Cannot stage files.') - if self.__staged_s3_bucket == 'TODO': + if self.__staged_s3_bucket == 'SET_ME_UP': raise ValueError(f'Staged S3 bucket is not configured. 
Please set self.__staged_s3_bucket.')
 
         # Get collection and item IDs
@@ -434,6 +454,7 @@ def _convert_stac_asset_to_cnm_file(self, asset_key: str, asset):
         LOGGER.debug(f'Converted STAC asset {asset_key} to CNM file: {cnm_file}')
         return cnm_file
 
+
     def send_daac_sns(self, daac_config):
         """
 
diff --git a/tests/cumulus_lambda_functions/daac_archiver/test_catalia_auth_db.py b/tests/cumulus_lambda_functions/daac_archiver/test_catalia_auth_db.py
index 8f570145..3c8075f0 100644
--- a/tests/cumulus_lambda_functions/daac_archiver/test_catalia_auth_db.py
+++ b/tests/cumulus_lambda_functions/daac_archiver/test_catalia_auth_db.py
@@ -4,8 +4,9 @@
 
 
 class TestCataliaAuthDb(TestCase):
+    catalia_db_name = 'h5s_on_disk_william_local'
     def test_01(self):
-        cad = CataliaAuthDb('h5s_on_disk_william_local')
+        cad = CataliaAuthDb(self.catalia_db_name)
         cad.add('A', 'X:Y:.*', '.*', False)
         cad.add('A', 'X:Y:L0.*', 'M:N:L0.*', False)
         cad.add('A', 'X:Y:L0_V1', 'M:N:L0.*', False)
@@ -19,3 +20,27 @@
         self.assertTrue(cad.authorize('A', 'x:y:L0_V2', 'M:N:L0_V1'))
         self.assertFalse(cad.authorize('A', 'x:y:L0_V2', 'M:N:L1_V1'))
         return
+
+    def test_02(self):
+        cad = CataliaAuthDb(self.catalia_db_name)
+        cad.add('A', 'X:Y:.*', '.*', False)
+        cad.add('A', 'X:Y:L0.*', 'M:N:L0.*', False)
+        cad.add('A', 'X:Y:L0_V1', 'M:N:L0.*', False)
+        cad.add('A', 'X:Y:L0.*', 'M:N:L0.*', True)
+        cad.add('A', 'X:Y:L1_V1', 'M:N:L1.*', True)
+
+        user_groups = cad.get_authorized_catalia(['A', 'B', 'C'], 'X:Y:L1_V1')
+        self.assertEqual(2, len(user_groups), f'wrong user groups: {user_groups}')
+
+        daacs = cad.get_authorized_daac(user_groups, 'X:Y:L1_V1', ['M:N:L1_V1', 'M:N:L1_V2', 'M:N:L0_V1'])
+        self.assertEqual(2, len(daacs), f'wrong daacs: {daacs}')
+        daacs = cad.get_authorized_daac(user_groups, 'X:Y:L0_V1', ['M:N:L1_V1', 'M:N:L0_V2', 'M:N:L0_V1'])
+        self.assertEqual(1, len(daacs), f'wrong daacs: {daacs}')
+        daacs = cad.get_authorized_daac(user_groups, 'X:Y:L0_V1', ['M:N:L0_V2', 'M:N:L0_V1'])
+        self.assertEqual(0, len(daacs), f'wrong daacs: {daacs}')
+        user_groups = cad.get_authorized_catalia(['B', 'C'], 'X:Y:L1_V1')
+        self.assertEqual(0, len(user_groups), f'wrong user groups: {user_groups}')
+        user_groups = cad.get_authorized_catalia(['A', 'B', 'C'], 'X:Y1:L2_V1')
+        self.assertEqual(0, len(user_groups), f'wrong user groups: {user_groups}')
+        debug = 1
+        return
diff --git a/tf-module/uds_catalia/uds_api_lambda.tf b/tf-module/uds_catalia/uds_api_lambda.tf
index de34c91b..97947d96 100644
--- a/tf-module/uds_catalia/uds_api_lambda.tf
+++ b/tf-module/uds_catalia/uds_api_lambda.tf
@@ -3,7 +3,7 @@ resource "aws_lambda_function" "uds_api_1" {
   source_code_hash = filebase64sha256(local.lambda_file_name)
   function_name = "${var.prefix}-uds_api_1"
   role = local.lambda_role_arn
-  handler = "cumulus_lambda_functions.uds_api.web_service.handler"
+  handler = "cumulus_lambda_functions.catalya_uds_api.web_service.handler"
   runtime = "python3.9"
   timeout = 300
   memory_size = 512

From 20a5f3d4d16c7828b3534b0651be1812f0f5929d Mon Sep 17 00:00:00 2001
From: Wai Phyo
Date: Wed, 10 Dec 2025 11:42:11 -0800
Subject: [PATCH 20/35] chore: prep for collection archive

---
 .../daac_archiver/daac_archiver_catalia.py | 16 +++++++++++---
 cumulus_lambda_functions/lib/uds_utils.py  | 22 +++++++++++++++++++
 2 files changed, 35 insertions(+), 3 deletions(-)
 create mode 100644 cumulus_lambda_functions/lib/uds_utils.py

diff --git a/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py 
b/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py index 16b2887c..1a5ceb6a 100644 --- a/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py +++ b/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py @@ -7,6 +7,8 @@ from pystac import Item from cumulus_lambda_functions.lib.lambda_logger_generator import LambdaLoggerGenerator +from cumulus_lambda_functions.lib.uds_utils import backoff_wrapper + LOGGER = LambdaLoggerGenerator.get_logger(__name__, LambdaLoggerGenerator.get_level_from_env()) @@ -86,9 +88,17 @@ def daac_agreements(self, val): self.__daac_agreements = val return + def archive_collection(self, collection_id): + # TODO + return self + + def archive_granules(self, granule_jsons: list): + # TODO + return self + def archive_granule(self, collection_id, granule_id): # TODO look up granule details - self.__archiving_granules_stac = self.__sfa_client.get_item(collection_id, item_id=granule_id) + self.__archiving_granules_stac = backoff_wrapper(self.__sfa_client.get_item, collection_id, item_id=granule_id) LOGGER.debug(f'retrieved stac_item from STAC Fast API: {self.__archiving_granules_stac}') self.archive_granule_json() return self @@ -196,7 +206,7 @@ def stage_files(self): try: # Copy file to staging bucket - self.__s3.copy_artifact(source_bucket, source_key, self.__staged_s3_bucket, dest_key, copy_tags=False, delete_original=False) + backoff_wrapper(self.__s3.copy_artifact, source_bucket, source_key, self.__staged_s3_bucket, dest_key, copy_tags=False, delete_original=False) LOGGER.info(f'Copied {source_href} to {dest_href}') # Update asset href to new location @@ -265,7 +275,7 @@ def update_status(self, archival_status: dict): stac_item_dict = self.__archiving_granules_stac.to_dict() # Update the item using the STAC Fast API client - updated_item = self.__sfa_client.update_item( + updated_item = backoff_wrapper(self.__sfa_client.update_item, collection_id=collection_id, item_id=item_id, item=stac_item_dict diff --git a/cumulus_lambda_functions/lib/uds_utils.py b/cumulus_lambda_functions/lib/uds_utils.py new file mode 100644 index 00000000..fe8dbe17 --- /dev/null +++ b/cumulus_lambda_functions/lib/uds_utils.py @@ -0,0 +1,22 @@ +import backoff + +class JitteredBackoffException(Exception): + pass + + +@backoff.on_exception( + backoff.expo, + Exception, + max_value=13, + max_time=34, + giveup=lambda e: isinstance(e, JitteredBackoffException), +) +def backoff_wrapper(func, *args, **kwargs): + """ + Run a function wrapped in exponential backoff. 
+ :param func: function or method object + :param args: args to pass to function + :param kwargs: keyword args to pass to function + :return: + """ + return func(*args, **kwargs) \ No newline at end of file From 5aa7f42eae115e4ad389916d1906e01ed9261192 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 10 Dec 2025 12:17:43 -0800 Subject: [PATCH 21/35] feat: untested AI code for collection archive --- .../daac_archiver/daac_archiver_catalia.py | 152 +++++++++++++++++- 1 file changed, 148 insertions(+), 4 deletions(-) diff --git a/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py b/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py index 1a5ceb6a..1f501769 100644 --- a/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py +++ b/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py @@ -89,11 +89,155 @@ def daac_agreements(self, val): return def archive_collection(self, collection_id): - # TODO - return self + """ + Archive all granules in a collection by querying the STAC Fast API + and processing them in parallel. + + :param collection_id: The collection ID to archive all granules from + :return: self + """ + LOGGER.info(f'Starting collection archival for collection: {collection_id}') + + try: + # Query all granules in the collection with pagination + all_granule_jsons = [] + page = 1 + limit = 100 # Reasonable batch size + + while True: + LOGGER.debug(f'Fetching granules page {page} for collection {collection_id}') + + # Use backoff wrapper for STAC API call + granules_response = backoff_wrapper( + self.__sfa_client.get_items, + collection_id=collection_id, + limit=limit, + offset=(page - 1) * limit + ) + + if not granules_response or 'features' not in granules_response: + LOGGER.warning(f'No granules found in response for collection {collection_id}, page {page}') + break + + granules = granules_response['features'] + if not granules: + LOGGER.info(f'No more granules found for collection {collection_id}, stopping pagination') + break + + all_granule_jsons.extend(granules) + LOGGER.info(f'Fetched {len(granules)} granules from page {page}, total so far: {len(all_granule_jsons)}') + + # If we got fewer than the limit, we're done + if len(granules) < limit: + break + + page += 1 + + LOGGER.info(f'Found {len(all_granule_jsons)} total granules in collection {collection_id}') + + if not all_granule_jsons: + LOGGER.warning(f'No granules found in collection {collection_id}') + return self + + # Process all granules in parallel + return self.archive_granules(all_granule_jsons) + + except Exception as e: + LOGGER.error(f'Failed to archive collection {collection_id}: {e}') + raise RuntimeError(f'Collection archival failed: {e}') from e + + def archive_granules(self, granule_jsons: list, max_workers=10): + """ + Process multiple granules in parallel for archival. + + :param granule_jsons: List of granule JSON objects from STAC Fast API + :param max_workers: Maximum number of parallel workers (default: 10) + :return: self + """ + from concurrent.futures import ThreadPoolExecutor, as_completed + + if not granule_jsons: + LOGGER.warning('No granules provided for archival') + return self + + LOGGER.info(f'Starting parallel archival of {len(granule_jsons)} granules with {max_workers} workers') + + # Track results + successful_granules = [] + failed_granules = [] + + def archive_single_granule(granule_json): + """ + Archive a single granule - wrapper function for parallel execution. 
+ + :param granule_json: Individual granule JSON object + :return: tuple (granule_id, success, error_message) + """ + granule_id = granule_json.get('id', 'unknown') + collection_id = granule_json.get('collection', 'unknown') + + try: + LOGGER.debug(f'Processing granule {granule_id} from collection {collection_id}') + + # Create a new instance for thread safety + # Each worker gets its own archiver instance with same configuration + worker_archiver = DaacArchiverCatalia() + worker_archiver.staged_s3_bucket = self.__staged_s3_bucket + worker_archiver.daac_agreements = self.__daac_agreements + + # Set the granule data directly instead of fetching again + worker_archiver._DaacArchiverCatalia__archiving_granules_stac = granule_json + + # Process the granule + worker_archiver.archive_granule_json() + + LOGGER.info(f'Successfully archived granule {granule_id}') + return granule_id, True, None + + except Exception as e: + error_msg = f'Failed to archive granule {granule_id}: {str(e)}' + LOGGER.error(error_msg) + return granule_id, False, error_msg + + # Execute parallel processing + with ThreadPoolExecutor(max_workers=max_workers) as executor: + # Submit all tasks + future_to_granule = { + executor.submit(archive_single_granule, granule_json): granule_json.get('id', 'unknown') + for granule_json in granule_jsons + } + + # Process completed tasks + for future in as_completed(future_to_granule): + granule_id = future_to_granule[future] + try: + result_granule_id, success, error_msg = future.result() + + if success: + successful_granules.append(result_granule_id) + else: + failed_granules.append({'granule_id': result_granule_id, 'error': error_msg}) + + except Exception as e: + error_msg = f'Unexpected error processing granule {granule_id}: {str(e)}' + LOGGER.error(error_msg) + failed_granules.append({'granule_id': granule_id, 'error': error_msg}) + + # Log final results + total_granules = len(granule_jsons) + success_count = len(successful_granules) + failed_count = len(failed_granules) + + LOGGER.info(f'Parallel archival completed: {success_count}/{total_granules} successful, {failed_count} failed') + + if failed_granules: + LOGGER.warning(f'Failed granules: {[f["granule_id"] for f in failed_granules]}') + for failure in failed_granules[:5]: # Log first 5 failures in detail + LOGGER.error(f'Failure details - {failure["granule_id"]}: {failure["error"]}') + + if successful_granules: + LOGGER.debug(f'Successfully archived granules: {successful_granules[:10]}...') # Log first 10 - def archive_granules(self, granule_jsons: list): - # TODO return self def archive_granule(self, collection_id, granule_id): From 38402df334537695cbfa8e424a97c111b7e4feef Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Tue, 16 Dec 2025 17:13:33 -0600 Subject: [PATCH 22/35] fix: add test case for parallelism processing --- .../catalya_uds_api/granules_archive_api.py | 2 +- .../daac_archiver/daac_archiver_catalia.py | 15 +- .../test_daac_archiver_catalia.py | 412 +++++++++++++++++- 3 files changed, 426 insertions(+), 3 deletions(-) diff --git a/cumulus_lambda_functions/catalya_uds_api/granules_archive_api.py b/cumulus_lambda_functions/catalya_uds_api/granules_archive_api.py index 16b9c2c6..182fb9a4 100644 --- a/cumulus_lambda_functions/catalya_uds_api/granules_archive_api.py +++ b/cumulus_lambda_functions/catalya_uds_api/granules_archive_api.py @@ -127,6 +127,6 @@ async def archive_entire_collection(request: Request, collection_id: str): dac = DaacArchiverCatalia() dac.staged_s3_bucket = os.getenv('CATALYA_UDS_STAGING_BUCKET') 
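    # A defensive sketch (annotation, with names taken from the surrounding code): since
    # os.getenv() silently returns None, and DaacArchiverCatalia only guards against its
    # 'SET_ME_UP' placeholder, a variant of this endpoint could fail fast instead:
    #     if not os.getenv('CATALYA_UDS_STAGING_BUCKET'):
    #         raise HTTPException(status_code=500, detail='CATALYA_UDS_STAGING_BUCKET is not set')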
dac.daac_agreements = authorized_configured_daac_configs - dac.archive_granule(collection_id, granule_id) + dac.archive_collection(collection_id) # TODO accept filtering mechanisms? return {'message': 'archive initiated'} diff --git a/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py b/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py index 1f501769..33c18b29 100644 --- a/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py +++ b/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py @@ -62,6 +62,19 @@ def __init__(self): self.__archiving_status_extension_url = "https://stac-extensions.github.io/archival_statuses/v1.0.0/schema.json" self.__cnm_msg_version = "1.6.0" + @property + def archiving_granules_stac(self): + return self.__archiving_granules_stac + + @archiving_granules_stac.setter + def archiving_granules_stac(self, val): + """ + :param val: + :return: None + """ + self.__archiving_granules_stac = val + return + @property def staged_s3_bucket(self): return self.__staged_s3_bucket @@ -186,7 +199,7 @@ def archive_single_granule(granule_json): worker_archiver.daac_agreements = self.__daac_agreements # Set the granule data directly instead of fetching again - worker_archiver._DaacArchiverCatalia__archiving_granules_stac = granule_json + worker_archiver.archiving_granules_stac = granule_json # Process the granule worker_archiver.archive_granule_json() diff --git a/tests/cumulus_lambda_functions/daac_archiver/test_daac_archiver_catalia.py b/tests/cumulus_lambda_functions/daac_archiver/test_daac_archiver_catalia.py index 9aaf94d9..47d1a5bd 100644 --- a/tests/cumulus_lambda_functions/daac_archiver/test_daac_archiver_catalia.py +++ b/tests/cumulus_lambda_functions/daac_archiver/test_daac_archiver_catalia.py @@ -15,7 +15,6 @@ class TestDaacArchiverCatalia(TestCase): def setUp(self): return - def test_stage_files_01(self): """ Test stage_files method with complete workflow: @@ -1078,3 +1077,414 @@ def test_extract_files_02(self): print(f"🎯 Asset types found: {sorted(extracted_types)}") print(f"🔐 Checksum types found: {sorted(extracted_checksum_types)}") + def test_archive_granules_concurrent_processing(self): + """ + Test archive_granules method with concurrent processing: + 1. Creates multiple mock granule JSON objects + 2. Mocks the archive_granule_json method to simulate different outcomes + 3. Verifies parallel processing works correctly + 4. Checks success/failure tracking and logging + 5. 
Verifies thread safety with separate archiver instances + """ + import time + from unittest.mock import call + + # Create test granule JSON objects (simulating STAC Fast API response) + test_granules = [ + { + 'id': f'granule_{i:03d}', + 'collection': 'test_collection', + 'type': 'Feature', + 'geometry': { + "type": "Polygon", + "coordinates": [[[-180, -90], [180, -90], [180, 90], [-180, 90], [-180, -90]]] + }, + 'properties': { + 'datetime': '2024-01-01T00:00:00Z' + }, + 'assets': { + f'data_{i:03d}.nc': { + 'href': f's3://test-bucket/data_{i:03d}.nc', + 'roles': ['data'], + 'type': 'application/netcdf' + } + } + } + for i in range(10) # Create 10 test granules + ] + + # Mock dependencies + with patch('cumulus_lambda_functions.daac_archiver.daac_archiver_catalia.AwsS3') as mock_s3_class, \ + patch('cumulus_lambda_functions.daac_archiver.daac_archiver_catalia.AwsSns') as mock_sns_class, \ + patch('cumulus_lambda_functions.daac_archiver.daac_archiver_catalia.SFAClientFactory') as mock_sfa_factory, \ + patch('cumulus_lambda_functions.daac_archiver.daac_archiver_catalia.DaacArchiverCatalia.archive_granule_json', autospec=True) as mock_archive_granule_json: + + # Setup mocks + mock_s3 = Mock() + mock_s3_class.return_value = mock_s3 + mock_sns = Mock() + mock_sns_class.return_value = mock_sns + mock_sfa_client = Mock() + mock_sfa_factory.return_value.get_instance_from_env.return_value = mock_sfa_client + + # Track calls to archive_granule_json to verify concurrent execution + call_times = [] + processed_granule_ids = [] + + def mock_archive_granule_json_impl(self): + """Mock implementation that simulates processing time and tracks calls""" + # Get the granule ID from the current archiver instance + current_granule = self._DaacArchiverCatalia__archiving_granules_stac + granule_id = current_granule.get('id', 'unknown') if isinstance(current_granule, dict) else current_granule.id + + # # Record call time and granule ID + call_times.append(time.time()) + processed_granule_ids.append(granule_id) + + # Simulate different processing outcomes based on granule ID + if granule_id == 'granule_003': + # Simulate a failure for granule_003 + raise RuntimeError(f"Simulated failure for {granule_id}") + elif granule_id == 'granule_007': + # Simulate another failure for granule_007 + raise ValueError(f"Validation error for {granule_id}") + else: + # Simulate successful processing with some delay + time.sleep(0.1) # Small delay to simulate real processing + return self + return self + + # Apply the mock implementation + mock_archive_granule_json.side_effect = mock_archive_granule_json_impl + + # Create main archiver instance + archiver = DaacArchiverCatalia() + archiver._DaacArchiverCatalia__staged_s3_bucket = 'test-staged-bucket' + archiver._DaacArchiverCatalia__daac_agreements = [ + { + 'daac_collection_name': 'TEST_COLLECTION', + 'daac_data_version': '1.0', + 'daac_provider': 'test_provider', + 'daac_sns_topic_arn': 'arn:aws:sns:us-west-2:123456789012:test-topic', + 'daac_role_arn': 'arn:aws:iam::123456789012:role/test-role', + 'daac_role_session_name': 'test-session' + } + ] + + # Record start time + start_time = time.time() + + # Call archive_granules with different worker counts for testing + result = archiver.archive_granules(test_granules, max_workers=5) + + # Record end time + end_time = time.time() + total_execution_time = end_time - start_time + + # Verify method returns self + + # Verify archive_granule_json was called for each granule + expected_call_count = len(test_granules) + 
self.assertEqual(mock_archive_granule_json.call_count, expected_call_count, + f"archive_granule_json should be called {expected_call_count} times") + + # Verify all granules were processed (including failed ones) + expected_granule_ids = {granule['id'] for granule in test_granules} + actual_granule_ids = set(processed_granule_ids) + self.assertEqual(actual_granule_ids, expected_granule_ids, + f"All granules should be processed. Expected: {expected_granule_ids}, Got: {actual_granule_ids}") + + # Verify concurrent execution occurred (total time should be less than sequential) + sequential_time_estimate = len(test_granules) * 0.1 # 0.1s per granule + self.assertLess(total_execution_time, sequential_time_estimate * 0.8, + f"Execution should be faster than sequential. Total: {total_execution_time:.2f}s, Sequential estimate: {sequential_time_estimate:.2f}s") + + # Verify parallel execution by checking call time distribution + if len(call_times) > 1: + # Check that calls started within a reasonable window (parallel execution) + call_time_range = max(call_times) - min(call_times) + # Most calls should start within first 0.5 seconds (parallel startup) + early_calls = [t for t in call_times if t - min(call_times) < 0.5] + self.assertGreaterEqual(len(early_calls), min(5, len(test_granules)), + f"At least {min(5, len(test_granules))} calls should start early (parallel execution)") + + # Test with empty granule list + result_empty = archiver.archive_granules([]) + self.assertEqual(result_empty, archiver, "archive_granules should handle empty list") + + # Reset mock call count for next test + mock_archive_granule_json.reset_mock() + call_times.clear() + processed_granule_ids.clear() + + # Test with single granule + single_granule = [test_granules[0]] + result_single = archiver.archive_granules(single_granule, max_workers=1) + self.assertEqual(result_single, archiver, "archive_granules should handle single granule") + self.assertEqual(mock_archive_granule_json.call_count, 1, "Should call archive_granule_json once for single granule") + + print(f"✅ Test passed! Concurrent processing verification:") + print(f" - Processed {len(test_granules)} granules concurrently") + print(f" - Total execution time: {total_execution_time:.2f}s (vs {sequential_time_estimate:.2f}s sequential)") + print(f" - Expected failures occurred for granule_003 and granule_007") + print(f" - Thread safety verified with separate archiver instances") + + def test_archive_granules_error_handling_and_isolation(self): + """ + Test archive_granules method error handling and failure isolation: + 1. Creates granules with different failure scenarios + 2. Verifies individual failures don't stop other processing + 3. Checks error logging and result tracking + 4. 
Tests edge cases and validation + """ + import time + from unittest.mock import call + + # Create test granules with various scenarios + test_granules = [ + # Normal granules that should succeed + {'id': 'success_001', 'collection': 'test_collection', 'type': 'Feature', 'properties': {'datetime': '2024-01-01T00:00:00Z'}, 'assets': {}}, + {'id': 'success_002', 'collection': 'test_collection', 'type': 'Feature', 'properties': {'datetime': '2024-01-01T01:00:00Z'}, 'assets': {}}, + {'id': 'success_003', 'collection': 'test_collection', 'type': 'Feature', 'properties': {'datetime': '2024-01-01T02:00:00Z'}, 'assets': {}}, + # Granules that will fail + {'id': 'fail_network', 'collection': 'test_collection', 'type': 'Feature', 'properties': {'datetime': '2024-01-01T03:00:00Z'}, 'assets': {}}, + {'id': 'fail_validation', 'collection': 'test_collection', 'type': 'Feature', 'properties': {'datetime': '2024-01-01T04:00:00Z'}, 'assets': {}}, + # More successful granules to verify isolation + {'id': 'success_004', 'collection': 'test_collection', 'type': 'Feature', 'properties': {'datetime': '2024-01-01T05:00:00Z'}, 'assets': {}}, + {'id': 'success_005', 'collection': 'test_collection', 'type': 'Feature', 'properties': {'datetime': '2024-01-01T06:00:00Z'}, 'assets': {}}, + ] + + # Mock dependencies + with patch('cumulus_lambda_functions.daac_archiver.daac_archiver_catalia.AwsS3') as mock_s3_class, \ + patch('cumulus_lambda_functions.daac_archiver.daac_archiver_catalia.AwsSns') as mock_sns_class, \ + patch('cumulus_lambda_functions.daac_archiver.daac_archiver_catalia.SFAClientFactory') as mock_sfa_factory, \ + patch('cumulus_lambda_functions.daac_archiver.daac_archiver_catalia.DaacArchiverCatalia.archive_granule_json', autospec=True) as mock_archive_granule_json: + + # Setup mocks + mock_s3 = Mock() + mock_s3_class.return_value = mock_s3 + mock_sns = Mock() + mock_sns_class.return_value = mock_sns + mock_sfa_client = Mock() + mock_sfa_factory.return_value.get_instance_from_env.return_value = mock_sfa_client + + # Track processing results + processing_results = {} + + def mock_archive_granule_json_impl(self): + """Mock implementation with controlled failures""" + current_granule = self._DaacArchiverCatalia__archiving_granules_stac + granule_id = current_granule.get('id', 'unknown') if isinstance(current_granule, dict) else current_granule.id + + # Simulate different failure types + if granule_id == 'fail_network': + processing_results[granule_id] = 'network_error' + raise ConnectionError("Network timeout during archival process") + elif granule_id == 'fail_validation': + processing_results[granule_id] = 'validation_error' + raise ValueError("Invalid granule metadata format") + else: + # Successful processing + processing_results[granule_id] = 'success' + time.sleep(0.05) # Small delay to simulate processing + return self + + mock_archive_granule_json.side_effect = mock_archive_granule_json_impl + + # Create archiver instance + archiver = DaacArchiverCatalia() + archiver._DaacArchiverCatalia__staged_s3_bucket = 'test-staged-bucket' + archiver._DaacArchiverCatalia__daac_agreements = [{'test': 'agreement'}] + + # Capture log messages to verify error logging + with patch('cumulus_lambda_functions.daac_archiver.daac_archiver_catalia.LOGGER') as mock_logger: + # Execute archive_granules + result = archiver.archive_granules(test_granules, max_workers=3) + + # Verify method returns self even with failures + self.assertEqual(result, archiver, "archive_granules should return self even with failures") + + # Verify 
all granules were processed + self.assertEqual(len(processing_results), len(test_granules), + f"All {len(test_granules)} granules should be processed") + + # Verify success and failure counts + successful_granules = [gid for gid, status in processing_results.items() if status == 'success'] + failed_granules = [gid for gid, status in processing_results.items() if status != 'success'] + + expected_successful = ['success_001', 'success_002', 'success_003', 'success_004', 'success_005'] + expected_failed = ['fail_network', 'fail_validation'] + + self.assertEqual(set(successful_granules), set(expected_successful), + f"Expected successful granules: {expected_successful}, Got: {successful_granules}") + self.assertEqual(set(failed_granules), set(expected_failed), + f"Expected failed granules: {expected_failed}, Got: {failed_granules}") + + # Verify archive_granule_json was called for each granule + self.assertEqual(mock_archive_granule_json.call_count, len(test_granules), + f"archive_granule_json should be called {len(test_granules)} times") + + # Verify error logging occurred + error_calls = [call for call in mock_logger.error.call_args_list if call[0]] + self.assertGreaterEqual(len(error_calls), 2, "Should log errors for failed granules") + + # Check that error messages contain granule IDs + error_messages = [str(call[0][0]) for call in error_calls] + self.assertTrue(any('fail_network' in msg for msg in error_messages), + "Should log error for fail_network granule") + self.assertTrue(any('fail_validation' in msg for msg in error_messages), + "Should log error for fail_validation granule") + + # Verify info logging occurred + info_calls = [call for call in mock_logger.info.call_args_list if call[0]] + self.assertGreater(len(info_calls), 0, "Should log info messages during processing") + + # Check completion summary was logged + completion_messages = [str(call[0][0]) for call in info_calls] + completion_summary = next((msg for msg in completion_messages if 'Parallel archival completed' in msg), None) + self.assertIsNotNone(completion_summary, "Should log completion summary") + + # Verify summary contains correct counts + self.assertIn(f'{len(successful_granules)}/{len(test_granules)} successful', completion_summary) + self.assertIn(f'{len(failed_granules)} failed', completion_summary) + + print(f"✅ Test passed! Error handling and isolation verification:") + print(f" - Processed {len(test_granules)} granules with mixed success/failure") + print(f" - Successful: {len(successful_granules)}, Failed: {len(failed_granules)}") + print(f" - Failures were isolated and didn't stop other processing") + print(f" - Error logging verified for all failure types") + print(f" - Method returned successfully despite individual failures") + + def test_archive_granules_thread_safety_validation(self): + """ + Test archive_granules method thread safety: + 1. Verifies each worker gets its own DaacArchiverCatalia instance + 2. Checks that configuration is properly copied to worker instances + 3. Validates no shared state issues between workers + 4. 
Tests worker instance isolation + """ + # Create test granules + test_granules = [ + {'id': f'thread_test_{i}', 'collection': 'test_collection', 'type': 'Feature', + 'properties': {'datetime': '2024-01-01T00:00:00Z'}, 'assets': {}} + for i in range(6) + ] + + # Mock dependencies + with patch('cumulus_lambda_functions.daac_archiver.daac_archiver_catalia.AwsS3') as mock_s3_class, \ + patch('cumulus_lambda_functions.daac_archiver.daac_archiver_catalia.AwsSns') as mock_sns_class, \ + patch('cumulus_lambda_functions.daac_archiver.daac_archiver_catalia.SFAClientFactory') as mock_sfa_factory, \ + patch('cumulus_lambda_functions.daac_archiver.daac_archiver_catalia.DaacArchiverCatalia.archive_granule_json', autospec=True) as mock_archive_granule_json: + + # Setup mocks + mock_s3_class.return_value = Mock() + mock_sns_class.return_value = Mock() + mock_sfa_factory.return_value.get_instance_from_env.return_value = Mock() + + # Track worker instances and their configurations + worker_instances = [] + worker_configs = [] + processed_granules_by_instance = {} + + def mock_archive_granule_json_impl(instance): + """Mock that tracks worker instances and their configurations""" + # Record this worker instance + instance_id = id(instance) # Unique identifier for each instance + if instance_id not in processed_granules_by_instance: + processed_granules_by_instance[instance_id] = [] + worker_instances.append(instance) + # Capture configuration + worker_configs.append({ + 'instance_id': instance_id, + 'staged_s3_bucket': instance._DaacArchiverCatalia__staged_s3_bucket, + 'daac_agreements': instance._DaacArchiverCatalia__daac_agreements, + 'current_granule_id': instance._DaacArchiverCatalia__archiving_granules_stac.get('id') + }) + + # Track which granule this instance is processing + current_granule = instance._DaacArchiverCatalia__archiving_granules_stac + granule_id = current_granule.get('id', 'unknown') + processed_granules_by_instance[instance_id].append(granule_id) + + return instance + + mock_archive_granule_json.side_effect = mock_archive_granule_json_impl + + # Create main archiver instance with specific configuration + main_archiver = DaacArchiverCatalia() + main_archiver._DaacArchiverCatalia__staged_s3_bucket = 'main-staged-bucket' + main_archiver._DaacArchiverCatalia__daac_agreements = [ + {'daac_name': 'test_daac', 'config': 'main_config'} + ] + + # Execute archive_granules + result = main_archiver.archive_granules(test_granules, max_workers=3) + + # Verify method returns self + self.assertEqual(result, main_archiver, "archive_granules should return main archiver instance") + + # Verify multiple worker instances were created + unique_instance_ids = set(id(instance) for instance in worker_instances) + self.assertGreater(len(unique_instance_ids), 1, "Multiple worker instances should be created") + self.assertLessEqual(len(unique_instance_ids), len(test_granules), + "Should not create more instances than granules") + + # Verify main archiver is not used as worker (thread safety) + main_instance_id = id(main_archiver) + worker_instance_ids = {id(instance) for instance in worker_instances} + self.assertNotIn(main_instance_id, worker_instance_ids, + "Main archiver instance should not be used as worker") + + # Verify each worker instance has correct configuration + for config in worker_configs: + self.assertEqual(config['staged_s3_bucket'], 'main-staged-bucket', + f"Worker instance {config['instance_id']} should have correct staged_s3_bucket") + self.assertEqual(config['daac_agreements'], [{'daac_name': 
'test_daac', 'config': 'main_config'}], + f"Worker instance {config['instance_id']} should have correct daac_agreements") + self.assertIn(config['current_granule_id'], [g['id'] for g in test_granules], + f"Worker instance {config['instance_id']} should process valid granule") + + # Verify all granules were processed exactly once + all_processed_granules = [] + for granules_list in processed_granules_by_instance.values(): + all_processed_granules.extend(granules_list) + + expected_granule_ids = [g['id'] for g in test_granules] + self.assertEqual(sorted(all_processed_granules), sorted(expected_granule_ids), + "All granules should be processed exactly once") + + # Verify no granule was processed by multiple instances + granule_processing_count = {} + for granule_id in all_processed_granules: + granule_processing_count[granule_id] = granule_processing_count.get(granule_id, 0) + 1 + + for granule_id, count in granule_processing_count.items(): + self.assertEqual(count, 1, f"Granule {granule_id} should be processed exactly once, got {count}") + + # Verify worker instances are separate classes (not the same instance reused) + worker_classes = [type(instance) for instance in worker_instances] + expected_class = DaacArchiverCatalia + for worker_class in worker_classes: + self.assertEqual(worker_class, expected_class, "All workers should be DaacArchiverCatalia instances") + + # Test edge case: max_workers larger than granule count + worker_instances.clear() + worker_configs.clear() + processed_granules_by_instance.clear() + + single_granule = [test_granules[0]] + result_single = main_archiver.archive_granules(single_granule, max_workers=10) + self.assertEqual(result_single, main_archiver, "Should handle max_workers > granule count") + + # Should only create one worker instance for one granule + unique_instance_ids_single = set(id(instance) for instance in worker_instances) + self.assertEqual(len(unique_instance_ids_single), 1, "Should create only one worker for one granule") + + print(f"✅ Test passed! 
Thread safety validation:") + print(f" - Created {len(unique_instance_ids)} separate worker instances") + print(f" - Main archiver instance isolated from workers") + print(f" - Configuration correctly copied to all workers") + print(f" - Each granule processed by exactly one worker instance") + print(f" - No shared state issues detected") + + From 0d33503ad018bb831e20ab762a69e856dab01c16 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 17 Dec 2025 13:26:00 -0600 Subject: [PATCH 23/35] feat: update code to store response in ddb --- .../daac_archiver/catalia_status_db.py | 42 +++++++++++++ .../daac_archiver/daac_archiver_catalia.py | 59 ++++++++++++++++--- .../daac_archiver/daac_archiver_logic.py | 28 +-------- .../test_daac_archiver_catalia.py | 2 +- 4 files changed, 96 insertions(+), 35 deletions(-) create mode 100644 cumulus_lambda_functions/daac_archiver/catalia_status_db.py diff --git a/cumulus_lambda_functions/daac_archiver/catalia_status_db.py b/cumulus_lambda_functions/daac_archiver/catalia_status_db.py new file mode 100644 index 00000000..df13bd7d --- /dev/null +++ b/cumulus_lambda_functions/daac_archiver/catalia_status_db.py @@ -0,0 +1,42 @@ +from mdps_ds_lib.lib.aws.no_sql_abstract import NoSqlProps +from mdps_ds_lib.lib.aws.no_sql_ddb import NoSqlDdb +from mdps_ds_lib.lib.aws.no_sql_factory import NoSqlFactory + + +class CataliaStatusDb: + identifier = 'identifier' + collection = 'collection' + name_str = 'name' + status = 'status' + error_code = 'errorCode' + error_message = 'errorMessage' + href_str = 'href' + datetime_str = 'datetime' + + def __init__(self, table_name: str): + ddb_props = NoSqlProps() + ddb_props.table = table_name + ddb_props.primary_key = self.identifier + ddb_props.secondary_key = self.datetime_str + + param = ddb_props.to_json() + param['file_repo'] = 'AWS_DDB' + + self.__ddb: NoSqlDdb = NoSqlFactory().get_instance(**param) + + def get(self, identifier: str): + results = self.__ddb.get(identifier, secondary_key=None) + return results + + def add(self, identifier: str, collection: dict, name_str: str, status: str, datetime_str: str, error_code: str=None, error_message: str=None, href_str: str=None): + item1 = { + self.name_str: name_str, + self.collection: collection, + self.status: status, + self.error_code: error_code, + self.error_message: error_message, + self.href_str: href_str, + } + item1 = {k: v for k, v in item1.items() if v is not None} + self.__ddb.add(identifier, datetime_str, item1, replace=False) + return diff --git a/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py b/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py index 33c18b29..f3b7e74a 100644 --- a/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py +++ b/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py @@ -1,4 +1,7 @@ import json +import os +from uuid import uuid4 + from mdps_ds_lib.lib.aws.aws_s3 import AwsS3 from mdps_ds_lib.lib.aws.aws_sns import AwsSns from mdps_ds_lib.lib.utils.time_utils import TimeUtils @@ -6,6 +9,7 @@ from mdps_ds_lib.stage_in_out.stage_in_out_utils import StageInOutUtils from pystac import Item +from cumulus_lambda_functions.daac_archiver.catalia_status_db import CataliaStatusDb from cumulus_lambda_functions.lib.lambda_logger_generator import LambdaLoggerGenerator from cumulus_lambda_functions.lib.uds_utils import backoff_wrapper @@ -42,7 +46,7 @@ class DaacArchiverCatalia: "type": "string", "format": "iri-reference" }, - "datetime": { + "datetime": { "title": "Date and Time", "description": "timestamp 
of this update, in UTC (Formatted in RFC 3339) ",
             "type": "string",
@@ -56,6 +60,7 @@ def __init__(self):
         self.__sns = AwsSns()
         self.__s3 = AwsS3()
         self.__staged_s3_bucket = 'SET_ME_UP'  # TODO
+        self.__status_ddb = CataliaStatusDb(os.getenv('CATALYA_STATUS_DB', None))
         self.__daac_agreements = []
         self.__sfa_client = SFAClientFactory().get_instance_from_env()
         self.__archiving_granules_stac = None
@@ -253,9 +258,12 @@ def archive_single_granule(granule_json):
         return self

+    def load_granule_from_client(self, collection_id, granule_id):
+        self.__archiving_granules_stac = backoff_wrapper(self.__sfa_client.get_item, collection_id, item_id=granule_id)
+        return self

     def archive_granule(self, collection_id, granule_id):
         # TODO look up granule details
-        self.__archiving_granules_stac = backoff_wrapper(self.__sfa_client.get_item, collection_id, item_id=granule_id)
+        self.load_granule_from_client(collection_id, granule_id)
         LOGGER.debug(f'retrieved stac_item from STAC Fast API: {self.__archiving_granules_stac}')
         self.archive_granule_json()
         return self
@@ -379,7 +387,28 @@ def stage_files(self):
             LOGGER.warning(f'Non-S3 asset {asset_key} not staged: {source_href}')
         return self

-    def update_status(self, archival_status: dict):
+    def update_status_wrapper(self, cnm_notification_msg: dict):
+        existing_statuses = self.__status_ddb.get(cnm_notification_msg['identifier'])
+        if len(existing_statuses) < 1:
+            raise ValueError(f'unknown collection & granule: {cnm_notification_msg}')
+        collection_id, granule_id = existing_statuses[0][CataliaStatusDb.collection], existing_statuses[0][CataliaStatusDb.name_str]
+        self.load_granule_from_client(collection_id, granule_id)
+        if cnm_notification_msg['response']['status'] == 'SUCCESS':
+            latest_daac_status = {
+                'status': 'cnm-receive-success',
+            }
+            # TODO ask DAAC if they pass HREF?
+        else:
+            latest_daac_status = {
+                'status': 'cnm-receive-failed',
+                'errorMessage': cnm_notification_msg['response']['errorMessage'] if 'errorMessage' in cnm_notification_msg['response'] else 'unknown',
+                'errorCode': cnm_notification_msg['response']['errorCode'] if 'errorCode' in cnm_notification_msg['response'] else 'unknown',
+            }
+        self.update_status(cnm_notification_msg['identifier'], latest_daac_status)
+
+        return self
+
+    def update_status(self, identifier: str, archival_status: dict):
         """
         1. validate archival_status from parameter against self.archival_status_schema
         2. Add archival_status to self.__archiving_granules_stac>properties>archival:status
@@ -427,6 +456,7 @@
         if not collection_id or not item_id:
             raise ValueError(f'Missing collection_id or item_id from STAC item. collection_id: {collection_id}, item_id: {item_id}')
+        errors = []
         try:
             # Convert STAC item to JSON dictionary
             stac_item_dict = self.__archiving_granules_stac.to_dict()
@@ -437,15 +467,26 @@
                 item_id=item_id,
                 item=stac_item_dict
             )
-            LOGGER.info(f'Successfully updated STAC item {item_id} in collection {collection_id} with new archival status')
             LOGGER.debug(f'Updated item response: {updated_item}')
+        except Exception as e:
+            LOGGER.exception(f'Failed to update STAC item {item_id} in collection {collection_id}')
+            errors.append(e)
+        try:
+            self.__status_ddb.add(identifier, collection_id, item_id, archival_status['status'],
+                                  archival_status_with_timestamp['datetime'],
+                                  archival_status['errorCode'] if 'errorCode' in archival_status else None,
+                                  archival_status['errorMessage'] if 'errorMessage' in archival_status else None,
+                                  archival_status['href'] if 'href' in archival_status else None,
+                                  )
+        except Exception as e:
+            LOGGER.exception(f'Failed to store status in DDB {collection_id}')
+            errors.append(e)
-            return self
+        if len(errors) > 0:
+            raise RuntimeError(f'Failed to update STAC item status: {errors}')
-        except Exception as e:
-            LOGGER.error(f'Failed to update STAC item {item_id} in collection {collection_id}: {e}')
-            raise RuntimeError(f'Failed to update STAC item status: {e}') from e
+        return self

     def extract_files(self, daac_config: dict):
         """
@@ -654,7 +695,7 @@ def send_daac_sns(self, daac_config):
                 'name': daac_config['daac_collection_name'],
                 'version': daac_config['daac_data_version'],
             },
-            "identifier": self.__archiving_granules_stac.id,  # Seems like it's the same granule IDuds_cnm_json['identifier'],
+            'identifier': str(uuid4()),  # "identifier": self.__archiving_granules_stac.id, # Seems like it's the same granule IDuds_cnm_json['identifier'],
             # From DAAC: Unique identifier for the message as a whole. It is the senders responsibility to ensure uniqueness. This identifier can be used in response messages to provide tracability.
             "submissionTime": f'{TimeUtils.get_current_time()}Z',
             "provider": daac_config['daac_provider'],  # NOTE: we can't use tenant as provider anymore coz we aren't sure tennt will be there in CATALIA. if 'daac_provider' in daac_config else granule_identifier.tenant
diff --git a/cumulus_lambda_functions/daac_archiver/daac_archiver_logic.py b/cumulus_lambda_functions/daac_archiver/daac_archiver_logic.py
index 01cf55e7..18a4d307 100644
--- a/cumulus_lambda_functions/daac_archiver/daac_archiver_logic.py
+++ b/cumulus_lambda_functions/daac_archiver/daac_archiver_logic.py
@@ -9,8 +9,8 @@
 from mdps_ds_lib.lib.aws.aws_message_transformers import AwsMessageTransformers
 from mdps_ds_lib.lib.utils.json_validator import JsonValidator
-from mdps_ds_lib.stac_fast_api_client.sfa_client_factory import SFAClientFactory
+from cumulus_lambda_functions.daac_archiver.daac_archiver_catalia import DaacArchiverCatalia
 from cumulus_lambda_functions.lib.uds_db.granules_db_index import GranulesDbIndex
 from mdps_ds_lib.lib.aws.aws_sns import AwsSns
 from mdps_ds_lib.lib.utils.time_utils import TimeUtils
@@ -154,30 +154,8 @@ def update_stac(self, cnm_notification_msg):
             raise ValueError(f"missing ARCHIVAL_STATUS_MECHANISM environment variable or value is not {['UDS', 'FAST_STAC']}")
         if update_type == 'UDS':
             return self.update_stac_uds(cnm_notification_msg)
-        return self.update_stac_fast_api(cnm_notification_msg)
-
-    def update_stac_fast_api(self, cnm_notification_msg):
-        sfa_client = SFAClientFactory().get_instance_from_env()
-        # TODO: update this part ?
how to get collection and granule id? - collection_id, granule_id = ':'.join(cnm_notification_msg['identifier'].split(':')[:-1]), cnm_notification_msg['identifier'] - # TODO assuming granule ID is URN:NASA:VENUE:TENANT:VENUE:COLLECTION_ID:COLLECTION_ID - existing_item = sfa_client.get_item(collection_id, granule_id) - # TODO handle error when no existing_item. Currently, it is requests.HTTPError with 404 - if cnm_notification_msg['response']['status'] == 'SUCCESS': - latest_daac_status = { - 'status': 'cnm-receive-success', - } - # TODO ask DAAC if they pass HREF? - else: - latest_daac_status = { - 'status': 'cnm-receive-failed', - 'errorMessage': cnm_notification_msg['response']['errorMessage'] if 'errorMessage' in cnm_notification_msg['response'] else 'unknown', - 'errorCode': cnm_notification_msg['response']['errorCode'] if 'errorCode' in cnm_notification_msg['response'] else 'unknown', - } - latest_daac_status['datetime'] = TimeUtils.get_current_time() - existing_item['properties']['archival_statuses'] = existing_item['properties']['archival_statuses'] + [latest_daac_status] if 'archival_statuses' in existing_item['properties'] else [latest_daac_status] - updated_item = sfa_client.update_item(collection_id, granule_id, existing_item, update_whole=True) # TODO partial update via patch is not working at this moment. - return + dac = DaacArchiverCatalia() + return dac.update_status_wrapper(cnm_notification_msg) def update_stac_uds(self, cnm_notification_msg): granule_identifier = UdsCollections.decode_identifier(cnm_notification_msg['identifier']) # This is normally meant to be for collection. Since our granule ID also has collection id prefix. we can use this. diff --git a/tests/cumulus_lambda_functions/daac_archiver/test_daac_archiver_catalia.py b/tests/cumulus_lambda_functions/daac_archiver/test_daac_archiver_catalia.py index 47d1a5bd..5d7d2b32 100644 --- a/tests/cumulus_lambda_functions/daac_archiver/test_daac_archiver_catalia.py +++ b/tests/cumulus_lambda_functions/daac_archiver/test_daac_archiver_catalia.py @@ -361,7 +361,7 @@ def mock_update_item(collection_id, item_id, item, update_whole=True): # Apply status updates one by one and verify each for i, status_update in enumerate(status_updates): # Call update_status with the current status - result = archiver.update_status(status_update) + result = archiver.update_status('sample', status_update) # Verify method returns self self.assertEqual(result, archiver, f"update_status should return self (iteration {i+1})") From 2744bb238e3974cbb896ad188f57ce13da0c27cb Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 17 Dec 2025 14:40:13 -0600 Subject: [PATCH 24/35] feat: update terraform --- .../daac_archiver/daac_archiver_logic.py | 2 +- tf-module/uds_catalia/catalia_ddb.tf | 30 +++++++++++++++++++ tf-module/uds_catalia/daac_archiver.tf | 3 +- tf-module/uds_catalia/uds_api_lambda.tf | 6 +++- tf-module/uds_catalia/variables.tf | 4 +++ 5 files changed, 42 insertions(+), 3 deletions(-) diff --git a/cumulus_lambda_functions/daac_archiver/daac_archiver_logic.py b/cumulus_lambda_functions/daac_archiver/daac_archiver_logic.py index 18a4d307..cd94f187 100644 --- a/cumulus_lambda_functions/daac_archiver/daac_archiver_logic.py +++ b/cumulus_lambda_functions/daac_archiver/daac_archiver_logic.py @@ -150,7 +150,7 @@ def send_to_daac(self, event: dict): def update_stac(self, cnm_notification_msg): update_type = os.getenv('ARCHIVAL_STATUS_MECHANISM', '') - if not any([k for k in ['UDS', 'FAST_STAC'] if k == update_type]): + if not any([k for k in ['UDS', 
'CATALYA'] if k == update_type]): raise ValueError(f"missing ARCHIVAL_STATUS_MECHANISM environment variable or value is not {['UDS', 'FAST_STAC']}") if update_type == 'UDS': return self.update_stac_uds(cnm_notification_msg) diff --git a/tf-module/uds_catalia/catalia_ddb.tf b/tf-module/uds_catalia/catalia_ddb.tf index 3ee05311..84033c4b 100644 --- a/tf-module/uds_catalia/catalia_ddb.tf +++ b/tf-module/uds_catalia/catalia_ddb.tf @@ -48,6 +48,36 @@ resource "aws_dynamodb_table" "uds_ctla_daac_handshake" { type = "S" } +# global_secondary_index { +# name = "${var.prefix}-uds_auth_ddb_gsi_" +# hash_key = "userGroup" +# projection_type = "KEYS_ONLY" +# } +# +# global_secondary_index { +# name = "GSI2_Project_Venue" +# hash_key = "Project" +# range_key = "Venue" +# projection_type = "KEYS_ONLY" +# } +} + +resource "aws_dynamodb_table" "uds_ctla_daac_status" { + name = "${var.prefix}-uds_ctla_daac_status" + billing_mode = "PAY_PER_REQUEST" + hash_key = "identifier" + range_key = "datetime" + + attribute { + name = "identifier" + type = "S" + } + + attribute { + name = "datetime" + type = "S" + } + # global_secondary_index { # name = "${var.prefix}-uds_auth_ddb_gsi_" # hash_key = "userGroup" diff --git a/tf-module/uds_catalia/daac_archiver.tf b/tf-module/uds_catalia/daac_archiver.tf index cb0a5e00..4c9b1a8e 100644 --- a/tf-module/uds_catalia/daac_archiver.tf +++ b/tf-module/uds_catalia/daac_archiver.tf @@ -42,8 +42,9 @@ resource "aws_lambda_function" "uds_daac_archiver_response" { environment { variables = { LOG_LEVEL = var.log_level - ARCHIVAL_STATUS_MECHANISM = "UDS" # UDS or FAST_STAC + ARCHIVAL_STATUS_MECHANISM = "CATALYA" # UDS or FAST_STAC DS_URL = "TODO" + CATALYA_STATUS_DB = aws_dynamodb_table.uds_ctla_daac_status.name SFA_USERNAME = "TODO" SFA_PASSWORD = "TODO" SFA_AUTH_KEY = "TODO" diff --git a/tf-module/uds_catalia/uds_api_lambda.tf b/tf-module/uds_catalia/uds_api_lambda.tf index 97947d96..b9e8a5a9 100644 --- a/tf-module/uds_catalia/uds_api_lambda.tf +++ b/tf-module/uds_catalia/uds_api_lambda.tf @@ -10,6 +10,11 @@ resource "aws_lambda_function" "uds_api_1" { environment { variables = { LOG_LEVEL = var.log_level + CATALYA_STATUS_DB = aws_dynamodb_table.uds_ctla_daac_status.name + CATALYA_DB_NAME = aws_dynamodb_table.uds_ctla_auth_ddb.name + CATALYA_DAAC_AGREEMENT_DB_NAME = aws_dynamodb_table.uds_ctla_daac_handshake.name + ADMIN_COMMA_SEP_GROUPS = var.comma_separated_admin_groups + CATALYA_UDS_STAGING_BUCKET = var.uds_ctla_s3_staging_bucket # UNITY_DEFAULT_PROVIDER = var.unity_default_provider COLLECTION_CREATION_LAMBDA_NAME = "arn:aws:lambda:${var.aws_region}:${local.account_id}:function:${var.prefix}-uds_api_1" # SNS_TOPIC_ARN = var.cnm_sns_topic_arn @@ -20,7 +25,6 @@ resource "aws_lambda_function" "uds_api_1" { # ES_URL = aws_elasticsearch_domain.uds-es.endpoint # ES_PORT = 443 # REPORT_TO_EMS = var.report_to_ems - ADMIN_COMMA_SEP_GROUPS = var.comma_separated_admin_groups DAPA_API_URL_BASE = "${var.uds_base_url}/${var.dapa_api_prefix}" } } diff --git a/tf-module/uds_catalia/variables.tf b/tf-module/uds_catalia/variables.tf index 08e144e2..41f4e4ee 100644 --- a/tf-module/uds_catalia/variables.tf +++ b/tf-module/uds_catalia/variables.tf @@ -235,6 +235,10 @@ variable "granules_cnm_ingester__s3_glob" { default = "*unity*" description = "GLOB expression that has all s3 buckets connecting to SNS topic" } + +variable "uds_ctla_s3_staging_bucket" { + type = string +} #variable "granules_cnm_ingester__is_deploying_bucket" { # type = bool # default = false From 
2c5b2db7961f3c634ca52ad2e6f26afeae54b361 Mon Sep 17 00:00:00 2001
From: Wai Phyo
Date: Tue, 6 Jan 2026 11:35:58 -0600
Subject: [PATCH 25/35] fix: using ssm to get details

---
 .../daac_archiver/daac_archiver_catalia.py |  8 ++++++-
 requirements.txt                           |  2 +-
 tf-module/uds_catalia/daac_archiver.tf     | 21 +++++++++++++------
 tf-module/uds_catalia/uds_api_lambda.tf    |  1 +
 4 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py b/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py
index f3b7e74a..d1795360 100644
--- a/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py
+++ b/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py
@@ -2,6 +2,7 @@
 import os
 from uuid import uuid4

+from mdps_ds_lib.lib.aws.aws_param_store import AwsParamStore
 from mdps_ds_lib.lib.aws.aws_s3 import AwsS3
 from mdps_ds_lib.lib.aws.aws_sns import AwsSns
 from mdps_ds_lib.lib.utils.time_utils import TimeUtils
@@ -62,7 +63,12 @@ def __init__(self):
         self.__staged_s3_bucket = 'SET_ME_UP'  # TODO
         self.__status_ddb = CataliaStatusDb(os.getenv('CATALYA_STATUS_DB', None))
         self.__daac_agreements = []
-        self.__sfa_client = SFAClientFactory().get_instance_from_env()
+        sfa_auth_ssm_key = os.getenv('SFA_AUTH', None)
+        LOGGER.debug(f'retrieving SSM details from {sfa_auth_ssm_key}')
+        sfa_auth_ssm_dict = AwsParamStore().get_param(sfa_auth_ssm_key)
+        if sfa_auth_ssm_dict is None:
+            raise ValueError(f'missing SSM details for SFA Auth at key: {sfa_auth_ssm_key}')
+        self.__sfa_client = SFAClientFactory().get_instance_from_dict(json.loads(sfa_auth_ssm_dict))
         self.__archiving_granules_stac = None
         self.__archiving_status_extension_url = "https://stac-extensions.github.io/archival_statuses/v1.0.0/schema.json"
         self.__cnm_msg_version = "1.6.0"
diff --git a/requirements.txt b/requirements.txt
index efe26b00..d5e67953 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -14,7 +14,7 @@ jsonschema==4.23.0
 jsonschema-specifications==2023.12.1
 lark==0.12.0
 mangum==0.18.0
-mdps-ds-lib==1.2.0.dev300
+mdps-ds-lib==1.2.0.dev400
 pydantic==2.9.2
 pydantic_core==2.23.4
 pygeofilter==0.2.4
diff --git a/tf-module/uds_catalia/daac_archiver.tf b/tf-module/uds_catalia/daac_archiver.tf
index 4c9b1a8e..5a9990fc 100644
--- a/tf-module/uds_catalia/daac_archiver.tf
+++ b/tf-module/uds_catalia/daac_archiver.tf
@@ -43,13 +43,8 @@ resource "aws_lambda_function" "uds_daac_archiver_response" {
     variables = {
       LOG_LEVEL = var.log_level
       ARCHIVAL_STATUS_MECHANISM = "CATALYA" # UDS or FAST_STAC
-      DS_URL = "TODO"
+      SFA_AUTH = aws_ssm_parameter.daac_archiver_credentials.id
       CATALYA_STATUS_DB = aws_dynamodb_table.uds_ctla_daac_status.name
-      SFA_USERNAME = "TODO"
-      SFA_PASSWORD = "TODO"
-      SFA_AUTH_KEY = "TODO"
-      SFA_AUTH_VALUE = "TODO"
-      SFA_BEARER_TOKEN = "TODO"
     }
   }
@@ -60,6 +55,20 @@
   tags = var.tags
 }

+resource "aws_ssm_parameter" "daac_archiver_credentials" {
+  name = "/${var.prefix}/daac-archiver/credentials"
+  type = "SecureString"
+  value = jsonencode({
+    DS_URL = "TODO"
+    SFA_USERNAME = "TODO"
+    SFA_PASSWORD = "TODO"
+    SFA_AUTH_KEY = "TODO"
+    SFA_AUTH_VALUE = "TODO"
+    SFA_BEARER_TOKEN = "TODO"
+  })
+  description = "Secure credentials and configuration for DAAC archiver service"
+  tags = var.tags
+}
 resource "aws_sns_topic" "uds_daac_archiver_response" {
   name = "${var.prefix}-uds_daac_archiver_response"
diff --git a/tf-module/uds_catalia/uds_api_lambda.tf b/tf-module/uds_catalia/uds_api_lambda.tf
index b9e8a5a9..fd5d5779
100644 --- a/tf-module/uds_catalia/uds_api_lambda.tf +++ b/tf-module/uds_catalia/uds_api_lambda.tf @@ -15,6 +15,7 @@ resource "aws_lambda_function" "uds_api_1" { CATALYA_DAAC_AGREEMENT_DB_NAME = aws_dynamodb_table.uds_ctla_daac_handshake.name ADMIN_COMMA_SEP_GROUPS = var.comma_separated_admin_groups CATALYA_UDS_STAGING_BUCKET = var.uds_ctla_s3_staging_bucket + SFA_AUTH = aws_ssm_parameter.daac_archiver_credentials.id # UNITY_DEFAULT_PROVIDER = var.unity_default_provider COLLECTION_CREATION_LAMBDA_NAME = "arn:aws:lambda:${var.aws_region}:${local.account_id}:function:${var.prefix}-uds_api_1" # SNS_TOPIC_ARN = var.cnm_sns_topic_arn From 061e31703edba7319c29eaae15e8384a4e583900 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Thu, 8 Jan 2026 16:50:33 -0600 Subject: [PATCH 26/35] feat: ecs tf --- .../daac_archiver/daac_archiver_catalia.py | 1 + tf-module/uds_catalia/ecs.tf | 56 +++++++++++++++++++ tf-module/uds_catalia/main.tf | 3 +- tf-module/uds_catalia/variables.tf | 1 + tf-module/uds_catalia_iam/uds_ecs_role.tf | 20 +++++++ 5 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 tf-module/uds_catalia/ecs.tf create mode 100644 tf-module/uds_catalia_iam/uds_ecs_role.tf diff --git a/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py b/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py index d1795360..bc61d2f3 100644 --- a/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py +++ b/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py @@ -57,6 +57,7 @@ class DaacArchiverCatalia: }, "additionalProperties": False } + def __init__(self): self.__sns = AwsSns() self.__s3 = AwsS3() diff --git a/tf-module/uds_catalia/ecs.tf b/tf-module/uds_catalia/ecs.tf new file mode 100644 index 00000000..38cb02ee --- /dev/null +++ b/tf-module/uds_catalia/ecs.tf @@ -0,0 +1,56 @@ +resource "aws_ecs_cluster" "ds_cluster" { + name = "${var.prefix}-ds_cluster" +} + +data "aws_iam_role" "ecs_task_execution_role" { + # count = var.create_lambda_role ? 
1 : 0 + name = "${var.prefix}-ecs_task_execution_role" +} + +resource "aws_cloudwatch_log_group" "ds_cluster" { + name = "/ecs/${var.prefix}-ds_cluster" + retention_in_days = 30 +} + +resource "aws_ecs_task_definition" "ds_cluster" { + family = "on-demand-task" + requires_compatibilities = ["FARGATE"] + network_mode = "awsvpc" + cpu = "256" + memory = "512" + execution_role_arn = data.aws_iam_role.ecs_task_execution_role.arn + + container_definitions = jsonencode([ + { + name = "app" + image = "ghcr.io/my-org/my-image:latest" # change this + essential = true + logConfiguration = { + logDriver = "awslogs" + options = { + awslogs-group = "/ecs/${var.prefix}-ds_cluster" + awslogs-region = var.aws_region + awslogs-stream-prefix = "ecs" + } + } + } + ]) +} + + + +#resource "null_resource" "run_task" { +# triggers = { +# always_run = timestamp() +# } +# +# provisioner "local-exec" { +# command = < Date: Wed, 21 Jan 2026 12:14:30 -0800 Subject: [PATCH 27/35] feat: docker image from var --- tf-module/uds_catalia/ecs.tf | 2 +- tf-module/uds_catalia/variables.tf | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/tf-module/uds_catalia/ecs.tf b/tf-module/uds_catalia/ecs.tf index 38cb02ee..0dd46a58 100644 --- a/tf-module/uds_catalia/ecs.tf +++ b/tf-module/uds_catalia/ecs.tf @@ -23,7 +23,7 @@ resource "aws_ecs_task_definition" "ds_cluster" { container_definitions = jsonencode([ { name = "app" - image = "ghcr.io/my-org/my-image:latest" # change this + image = "${var.uds_docker_name}:${var.uds_docker_version}" # change this essential = true logConfiguration = { logDriver = "awslogs" diff --git a/tf-module/uds_catalia/variables.tf b/tf-module/uds_catalia/variables.tf index d85db08e..a6f882d5 100644 --- a/tf-module/uds_catalia/variables.tf +++ b/tf-module/uds_catalia/variables.tf @@ -246,3 +246,13 @@ variable "uds_ctla_s3_staging_bucket" { # description = "flag to specify if deploying example bucket" #} // << Variables for granules_cnm_ingester END >> + + +variable "uds_docker_name" { + type = string + default = "ghcr.io/unity-sds/unity-data-services" +} + +variable "uds_docker_version" { + type = string +} \ No newline at end of file From 1413a6cb054f95c52fc784db3a8180e46135d797 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Thu, 22 Jan 2026 09:51:10 -0800 Subject: [PATCH 28/35] feat: api gateway added --- .../catalya_uds_api/web_service.py | 9 +- .../daac_archiver/daac_archiver_catalia.py | 1 + tf-module/uds_catalia/README.md | 29 ++++++ .../any_to_lambda_module/any_to_lambda.tf | 44 +++++++++ .../any_to_lambda_variables.tf | 17 ++++ tf-module/uds_catalia/api_gateway.tf | 95 +++++++++++++++++++ tf-module/uds_catalia/api_gateway_00_proxy.tf | 20 ++++ .../api_gateway_01_collections_base.tf | 21 ++++ ..._collections_base_00_collection_id_base.tf | 20 ++++ ...ons_base_00_collection_id_base_00_proxy.tf | 20 ++++ .../uds_catalia/api_gateway_02_openapi.tf | 33 +++++++ tf-module/uds_catalia/api_gateway_04_docs.tf | 33 +++++++ .../uds_catalia/cors_module/cors_variables.tf | 35 +++++++ .../uds_catalia/cors_module/working_cors.tf | 43 +++++++++ tf-module/uds_catalia/daac_archiver.tf | 12 +-- tf-module/uds_catalia/uds_api_lambda.tf | 22 +++++ tf-module/uds_catalia/variables.tf | 6 ++ 17 files changed, 452 insertions(+), 8 deletions(-) create mode 100644 tf-module/uds_catalia/README.md create mode 100644 tf-module/uds_catalia/any_to_lambda_module/any_to_lambda.tf create mode 100644 tf-module/uds_catalia/any_to_lambda_module/any_to_lambda_variables.tf create mode 100644 
tf-module/uds_catalia/api_gateway.tf create mode 100644 tf-module/uds_catalia/api_gateway_00_proxy.tf create mode 100644 tf-module/uds_catalia/api_gateway_01_collections_base.tf create mode 100644 tf-module/uds_catalia/api_gateway_01_collections_base_00_collection_id_base.tf create mode 100644 tf-module/uds_catalia/api_gateway_01_collections_base_00_collection_id_base_00_proxy.tf create mode 100644 tf-module/uds_catalia/api_gateway_02_openapi.tf create mode 100644 tf-module/uds_catalia/api_gateway_04_docs.tf create mode 100644 tf-module/uds_catalia/cors_module/cors_variables.tf create mode 100644 tf-module/uds_catalia/cors_module/working_cors.tf diff --git a/cumulus_lambda_functions/catalya_uds_api/web_service.py b/cumulus_lambda_functions/catalya_uds_api/web_service.py index ce375ba4..586fa672 100644 --- a/cumulus_lambda_functions/catalya_uds_api/web_service.py +++ b/cumulus_lambda_functions/catalya_uds_api/web_service.py @@ -1,5 +1,7 @@ from fastapi.staticfiles import StaticFiles +from cumulus_lambda_functions.catalya_uds_api import auth_admin_api +from cumulus_lambda_functions.catalya_uds_api import granules_archive_api from cumulus_lambda_functions.uds_api.fast_api_utils import FastApiUtils from cumulus_lambda_functions.lib.lambda_logger_generator import LambdaLoggerGenerator from dotenv import load_dotenv @@ -7,13 +9,12 @@ load_dotenv() import uvicorn -from fastapi import FastAPI +from fastapi import FastAPI, APIRouter from fastapi.middleware.cors import CORSMiddleware from mangum import Mangum from starlette.requests import Request -from cumulus_lambda_functions.uds_api.routes_api import main_router LOGGER = LambdaLoggerGenerator.get_logger(__name__, LambdaLoggerGenerator.get_level_from_env()) api_base_prefix = FastApiUtils.get_api_base_prefix() @@ -30,6 +31,10 @@ allow_methods=["*"], allow_headers=["*"], ) + +main_router = APIRouter(redirect_slashes=False) +main_router.include_router(auth_admin_api.router) +main_router.include_router(granules_archive_api.router) app.include_router(main_router, prefix=f'/{api_base_prefix}') diff --git a/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py b/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py index bc61d2f3..e2599585 100644 --- a/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py +++ b/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py @@ -118,6 +118,7 @@ def archive_collection(self, collection_id): Archive all granules in a collection by querying the STAC Fast API and processing them in parallel. + NOTE: TODO This will not work if there are too many granules.. :param collection_id: The collection ID to archive all granules from :return: self """ diff --git a/tf-module/uds_catalia/README.md b/tf-module/uds_catalia/README.md new file mode 100644 index 00000000..7639129e --- /dev/null +++ b/tf-module/uds_catalia/README.md @@ -0,0 +1,29 @@ +How to get `smce_deployment` Role + +1. Clear content of `~/.aws/credentials` and `~/.aws/config` files +1. Get AWS creds from science cloud: +2. Manually add them to saml-pub + + In Config: + [profile saml-pub] + output = json + region = us-west-2 + sso_session = saml-pub + sso_account_id = 979188186972 + sso_role_name = Project-Power-User + [sso-session saml-pub] + sso_start_url = https://d-9067c5bbc5.awsapps.com/start/# + sso_region = us-east-1 + sso_registration_scopes = sso:account:access + + In Credentials: + [saml-pub] + output = json + region = us-west-2 + aws_access_key_id=add + aws_secret_access_key=add + aws_session_token=add + +3. 
Run the script `smce_keys_assume_deployment.sh` + - Make sure `TARGET_PROFILE_NAME="mdps-temp-creds-assumed"` not other name +4. Rename that profile to `[default]` in both `~/.aws/credentials` and `~/.aws/config` diff --git a/tf-module/uds_catalia/any_to_lambda_module/any_to_lambda.tf b/tf-module/uds_catalia/any_to_lambda_module/any_to_lambda.tf new file mode 100644 index 00000000..4d487f7c --- /dev/null +++ b/tf-module/uds_catalia/any_to_lambda_module/any_to_lambda.tf @@ -0,0 +1,44 @@ +resource "aws_api_gateway_method" "uds_all_method" { + rest_api_id = var.rest_api_id + resource_id = var.resource_id + http_method = "ANY" + authorization = "CUSTOM" + authorizer_id = var.authorizer_id + request_parameters = { + "method.request.path.proxy" = true + } +} + +resource "aws_api_gateway_method_response" "uds_all_method_response" { + rest_api_id = var.rest_api_id + resource_id = var.resource_id + http_method = aws_api_gateway_method.uds_all_method.http_method + status_code = 200 + response_models = { + "application/json" = "Empty" + } + # response_parameters = { + # "method.response.header.Access-Control-Allow-Origin" = true + # } + depends_on = ["aws_api_gateway_method.uds_all_method"] +} + +resource "aws_api_gateway_integration" "uds_all_lambda_integration" { + rest_api_id = var.rest_api_id + resource_id = var.resource_id + http_method = aws_api_gateway_method.uds_all_method.http_method + type = "AWS_PROXY" + uri = var.lambda_invoke_arn + integration_http_method = "POST" + + # cache_key_parameters = ["method.request.path.proxy"] + + timeout_milliseconds = 29000 + # request_parameters = { + # "integration.request.path.proxy" = "method.request.path.proxy" + # } +} + +output "lambda_integration_object" { + value = aws_api_gateway_integration.uds_all_lambda_integration +} \ No newline at end of file diff --git a/tf-module/uds_catalia/any_to_lambda_module/any_to_lambda_variables.tf b/tf-module/uds_catalia/any_to_lambda_module/any_to_lambda_variables.tf new file mode 100644 index 00000000..b42c7e37 --- /dev/null +++ b/tf-module/uds_catalia/any_to_lambda_module/any_to_lambda_variables.tf @@ -0,0 +1,17 @@ + +variable "rest_api_id" { + type = string +} + +variable "resource_id" { + type = string +} + +variable "authorizer_id" { + type = string +} + +variable "lambda_invoke_arn" { + type = string +} + diff --git a/tf-module/uds_catalia/api_gateway.tf b/tf-module/uds_catalia/api_gateway.tf new file mode 100644 index 00000000..12e8a489 --- /dev/null +++ b/tf-module/uds_catalia/api_gateway.tf @@ -0,0 +1,95 @@ +resource "aws_api_gateway_rest_api" "rest_api" { + name = "Catalya DS API Gateway" + description = "Catalya DS API Gateway" + + api_key_source = "HEADER" + + endpoint_configuration { + types = ["REGIONAL"] + } + + disable_execute_api_endpoint = false +} + +#data "aws_api_gateway_rest_api" "rest_api" { +# # Name of the REST API to look up. If no REST API is found with this name, an error will be returned. +# # If multiple REST APIs are found with this name, an error will be returned. 
At the moment there is no data source to
+#  # get REST API by ID.
+#  name = var.shared_services_rest_api_name
+#}

+# API Gateway authorizer resource using existing Lambda
+resource "aws_api_gateway_authorizer" "unity_cognito_authorizer" {
+  name            = "unity-cognito-authorizer"
+  rest_api_id     = aws_api_gateway_rest_api.rest_api.id
+  authorizer_uri  = "arn:aws:apigateway:${var.aws_region}:lambda:path/2015-03-31/functions/${aws_lambda_function.uds_api_authorizer.arn}/invocations"
+  type            = "TOKEN"
+  identity_source = "method.request.header.Authorization"
+}
+
+# Lambda permission for API Gateway to invoke the authorizer
+resource "aws_lambda_permission" "authorizer_invoke_permission" {
+  statement_id  = "AllowAPIGatewayInvokeAuthorizer"
+  action        = "lambda:InvokeFunction"
+  function_name = aws_lambda_function.uds_api_authorizer.function_name
+  principal     = "apigateway.amazonaws.com"
+  source_arn    = "${aws_api_gateway_rest_api.rest_api.execution_arn}/authorizers/${aws_api_gateway_authorizer.unity_cognito_authorizer.id}"
+}
+##########################################################################################################################
+# Creates the project API Gateway resource to be pointed to a project level API gateway.
+# DEPLOYER SHOULD MODIFY THE VARIABLE var.dapa_api_prefix TO BE THE PROJECT NAME (e.g. "soundersips"). It is TIED to Lambda setting
+resource "aws_api_gateway_resource" "uds_api_base_resource" {
+  rest_api_id = aws_api_gateway_rest_api.rest_api.id
+  parent_id   = aws_api_gateway_rest_api.rest_api.root_resource_id
+  path_part   = var.dapa_api_prefix
+}
+
+#
+# Creates the wildcard path (proxy+) resource, under the project resource
+#
+
+resource "aws_lambda_permission" "uds_all_lambda_integration__apigw_lambda" {
+  statement_id  = "AllowExecutionFromAPIGatewayWildCard"
+  action        = "lambda:InvokeFunction"
+  function_name = aws_lambda_function.uds_api_1.function_name
+  principal     = "apigateway.amazonaws.com"
+
+  # More: http://docs.aws.amazon.com/apigateway/latest/developerguide/api-gateway-control-access-using-iam-policies-to-invoke-api.html
+  source_arn = "arn:aws:execute-api:${var.aws_region}:${local.account_id}:${aws_api_gateway_rest_api.rest_api.id}/*/*/${var.dapa_api_prefix}/*"
+}
+
+##########################################################################################################################
+# The Shared Services API Gateway deployment
+resource "aws_api_gateway_deployment" "shared_services_api_gateway_deployment" {
+  rest_api_id = aws_api_gateway_rest_api.rest_api.id
+
+  lifecycle {
+    create_before_destroy = true
+  }
+
+  depends_on = [
+    aws_api_gateway_integration.openapi_lambda_integration,
+    aws_api_gateway_integration.docs_lambda_integration,
+
+    module.uds_all_cors_method.options_integration_object,
+    module.uds_all_any_to_lambda_module.lambda_integration_object,
+
+    module.collections_base_cors_method.options_integration_object,
+    module.collections_base_any_to_lambda_module.lambda_integration_object,
+
+    module.collection_id_base_cors_method.options_integration_object,
+    module.collection_id_base_any_to_lambda_module.lambda_integration_object,
+
+    module.collection_id_cors_method.options_integration_object,
+    module.collection_id_any_to_lambda_module.lambda_integration_object,
+  ]
+}
+
+# The API Gateway stage
+resource "aws_api_gateway_stage" "shared_services_api_gateway_stage" {
+  deployment_id = aws_api_gateway_deployment.shared_services_api_gateway_deployment.id
+  rest_api_id   = aws_api_gateway_rest_api.rest_api.id
+  stage_name    =
var.rest_api_stage + description = "Deployed at ${timestamp()}" +} diff --git a/tf-module/uds_catalia/api_gateway_00_proxy.tf b/tf-module/uds_catalia/api_gateway_00_proxy.tf new file mode 100644 index 00000000..4593e7a3 --- /dev/null +++ b/tf-module/uds_catalia/api_gateway_00_proxy.tf @@ -0,0 +1,20 @@ +resource "aws_api_gateway_resource" "uds_all_resource" { + rest_api_id = aws_api_gateway_rest_api.rest_api.id + parent_id = aws_api_gateway_resource.uds_api_base_resource.id + path_part = "{proxy+}" +} + +module "uds_all_any_to_lambda_module" { + source = "./any_to_lambda_module" + authorizer_id = aws_api_gateway_authorizer.unity_cognito_authorizer.id + lambda_invoke_arn = aws_lambda_function.uds_api_1.invoke_arn + rest_api_id = aws_api_gateway_rest_api.rest_api.id + resource_id = aws_api_gateway_resource.uds_all_resource.id +} + +module "uds_all_cors_method" { + source = "./cors_module" + rest_api_id = aws_api_gateway_rest_api.rest_api.id + resource_id = aws_api_gateway_resource.uds_all_resource.id + prefix = "${var.prefix}_uds_all" +} \ No newline at end of file diff --git a/tf-module/uds_catalia/api_gateway_01_collections_base.tf b/tf-module/uds_catalia/api_gateway_01_collections_base.tf new file mode 100644 index 00000000..a53a782a --- /dev/null +++ b/tf-module/uds_catalia/api_gateway_01_collections_base.tf @@ -0,0 +1,21 @@ +resource "aws_api_gateway_resource" "collections_base_resource" { + rest_api_id = aws_api_gateway_rest_api.rest_api.id + parent_id = aws_api_gateway_resource.uds_api_base_resource.id + path_part = "collections" +} + +module "collections_base_any_to_lambda_module" { + source = "./any_to_lambda_module" + authorizer_id = aws_api_gateway_authorizer.unity_cognito_authorizer.id + lambda_invoke_arn = aws_lambda_function.uds_api_1.invoke_arn + rest_api_id = aws_api_gateway_rest_api.rest_api.id + resource_id = aws_api_gateway_resource.collections_base_resource.id +} + + +module "collections_base_cors_method" { + source = "./cors_module" + rest_api_id = aws_api_gateway_rest_api.rest_api.id + resource_id = aws_api_gateway_resource.collections_base_resource.id + prefix = "${var.prefix}_collections_base" +} \ No newline at end of file diff --git a/tf-module/uds_catalia/api_gateway_01_collections_base_00_collection_id_base.tf b/tf-module/uds_catalia/api_gateway_01_collections_base_00_collection_id_base.tf new file mode 100644 index 00000000..47f845ea --- /dev/null +++ b/tf-module/uds_catalia/api_gateway_01_collections_base_00_collection_id_base.tf @@ -0,0 +1,20 @@ +resource "aws_api_gateway_resource" "collection_id_base_resource" { + rest_api_id = aws_api_gateway_rest_api.rest_api.id + parent_id = aws_api_gateway_resource.collections_base_resource.id + path_part = "{collectionId}" +} +module "collection_id_base_any_to_lambda_module" { + source = "./any_to_lambda_module" + authorizer_id = aws_api_gateway_authorizer.unity_cognito_authorizer.id + lambda_invoke_arn = aws_lambda_function.uds_api_1.invoke_arn + rest_api_id = aws_api_gateway_rest_api.rest_api.id + resource_id = aws_api_gateway_resource.collection_id_base_resource.id +} + + +module "collection_id_base_cors_method" { + source = "./cors_module" + rest_api_id = aws_api_gateway_rest_api.rest_api.id + resource_id = aws_api_gateway_resource.collection_id_base_resource.id + prefix = "${var.prefix}_collection_id_base" +} \ No newline at end of file diff --git a/tf-module/uds_catalia/api_gateway_01_collections_base_00_collection_id_base_00_proxy.tf 
b/tf-module/uds_catalia/api_gateway_01_collections_base_00_collection_id_base_00_proxy.tf new file mode 100644 index 00000000..addbd22c --- /dev/null +++ b/tf-module/uds_catalia/api_gateway_01_collections_base_00_collection_id_base_00_proxy.tf @@ -0,0 +1,20 @@ +resource "aws_api_gateway_resource" "collection_id_resource" { + rest_api_id = aws_api_gateway_rest_api.rest_api.id + parent_id = aws_api_gateway_resource.collection_id_base_resource.id + path_part = "{proxy+}" +} +module "collection_id_any_to_lambda_module" { + source = "./any_to_lambda_module" + authorizer_id = aws_api_gateway_authorizer.unity_cognito_authorizer.id + lambda_invoke_arn = aws_lambda_function.uds_api_1.invoke_arn + rest_api_id = aws_api_gateway_rest_api.rest_api.id + resource_id = aws_api_gateway_resource.collection_id_resource.id +} + + +module "collection_id_cors_method" { + source = "./cors_module" + rest_api_id = aws_api_gateway_rest_api.rest_api.id + resource_id = aws_api_gateway_resource.collection_id_resource.id + prefix = "${var.prefix}_collection_id" +} \ No newline at end of file diff --git a/tf-module/uds_catalia/api_gateway_02_openapi.tf b/tf-module/uds_catalia/api_gateway_02_openapi.tf new file mode 100644 index 00000000..096f6491 --- /dev/null +++ b/tf-module/uds_catalia/api_gateway_02_openapi.tf @@ -0,0 +1,33 @@ +resource "aws_api_gateway_resource" "openapi_resource" { + rest_api_id = aws_api_gateway_rest_api.rest_api.id + parent_id = aws_api_gateway_resource.uds_api_base_resource.id + path_part = "openapi" +} + +resource "aws_api_gateway_method" "openapi_method" { + rest_api_id = aws_api_gateway_rest_api.rest_api.id + resource_id = aws_api_gateway_resource.openapi_resource.id + http_method = "GET" + authorization = "NONE" + request_parameters = { + "method.request.path.proxy" = true + } +} + +resource "aws_api_gateway_integration" "openapi_lambda_integration" { + rest_api_id = aws_api_gateway_rest_api.rest_api.id + resource_id = aws_api_gateway_resource.openapi_resource.id + http_method = aws_api_gateway_method.openapi_method.http_method + type = "AWS_PROXY" + uri = aws_lambda_function.uds_api_1.invoke_arn + integration_http_method = "POST" + +# cache_key_parameters = ["method.request.path.proxy"] + + timeout_milliseconds = 29000 +# request_parameters = { +# "integration.request.path.proxy" = "method.request.path.proxy" +# } +} + +########################################################################################################################## \ No newline at end of file diff --git a/tf-module/uds_catalia/api_gateway_04_docs.tf b/tf-module/uds_catalia/api_gateway_04_docs.tf new file mode 100644 index 00000000..b2b9d391 --- /dev/null +++ b/tf-module/uds_catalia/api_gateway_04_docs.tf @@ -0,0 +1,33 @@ +resource "aws_api_gateway_resource" "docs_resource" { + rest_api_id = aws_api_gateway_rest_api.rest_api.id + parent_id = aws_api_gateway_resource.uds_api_base_resource.id + path_part = "docs" +} + +resource "aws_api_gateway_method" "docs_method" { + rest_api_id = aws_api_gateway_rest_api.rest_api.id + resource_id = aws_api_gateway_resource.docs_resource.id + http_method = "GET" + authorization = "NONE" + request_parameters = { + "method.request.path.proxy" = true + } +} + +resource "aws_api_gateway_integration" "docs_lambda_integration" { + rest_api_id = aws_api_gateway_rest_api.rest_api.id + resource_id = aws_api_gateway_resource.docs_resource.id + http_method = aws_api_gateway_method.docs_method.http_method + type = "AWS_PROXY" + uri = aws_lambda_function.uds_api_1.invoke_arn + 
integration_http_method = "POST" + +# cache_key_parameters = ["method.request.path.proxy"] + + timeout_milliseconds = 29000 +# request_parameters = { +# "integration.request.path.proxy" = "method.request.path.proxy" +# } +} + +########################################################################################################################## \ No newline at end of file diff --git a/tf-module/uds_catalia/cors_module/cors_variables.tf b/tf-module/uds_catalia/cors_module/cors_variables.tf new file mode 100644 index 00000000..3fb832ae --- /dev/null +++ b/tf-module/uds_catalia/cors_module/cors_variables.tf @@ -0,0 +1,35 @@ +variable "cors_200_response_parameters" { + type = map(bool) + default = { + "method.response.header.Access-Control-Allow-Credentials" = true + "method.response.header.Access-Control-Allow-Headers" = true + "method.response.header.Access-Control-Allow-Methods" = true + "method.response.header.Access-Control-Allow-Origin" = true + "method.response.header.Access-Control-Expose-Headers" = true + "method.response.header.Access-Control-Max-Age" = true + } +} + +variable "cors_integration_response" { + type = map(string) + default = { + "method.response.header.Access-Control-Allow-Credentials" = "'true'", + "method.response.header.Access-Control-Allow-Headers" = "'Content-Type,X-Amz-Date,Authorization,X-Api-Key,X-Amz-Security-Token'", + "method.response.header.Access-Control-Allow-Methods" = "'DELETE,GET,HEAD,OPTIONS,PATCH,POST,PUT'", + "method.response.header.Access-Control-Allow-Origin" = "'*'" + "method.response.header.Access-Control-Expose-Headers" = "'Access-Control-Allow-Methods,Access-Control-Expose-Headers,Access-Control-Max-Age'" + "method.response.header.Access-Control-Max-Age" = "'300'" + } +} + +variable "rest_api_id" { + type = string +} + +variable "resource_id" { + type = string +} + +variable "prefix" { + type = string +} diff --git a/tf-module/uds_catalia/cors_module/working_cors.tf b/tf-module/uds_catalia/cors_module/working_cors.tf new file mode 100644 index 00000000..854daa8a --- /dev/null +++ b/tf-module/uds_catalia/cors_module/working_cors.tf @@ -0,0 +1,43 @@ +resource "aws_api_gateway_method" "uds_all_options_method" { + rest_api_id = var.rest_api_id + resource_id = var.resource_id + operation_name = "${var.prefix}-uds_cors" + http_method = "OPTIONS" + authorization = "NONE" +} +resource "aws_api_gateway_method_response" "uds_all_options_200" { + rest_api_id = var.rest_api_id + resource_id = var.resource_id + http_method = aws_api_gateway_method.uds_all_options_method.http_method + status_code = 200 + response_models = { + "application/json" = "Empty" + } + response_parameters = var.cors_200_response_parameters + depends_on = ["aws_api_gateway_method.uds_all_options_method"] +} +resource "aws_api_gateway_integration" "uds_all_options_integration" { + rest_api_id = var.rest_api_id + resource_id = var.resource_id + http_method = aws_api_gateway_method.uds_all_options_method.http_method + type = "MOCK" + request_templates = { + "application/json" = jsonencode( + { + statusCode = 200 + }) + } + depends_on = ["aws_api_gateway_method.uds_all_options_method"] +} + +resource "aws_api_gateway_integration_response" "uds_all_options_integration_response" { + rest_api_id = var.rest_api_id + resource_id = var.resource_id + http_method = aws_api_gateway_method.uds_all_options_method.http_method + status_code = aws_api_gateway_method_response.uds_all_options_200.status_code + response_parameters = var.cors_integration_response + depends_on = 
["aws_api_gateway_method_response.uds_all_options_200"] +} +output "options_integration_object" { + value = aws_api_gateway_integration.uds_all_options_integration +} \ No newline at end of file diff --git a/tf-module/uds_catalia/daac_archiver.tf b/tf-module/uds_catalia/daac_archiver.tf index 5a9990fc..f5e0fd77 100644 --- a/tf-module/uds_catalia/daac_archiver.tf +++ b/tf-module/uds_catalia/daac_archiver.tf @@ -59,12 +59,12 @@ resource "aws_ssm_parameter" "daac_archiver_credentials" { name = "/${var.prefix}/daac-archiver/credentials" type = "SecureString" value = jsonencode({ - DS_URL = "TODO" - SFA_USERNAME = "TODO" - SFA_PASSWORD = "TODO" - SFA_AUTH_KEY = "TODO" - SFA_AUTH_VALUE = "TODO" - SFA_BEARER_TOKEN = "TODO" + DS_URL = "https://dps-stac.dit.maap-project.org/" +# SFA_USERNAME = "TODO" +# SFA_PASSWORD = "TODO" +# SFA_AUTH_KEY = "TODO" +# SFA_AUTH_VALUE = "TODO" +# SFA_BEARER_TOKEN = "TODO" }) description = "Secure credentials and configuration for DAAC archiver service" tags = var.tags diff --git a/tf-module/uds_catalia/uds_api_lambda.tf b/tf-module/uds_catalia/uds_api_lambda.tf index fd5d5779..bf28ad12 100644 --- a/tf-module/uds_catalia/uds_api_lambda.tf +++ b/tf-module/uds_catalia/uds_api_lambda.tf @@ -36,3 +36,25 @@ resource "aws_lambda_function" "uds_api_1" { } tags = var.tags } + +resource "aws_lambda_function" "uds_api_authorizer" { + filename = local.lambda_file_name + source_code_hash = filebase64sha256(local.lambda_file_name) + function_name = "${var.prefix}-uds_api_authorizer" + role = local.lambda_role_arn + handler = "cumulus_lambda_functions.catalya_uds_api.web_service.handler" + runtime = "python3.9" + timeout = 300 + memory_size = 512 + environment { + variables = { + LOG_LEVEL = var.log_level + } + } + + vpc_config { + subnet_ids = var.cumulus_lambda_subnet_ids + security_group_ids = local.security_group_ids_set ? var.security_group_ids : [data.aws_security_group.uds_lambda_sg_no_ingress_all_egress.id] + } + tags = var.tags +} diff --git a/tf-module/uds_catalia/variables.tf b/tf-module/uds_catalia/variables.tf index a6f882d5..716878f8 100644 --- a/tf-module/uds_catalia/variables.tf +++ b/tf-module/uds_catalia/variables.tf @@ -255,4 +255,10 @@ variable "uds_docker_name" { variable "uds_docker_version" { type = string +} + +variable "rest_api_stage" { + type = string + description = "REST API Stage Name" + default = "dev" } \ No newline at end of file From 28cee51d9bbdd9780fa033b9b2aa08ef5fdb46c3 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Thu, 22 Jan 2026 10:03:08 -0800 Subject: [PATCH 29/35] fix: add missing module --- .../sqs-sns-lambda.tf | 58 +++++++++++++++++++ .../sqs--sns-lambda-connector/sqs_policy.json | 35 +++++++++++ .../sqs--sns-lambda-connector/variables.tf | 41 +++++++++++++ 3 files changed, 134 insertions(+) create mode 100644 tf-module/sqs--sns-lambda-connector/sqs-sns-lambda.tf create mode 100644 tf-module/sqs--sns-lambda-connector/sqs_policy.json create mode 100644 tf-module/sqs--sns-lambda-connector/variables.tf diff --git a/tf-module/sqs--sns-lambda-connector/sqs-sns-lambda.tf b/tf-module/sqs--sns-lambda-connector/sqs-sns-lambda.tf new file mode 100644 index 00000000..909f8b05 --- /dev/null +++ b/tf-module/sqs--sns-lambda-connector/sqs-sns-lambda.tf @@ -0,0 +1,58 @@ +resource "aws_sqs_queue" "dlq" { // https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sqs_queue + // TODO how to notify admin for failed ingestion? 
+ tags = var.tags + name = "${var.prefix}-dlq-${var.name}" + delay_seconds = 0 + max_message_size = 262144 + message_retention_seconds = 345600 + visibility_timeout_seconds = 300 + receive_wait_time_seconds = 0 + policy = templatefile("${path.module}/sqs_policy.json", { + region: var.aws_region, + roleArn: var.lambda_processing_role_arn, + accountId: var.account_id, + sqsName: "${var.prefix}-dlq-${var.name}", + }) +// redrive_policy = jsonencode({ +// deadLetterTargetArn = aws_sqs_queue.terraform_queue_deadletter.arn +// maxReceiveCount = 4 +// }) +// tags = { +// Environment = "production" +// } +} + +resource "aws_sqs_queue" "main_sqs" { // https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sqs_queue + name = "${var.prefix}-${var.name}" + delay_seconds = 0 + max_message_size = 262144 + message_retention_seconds = 345600 + visibility_timeout_seconds = var.cool_off // Used as cool off time in seconds. It will wait for 5 min if it fails + receive_wait_time_seconds = 0 + policy = templatefile("${path.module}/sqs_policy.json", { + region: var.aws_region, + roleArn: var.lambda_processing_role_arn, + accountId: var.account_id, + sqsName: "${var.prefix}-${var.name}", + }) + redrive_policy = jsonencode({ + deadLetterTargetArn = aws_sqs_queue.dlq.arn + maxReceiveCount = var.retried_count // How many times it will be retried. + }) + tags = var.tags +} + +resource "aws_sns_topic_subscription" "granules_cnm_ingester_topic_subscription" { // https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sns_topic_subscription + topic_arn = var.sns_arn + protocol = "sqs" + endpoint = aws_sqs_queue.main_sqs.arn +# filter_policy_scope = "MessageBody" // MessageAttributes. not using attributes +# filter_policy = templatefile("${path.module}/ideas_api_job_results_filter_policy.json", {}) +} + +resource "aws_lambda_event_source_mapping" "granules_cnm_ingester_queue_lambda_trigger" { // https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_event_source_mapping#sqs + event_source_arn = aws_sqs_queue.main_sqs.arn + function_name = var.lambda_arn + batch_size = var.sqs_batch_size + enabled = true +} \ No newline at end of file diff --git a/tf-module/sqs--sns-lambda-connector/sqs_policy.json b/tf-module/sqs--sns-lambda-connector/sqs_policy.json new file mode 100644 index 00000000..9dbf3f9d --- /dev/null +++ b/tf-module/sqs--sns-lambda-connector/sqs_policy.json @@ -0,0 +1,35 @@ +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "sqs:Change*", + "sqs:DeleteMessage*", + "sqs:Get*", + "sqs:List*", + "sqs:ReceiveMessage*", + "sqs:SendMessage*" + ], + "Principal": { + "Service": ["sns.amazonaws.com", "sqs.amazonaws.com"] + }, + "Resource": "arn:aws:sqs:${region}:${accountId}:${sqsName}" + }, + { + "Effect": "Allow", + "Action": [ + "sqs:ReceiveMessage", + "sqs:DeleteMessage", + "sqs:SendMessage", + "sqs:GetQueueAttributes" + ], + "Resource": "arn:aws:sqs:${region}:${accountId}:${sqsName}", + "Condition": { + "ArnEquals": { + "aws:SourceArn": "${roleArn}" + } + } + } + ] +} \ No newline at end of file diff --git a/tf-module/sqs--sns-lambda-connector/variables.tf b/tf-module/sqs--sns-lambda-connector/variables.tf new file mode 100644 index 00000000..19b26b4a --- /dev/null +++ b/tf-module/sqs--sns-lambda-connector/variables.tf @@ -0,0 +1,41 @@ +variable "prefix" { + type = string +} +variable "name" { + type = string +} +variable "sns_arn" { + type = string +} +variable "lambda_arn" { + type = string +} +variable 
"sqs_batch_size" { + type = number + default = 1 +} +variable "retried_count" { + type = number + default = 3 +} +variable "cool_off" { + type = number + default = 300 + description = "visibility time out for sqs. in seconds" +} +variable "aws_region" { + type = string + default = "us-west-2" +} +variable "tags" { + description = "Tags to be applied to Cumulus resources that support tags" + type = map(string) + default = {} +} +variable "lambda_processing_role_arn" { + type = string +} +variable "account_id" { + type = string + description = "AWS Account ID" +} \ No newline at end of file From 41aaeffc680d3ea1631e344d5203b53b26a86ca8 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Thu, 22 Jan 2026 11:32:03 -0800 Subject: [PATCH 30/35] feat: bucket creation --- tf-module/uds_catalia_bucket/outputs.tf | 3 + .../uds_catalia_bucket/s3_bucket_creation.tf | 42 +++++++++++++ .../uds_catalia_bucket/s3_bucket_policy.json | 39 ++++++++++++ .../uds_catalia_bucket/terraform.tf.example | 7 +++ .../terraform.tfvars.example | 6 ++ tf-module/uds_catalia_bucket/variables.tf | 60 +++++++++++++++++++ 6 files changed, 157 insertions(+) create mode 100644 tf-module/uds_catalia_bucket/outputs.tf create mode 100644 tf-module/uds_catalia_bucket/s3_bucket_creation.tf create mode 100644 tf-module/uds_catalia_bucket/s3_bucket_policy.json create mode 100644 tf-module/uds_catalia_bucket/terraform.tf.example create mode 100644 tf-module/uds_catalia_bucket/terraform.tfvars.example create mode 100644 tf-module/uds_catalia_bucket/variables.tf diff --git a/tf-module/uds_catalia_bucket/outputs.tf b/tf-module/uds_catalia_bucket/outputs.tf new file mode 100644 index 00000000..c1fe2db9 --- /dev/null +++ b/tf-module/uds_catalia_bucket/outputs.tf @@ -0,0 +1,3 @@ +output "datastore_bucket_name" { + value = aws_s3_bucket.datastore_bucket.id +} \ No newline at end of file diff --git a/tf-module/uds_catalia_bucket/s3_bucket_creation.tf b/tf-module/uds_catalia_bucket/s3_bucket_creation.tf new file mode 100644 index 00000000..f1362bc6 --- /dev/null +++ b/tf-module/uds_catalia_bucket/s3_bucket_creation.tf @@ -0,0 +1,42 @@ +locals { + bucket_tags = merge( + var.tags, + { + "Proj" = var.project + "Venue" = var.venue + "Env" = var.venue + "ServiceArea" = "ds" + "CapVersion" = "1.0.0" + "Component" = "DatastoreBucket" + "CreatedBy" = "ds" + "Stack" = "DatastoreBucket" + "Capability" = "datastore" + "Name" = "${var.project}-${var.venue}-ds-datastore-bucket" + } + ) +} + + +resource "aws_s3_bucket" "datastore_bucket" { + bucket = lower(replace("${var.project}-${var.venue}-unity-${var.datastore_bucket_name}", "_", "-")) + tags = local.bucket_tags +} + +resource "aws_s3_bucket_server_side_encryption_configuration" "datastore_bucket" { // https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_server_side_encryption_configuration + bucket = aws_s3_bucket.datastore_bucket.id + rule { + apply_server_side_encryption_by_default { + sse_algorithm = "AES256" + } + } +} + +resource "aws_s3_bucket_policy" "datastore_bucket" { + // https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_policy + bucket = aws_s3_bucket.datastore_bucket.id + policy = templatefile("${path.module}/s3_bucket_policy.json", { + udsAwsAccount: var.account_id, + s3BucketName: aws_s3_bucket.datastore_bucket.id, + cumulus_lambda_processing_role_name: "${var.prefix}-lambda-processing", + }) +} diff --git a/tf-module/uds_catalia_bucket/s3_bucket_policy.json b/tf-module/uds_catalia_bucket/s3_bucket_policy.json new file mode 
100644
index 00000000..4ac126c2
--- /dev/null
+++ b/tf-module/uds_catalia_bucket/s3_bucket_policy.json
@@ -0,0 +1,39 @@
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Sid": "DelegateS3Access",
+      "Effect": "Allow",
+      "Principal": {
+        "Service": "s3.amazonaws.com",
+        "AWS": [
+          "arn:aws:iam::${udsAwsAccount}:role/${cumulus_lambda_processing_role_name}"
+        ]
+      },
+      "Action": [
+        "s3:ListBucket",
+        "s3:GetObject*",
+        "s3:PutObject*"
+      ],
+      "Resource": [
+        "arn:aws:s3:::${s3BucketName}/*",
+        "arn:aws:s3:::${s3BucketName}"
+      ]
+    },
+    {
+      "Sid": "AllowSSLRequestsOnly",
+      "Action": "s3:*",
+      "Effect": "Deny",
+      "Resource": [
+        "arn:aws:s3:::${s3BucketName}/*",
+        "arn:aws:s3:::${s3BucketName}"
+      ],
+      "Condition": {
+        "Bool": {
+          "aws:SecureTransport": "false"
+        }
+      },
+      "Principal": "*"
+    }
+  ]
+}
diff --git a/tf-module/uds_catalia_bucket/terraform.tf.example b/tf-module/uds_catalia_bucket/terraform.tf.example
new file mode 100644
index 00000000..486cd513
--- /dev/null
+++ b/tf-module/uds_catalia_bucket/terraform.tf.example
@@ -0,0 +1,7 @@
+terraform {
+  backend "s3" {
+    region = "us-west-2"
+    bucket = "catalya-app-catalog"
+    key    = "catalya-uds-dev/ds_s3/terraform.tfstate"
+  }
+}
diff --git a/tf-module/uds_catalia_bucket/terraform.tfvars.example b/tf-module/uds_catalia_bucket/terraform.tfvars.example
new file mode 100644
index 00000000..2e8c4293
--- /dev/null
+++ b/tf-module/uds_catalia_bucket/terraform.tfvars.example
@@ -0,0 +1,6 @@
+project = "Catalya-DS"
+venue = "dev"
+datastore_bucket_name = "william_test_1"
+datastore_bucket_notification_prefix = "stage_out"
+prefix="catalya-uds-dev"
+account_id="979188186972"
diff --git a/tf-module/uds_catalia_bucket/variables.tf b/tf-module/uds_catalia_bucket/variables.tf
new file mode 100644
index 00000000..8c24bb8a
--- /dev/null
+++ b/tf-module/uds_catalia_bucket/variables.tf
@@ -0,0 +1,60 @@
+variable "installprefix" {
+  type = string
+  default = ""
+  description = "This is not needed, but required by UCS Marketplace. Empty string is good enough for manual deployment"
+}
+variable "deployment_name" {
+  type = string
+  default = ""
+  description = "This is not needed, but required by UCS Marketplace. Empty string is good enough for manual deployment"
+}
+variable "project" {
+  type = string
+  default = "UnknownProject"
+  description = "Name of Project"
+}
+variable "venue" {
+  type = string
+  default = "Unknownvenue"
+  description = "Name of Venue"
+}
+variable "tags" {
+  description = "Tags to be applied to Cumulus resources that support tags"
+  type = map(string)
+  default = {}
+}
+variable "account_id" {
+  type = string
+  description = "AWS Account ID"
+}
+
+variable "aws_region" {
+  type = string
+  default = "us-west-2"
+}
+
+variable "prefix" {
+  type = string
+}
+variable "datastore_bucket_name" {
+  type = string
+  description = "name of S3 bucket. Note-1: it will be prefixed with '<project>-<venue>-unity-'. Note-2: It should only have '-'. 
'_' will be replaced with '-'" +} +variable "datastore_bucket_notification_prefix" { + type = string + default = "stage_out" + description = "path to the directory where catalogs.json will be written" +} + +variable "cumulus_lambda_processing_role_name_postfix" { + type = string + default = "lambda-processing" + description = "name of the Lambda Processing role by Cumulus after `prefix`" +} + +variable "cumulus_sf_lambda_role_name_postfix" { + type = string + default = "_sf_event_sqs_to_db_records_lambda_role" + description = "name of the Lambda role by Cumulus SF after `prefix`" +} + From c5b09ffef23297d1227a1b076bb213fffd602d82 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Thu, 22 Jan 2026 11:41:15 -0800 Subject: [PATCH 31/35] chore: update description --- cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py | 2 +- tf-module/uds_catalia/variables.tf | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py b/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py index e2599585..1e2adbd0 100644 --- a/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py +++ b/cumulus_lambda_functions/daac_archiver/daac_archiver_catalia.py @@ -61,7 +61,7 @@ class DaacArchiverCatalia: def __init__(self): self.__sns = AwsSns() self.__s3 = AwsS3() - self.__staged_s3_bucket = 'SET_ME_UP' # TODO + self.__staged_s3_bucket = 'SET_ME_UP' # DONE. There is validation to see if it's original value, it will throw an error. self.__status_ddb = CataliaStatusDb(os.getenv('CATALYA_STATUS_DB', None)) self.__daac_agreements = [] sfa_auth_ssm_key = os.getenv('SFA_AUTH', None) diff --git a/tf-module/uds_catalia/variables.tf b/tf-module/uds_catalia/variables.tf index 716878f8..aedaab7a 100644 --- a/tf-module/uds_catalia/variables.tf +++ b/tf-module/uds_catalia/variables.tf @@ -238,6 +238,7 @@ variable "granules_cnm_ingester__s3_glob" { variable "uds_ctla_s3_staging_bucket" { type = string + description = "outputs of uds_catalia_bucket module. Example: catalya-ds-dev-unity-william-test-1" } #variable "granules_cnm_ingester__is_deploying_bucket" { From 1974cbb659d2f53eaf3d7b991fb0e9839fea7142 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Thu, 22 Jan 2026 12:53:03 -0800 Subject: [PATCH 32/35] feat: starting authorizer --- .../keycloak_authorizer/README.md | 33 ++++++++ .../keycloak_authorizer/__init__.py | 0 .../keycloak_authorizer/lambda_function.py | 79 +++++++++++++++++++ tf-module/uds_catalia/uds_api_lambda.tf | 2 +- 4 files changed, 113 insertions(+), 1 deletion(-) create mode 100644 cumulus_lambda_functions/keycloak_authorizer/README.md create mode 100644 cumulus_lambda_functions/keycloak_authorizer/__init__.py create mode 100644 cumulus_lambda_functions/keycloak_authorizer/lambda_function.py diff --git a/cumulus_lambda_functions/keycloak_authorizer/README.md b/cumulus_lambda_functions/keycloak_authorizer/README.md new file mode 100644 index 00000000..4cca265b --- /dev/null +++ b/cumulus_lambda_functions/keycloak_authorizer/README.md @@ -0,0 +1,33 @@ +# Keycloak Authorizer (Placeholder) + +This is a **temporary placeholder** Lambda authorizer that allows all requests for testing purposes. 
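+## Quick Local Check
+
+A minimal sketch for exercising the placeholder by hand (the event values below are
+illustrative; the module path is the one added in this patch):
+
+    from cumulus_lambda_functions.keycloak_authorizer.lambda_function import lambda_handler
+
+    event = {
+        'authorizationToken': 'Bearer anything',
+        'methodArn': 'arn:aws:execute-api:us-west-2:000000000000:abc123/dev/GET/collections',
+    }
+    policy = lambda_handler(event, None)
+    assert policy['policyDocument']['Statement'][0]['Effect'] == 'Allow'
+    assert policy['context']['authType'] == 'PLACEHOLDER_KEYCLOAK'
+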
+ +## Current Behavior + +- **Allows all requests** without validation +- Adds fake JWT token context similar to what Keycloak would provide +- Returns fake user information for testing + +## Fake Context Provided + +The authorizer adds the following fake context to requests: + +- `userId`: test-user-123 +- `username`: test-user +- `email`: test-user@example.com +- `name`: Test User +- `roles`: ["user", "admin", "developer"] +- `groups`: ["/unity/developers", "/unity/users"] +- `jwtToken`: Base64-encoded fake JWT payload +- `authType`: PLACEHOLDER_KEYCLOAK (flag to indicate this is a placeholder) + +## TODO + +⚠️ **Replace with actual Keycloak integration** once Keycloak is connected and configured. + +The actual implementation should: +1. Validate JWT tokens from Keycloak +2. Verify token signatures +3. Check token expiration +4. Extract real user claims from the token +5. Enforce proper authorization policies diff --git a/cumulus_lambda_functions/keycloak_authorizer/__init__.py b/cumulus_lambda_functions/keycloak_authorizer/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cumulus_lambda_functions/keycloak_authorizer/lambda_function.py b/cumulus_lambda_functions/keycloak_authorizer/lambda_function.py new file mode 100644 index 00000000..ffd23f00 --- /dev/null +++ b/cumulus_lambda_functions/keycloak_authorizer/lambda_function.py @@ -0,0 +1,79 @@ +""" +Placeholder Keycloak Lambda Authorizer +This is a temporary authorizer that allows all requests and adds fake Keycloak JWT token data. +Replace with actual Keycloak integration once available. +""" + +import json +import base64 + + +def lambda_handler(event, context): + """ + Placeholder Lambda authorizer that allows all requests. + Adds fake context similar to what Keycloak would provide from a JWT token. 
+ + :param event: API Gateway authorizer event + :param context: Lambda context + :return: IAM policy document allowing the request with fake user context + """ + + # Extract the authorization token (even though we're not validating it) + token = event.get('authorizationToken', 'Bearer fake-token') + method_arn = event.get('methodArn', '') + + # Create a fake JWT token payload similar to what Keycloak would provide + fake_jwt_payload = { + "sub": "test-user-123", + "preferred_username": "test-user", + "email": "test-user@example.com", + "name": "Test User", + "given_name": "Test", + "family_name": "User", + "realm_access": { + "roles": ["user", "admin", "developer"] + }, + "resource_access": { + "unity-api": { + "roles": ["read", "write"] + } + }, + "groups": ["/unity/developers", "/unity/users"], + "iat": 1642000000, + "exp": 1642003600, + "iss": "https://keycloak.example.com/auth/realms/unity", + "aud": "unity-api" + } + + # Encode as base64 to simulate a JWT token in context + fake_jwt_string = base64.b64encode(json.dumps(fake_jwt_payload).encode()).decode() + + # Generate the IAM policy document that allows all actions + policy = { + "principalId": fake_jwt_payload["sub"], + "policyDocument": { + "Version": "2012-10-17", + "Statement": [ + { + "Action": "execute-api:Invoke", + "Effect": "Allow", + "Resource": method_arn.split('/')[0] + '/*' if method_arn else '*' + } + ] + }, + "context": { + # Add fake user context that would normally come from Keycloak JWT + "userId": fake_jwt_payload["sub"], + "username": fake_jwt_payload["preferred_username"], + "email": fake_jwt_payload["email"], + "name": fake_jwt_payload["name"], + "roles": json.dumps(fake_jwt_payload["realm_access"]["roles"]), + "groups": json.dumps(fake_jwt_payload["groups"]), + # Fake JWT token (base64 encoded) - simulating what Keycloak would provide + "jwtToken": fake_jwt_string, + # Add a flag to indicate this is a placeholder + "authType": "PLACEHOLDER_KEYCLOAK" + } + } + + return policy diff --git a/tf-module/uds_catalia/uds_api_lambda.tf b/tf-module/uds_catalia/uds_api_lambda.tf index bf28ad12..525dbfc0 100644 --- a/tf-module/uds_catalia/uds_api_lambda.tf +++ b/tf-module/uds_catalia/uds_api_lambda.tf @@ -42,7 +42,7 @@ resource "aws_lambda_function" "uds_api_authorizer" { source_code_hash = filebase64sha256(local.lambda_file_name) function_name = "${var.prefix}-uds_api_authorizer" role = local.lambda_role_arn - handler = "cumulus_lambda_functions.catalya_uds_api.web_service.handler" + handler = "cumulus_lambda_functions.keycloak_authorizer.lambda_function.lambda_handler" runtime = "python3.9" timeout = 300 memory_size = 512 From bc9af439ad51129122345d738a33d771b1e1a9dc Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Tue, 27 Jan 2026 10:31:30 -0800 Subject: [PATCH 33/35] fix: add backoff lib --- requirements.txt | 1 + tf-module/uds_catalia/main.tf | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index d5e67953..b4e2af79 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ annotated-types==0.7.0 anyio==4.6.0 attrs==24.2.0 +backoff==2.2.1 certifi==2024.8.30 charset-normalizer==3.3.2 click==8.1.7 diff --git a/tf-module/uds_catalia/main.tf b/tf-module/uds_catalia/main.tf index 03f67e3d..d80fc170 100644 --- a/tf-module/uds_catalia/main.tf +++ b/tf-module/uds_catalia/main.tf @@ -31,4 +31,3 @@ data "aws_iam_role" "lambda_processing" { # count = var.create_lambda_role ? 
1 : 0 name = "${var.prefix}-lambda-processing" } - From 0fb6021f2838a76bce34c42dcc0c3a6af62585b3 Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 28 Jan 2026 11:12:31 -0800 Subject: [PATCH 34/35] fix: minor fixes during trial e2e test --- .../catalya_uds_api/granules_archive_api.py | 24 +++++++++---------- tf-module/mock_daac/main.tf | 2 +- .../uds_lambda_processing_role.tf | 8 +++++++ 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/cumulus_lambda_functions/catalya_uds_api/granules_archive_api.py b/cumulus_lambda_functions/catalya_uds_api/granules_archive_api.py index 182fb9a4..658cd24b 100644 --- a/cumulus_lambda_functions/catalya_uds_api/granules_archive_api.py +++ b/cumulus_lambda_functions/catalya_uds_api/granules_archive_api.py @@ -23,14 +23,14 @@ class ArchivingTypesModel(BaseModel): data_type: str file_extension: Optional[list[str]] = [] + class DaacUpdateModel(BaseModel): - daac_collection_id: str api_key: str - daac_provider: Optional[str] = None - daac_data_version: Optional[str] = None - daac_sns_topic_arn: Optional[str] = None - daac_role_arn: Optional[str] = None - daac_role_session_name: Optional[str] = None + daac_provider: str + daac_data_version: str + daac_sns_topic_arn: str + daac_role_arn: str + daac_role_session_name: str archiving_types: Optional[list[ArchivingTypesModel]] = None class InternalDDBConnector: @@ -65,12 +65,12 @@ async def add_daac_archive_config(request: Request, collection_id: str, daac_col LOGGER.debug(f'started add_daac_archive_config. {new_body.model_dump()}') i1 = InternalDDBConnector() authorized_daacs = i1.archive_methods_initiator(request, collection_id, daac_collection_id) - authorized_ldaps = [k['userGroup'] for k in authorized_daacs] + # authorized_ldaps = [k['userGroup'] for k in authorized_daacs] b1 = new_body.model_dump() try: # def add(self, catalia_collection, daac_collection, api_key, provider, data_version, sns_topic_arn, role_arn, role_session_name, archiving_types, user, user_group): i1.cdhsd.add(collection_id, daac_collection_id, b1['api_key'], b1['daac_provider'], b1['daac_data_version'], - b1['daac_sns_topic_arn'], b1['daac_role_arn'], b1['daac_role_session_name'], b1['archiving_types'], i1.auth_info['username'], authorized_ldaps) + b1['daac_sns_topic_arn'], b1['daac_role_arn'], b1['daac_role_session_name'], b1['archiving_types'], i1.auth_info['username'], i1.auth_info.get('ldap_groups')) except Exception as e: LOGGER.exception(f'error while add_daac_archive_config: {b1}') raise HTTPException(status_code=500, detail=e) @@ -108,8 +108,8 @@ async def archive_single_granule(request: Request, collection_id: str, granule_i LOGGER.debug(f'started archive_single_granule.') i1 = InternalDDBConnector() authorized_daacs = i1.archive_methods_initiator(request, collection_id, None) - authorized_ldaps = set([k['userGroup'] for k in authorized_daacs]) - authorized_configured_daac_configs = [k for k in i1.configured_daac_configs if k[i1.cdhsd.target_project] in authorized_ldaps] + # authorized_ldaps = set([k['userGroup'] for k in authorized_daacs]) + authorized_configured_daac_configs = [k for k in i1.configured_daac_configs if k[i1.cdhsd.target_project] in authorized_daacs] dac = DaacArchiverCatalia() dac.staged_s3_bucket = os.getenv('CATALYA_UDS_STAGING_BUCKET') dac.daac_agreements = authorized_configured_daac_configs @@ -122,8 +122,8 @@ async def archive_entire_collection(request: Request, collection_id: str): LOGGER.debug(f'started archive_entire_collection.') i1 = InternalDDBConnector() authorized_daacs = 
i1.archive_methods_initiator(request, collection_id, None) - authorized_ldaps = set([k['userGroup'] for k in authorized_daacs]) - authorized_configured_daac_configs = [k for k in i1.configured_daac_configs if k[i1.cdhsd.target_project] in authorized_ldaps] + # authorized_ldaps = set([k['userGroup'] for k in authorized_daacs]) + authorized_configured_daac_configs = [k for k in i1.configured_daac_configs if k[i1.cdhsd.target_project] in authorized_daacs] dac = DaacArchiverCatalia() dac.staged_s3_bucket = os.getenv('CATALYA_UDS_STAGING_BUCKET') dac.daac_agreements = authorized_configured_daac_configs diff --git a/tf-module/mock_daac/main.tf b/tf-module/mock_daac/main.tf index 5dbaa0a4..46f8a43c 100644 --- a/tf-module/mock_daac/main.tf +++ b/tf-module/mock_daac/main.tf @@ -39,7 +39,7 @@ data "aws_iam_policy_document" "mock_daac_lambda_assume_role_policy" { # IAM Role for Lambda Function resource "aws_iam_role" "mock_daac_lambda_role" { name = "${var.prefix}-mock_daac_lambda_role" - permissions_boundary = "arn:aws:iam::${local.account_id}:policy/mcp-tenantOperator-AMI-APIG" + permissions_boundary = "arn:aws:iam::${local.account_id}:policy/zsmce-tenantOperator-AMI-APIG" assume_role_policy = jsonencode({ Version = "2012-10-17", Statement = [ diff --git a/tf-module/uds_catalia_iam/uds_lambda_processing_role.tf b/tf-module/uds_catalia_iam/uds_lambda_processing_role.tf index 4dbbc0fb..986b0dd7 100644 --- a/tf-module/uds_catalia_iam/uds_lambda_processing_role.tf +++ b/tf-module/uds_catalia_iam/uds_lambda_processing_role.tf @@ -133,6 +133,14 @@ data "aws_iam_policy_document" "lambda_processing_policy" { resources = ["arn:aws:sqs:${var.aws_region}:${data.aws_caller_identity.current.account_id}:*"] } + statement { + actions = [ + "ssm:GetParameter", + "ssm:GetParameters", + ] + resources = ["arn:aws:ssm:${var.aws_region}:${var.account_id}:parameter/${var.prefix}/*"] + } + # statement { # actions = ["kms:Decrypt"] # resources = [module.archive.provider_kms_key_arn] From 3f8eb86151a9c61335c51f58e05548e233262f5b Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Wed, 28 Jan 2026 12:09:12 -0800 Subject: [PATCH 35/35] feat: running in background lambda --- .../catalya_uds_api/granules_archive_api.py | 60 +++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/cumulus_lambda_functions/catalya_uds_api/granules_archive_api.py b/cumulus_lambda_functions/catalya_uds_api/granules_archive_api.py index 658cd24b..f8f250e9 100644 --- a/cumulus_lambda_functions/catalya_uds_api/granules_archive_api.py +++ b/cumulus_lambda_functions/catalya_uds_api/granules_archive_api.py @@ -10,6 +10,7 @@ from cumulus_lambda_functions.uds_api.fast_api_utils import FastApiUtils from fastapi import APIRouter, HTTPException, Request from pydantic import BaseModel +from mdps_ds_lib.lib.aws.aws_lambda import AwsLambda LOGGER = LambdaLoggerGenerator.get_logger(__name__, LambdaLoggerGenerator.get_level_from_env()) @@ -104,6 +105,65 @@ async def get_daac_archive_config(request: Request, collection_id: str, daac_col @router.put("/{collection_id}/archive/{granule_id}") @router.put("/{collection_id}/archive/{granule_id}/") +async def archive_single_granule(request: Request, collection_id: str, granule_id: str): + LOGGER.debug(f'started archive_single_granule.') + i1 = InternalDDBConnector() + authorized_daacs = i1.archive_methods_initiator(request, collection_id, None) + # authorized_ldaps = set([k['userGroup'] for k in authorized_daacs]) + authorized_configured_daac_configs = [k for k in i1.configured_daac_configs if 
k[i1.cdhsd.target_project] in authorized_daacs] + + if os.getenv('IS_API_IN_DOCKER', 'FALSE') == 'TRUE': + LOGGER.debug(f'In docker. No time limit for archiving') + dac = DaacArchiverCatalia() + dac.staged_s3_bucket = os.getenv('CATALYA_UDS_STAGING_BUCKET') + dac.daac_agreements = authorized_configured_daac_configs + dac.archive_granule(collection_id, granule_id) + return {'message': 'archive initiated'} + + # Async invocation for API Gateway to avoid timeout + archive_lambda_name = os.environ.get('ARCHIVE_LAMBDA_NAME', '').strip() + if not archive_lambda_name: + raise HTTPException(status_code=500, detail='ARCHIVE_LAMBDA_NAME environment variable not set') + + bearer_token = request.headers.get('authorization', '') + actual_path = f'{request.url.path}/actual' if not request.url.path.endswith('/') else f'{request.url.path}actual' + + actual_event = { + 'resource': actual_path, + 'path': actual_path, + 'httpMethod': 'PUT', + 'headers': { + 'Authorization': bearer_token, + 'Accept': '*/*', + 'Host': request.url.hostname, + }, + 'pathParameters': { + 'collection_id': collection_id, + 'granule_id': granule_id + }, + 'requestContext': { + 'resourcePath': actual_path, + 'httpMethod': 'PUT', + 'domainName': request.url.hostname, + }, + 'body': json.dumps({ + 'authorized_configured_daac_configs': authorized_configured_daac_configs + }), + 'isBase64Encoded': False + } + + LOGGER.info(f'Invoking async lambda for archive: {archive_lambda_name}') + response = AwsLambda().invoke_function( + function_name=archive_lambda_name, + payload=actual_event, + ) + LOGGER.debug(f'Async archive function started: {response}') + + return {'message': 'archive processing', 'statusCode': 202} + + +@router.put("/{collection_id}/archive/{granule_id}/actual") +@router.put("/{collection_id}/archive/{granule_id}/actual/") async def archive_single_granule(request: Request, collection_id: str, granule_id: str): LOGGER.debug(f'started archive_single_granule.') i1 = InternalDDBConnector()
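+    # NOTE: this '/actual' route is the long-running half of the pattern above:
+    # the public route responds 202 immediately and re-invokes this path through
+    # the background Lambda, so the archiving work here is bounded by the Lambda
+    # timeout rather than API Gateway's ~29-second integration limit.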