From 2ac625d311932de90250d8d671345dd97eebb325 Mon Sep 17 00:00:00 2001 From: Grant Foster Date: Tue, 3 Mar 2026 13:49:45 -0800 Subject: [PATCH 1/5] fix: weight setting --- validator/weights.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/validator/weights.py b/validator/weights.py index d52db13..22dedd1 100644 --- a/validator/weights.py +++ b/validator/weights.py @@ -352,7 +352,7 @@ def _get_delta_node_data(self, telemetry_data: List[NodeData]) -> List[NodeData] # Extract platform metrics from delta stats delta_platform_metrics = ( - platform_manager.extract_platform_metrics_from_stats( + self.platform_manager.extract_platform_metrics_from_stats( delta_stats_json ) ) @@ -381,7 +381,7 @@ def _get_delta_node_data(self, telemetry_data: List[NodeData]) -> List[NodeData] for ( platform_name, platform_config, - ) in platform_manager.get_all_platforms().items(): + ) in self.platform_manager.get_all_platforms().items(): for error_metric in platform_config.error_metrics: raw_field = platform_config.get_raw_field_name(error_metric) total_errors += delta_stats_json.get(raw_field, 0) From 47209958e2ff2e17f6b43aa7718e709251488086 Mon Sep 17 00:00:00 2001 From: Grant Foster Date: Tue, 3 Mar 2026 14:16:46 -0800 Subject: [PATCH 2/5] feat: keep tee api list clean --- validator/node_manager.py | 79 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 2 deletions(-) diff --git a/validator/node_manager.py b/validator/node_manager.py index 2477897..fb812e9 100644 --- a/validator/node_manager.py +++ b/validator/node_manager.py @@ -1,5 +1,5 @@ import random -from typing import Dict, Optional +from typing import Dict, Optional, List from fiber.networking.models import NodeWithFernet as Node from fiber.encrypted.validator import handshake, client as vali_client from cryptography.fernet import Fernet @@ -11,6 +11,7 @@ from validator.telemetry import TEETelemetryClient from validator.errors_storage import ErrorsStorage import asyncio +import aiohttp from datetime import datetime import weakref @@ -235,11 +236,85 @@ async def remove_disconnected_nodes(self): ) keys_to_delete.append(hotkey) - logger.info(f"Deleteing keys from connected nodes: {keys_to_delete}") + logger.info(f"Deleting keys from connected nodes: {keys_to_delete}") for hotkey in keys_to_delete: + # Get TEE addresses before clearing so we can remove from masa-tee-api + tee_addresses = self.validator.routing_table.get_miner_addresses(hotkey) + + # Remove each TEE address from the masa-tee-api + for address, worker_id in tee_addresses: + await self._remove_tee_worker_from_api(address, hotkey) + del self.connected_nodes[hotkey] self.validator.routing_table.clear_miner(hotkey) + async def _remove_tee_worker_from_api(self, address: str, hotkey: str) -> bool: + """ + Remove a TEE worker from the MASA TEE API when a miner deregisters. + + Args: + address: The TEE worker address to remove + hotkey: The hotkey of the deregistered miner (for logging) + + Returns: + True if removal was successful, False otherwise + """ + masa_tee_api = os.getenv("MASA_TEE_API", "") + masa_tee_api_key = os.getenv("MASA_TEE_API_KEY", "") + + if not masa_tee_api: + logger.debug( + f"MASA_TEE_API not configured, skipping TEE worker removal for {address}" + ) + return False + + if not masa_tee_api_key: + logger.debug( + f"MASA_TEE_API_KEY not configured, skipping TEE worker removal for {address}" + ) + return False + + try: + base_url = masa_tee_api.rstrip("/") + api_endpoint = f"{base_url}/remove-tee-worker" + payload = {"address": address} + headers = { + "X-API-Key": masa_tee_api_key, + "Content-Type": "application/json", + } + + logger.info( + f"Removing TEE worker from MASA API: {address} (hotkey: {hotkey})" + ) + + async with aiohttp.ClientSession() as session: + async with session.post( + api_endpoint, json=payload, headers=headers + ) as response: + if response.status == 200: + logger.info( + f"Successfully removed TEE worker from MASA API: {address}" + ) + return True + elif response.status == 404: + logger.debug( + f"TEE worker not found in MASA API (already removed?): {address}" + ) + return True + else: + response_text = await response.text() + logger.warning( + f"Failed to remove TEE worker from MASA API: " + f"{response.status} - {response_text}" + ) + return False + + except Exception as e: + logger.error( + f"Error removing TEE worker from MASA API: {address} - {str(e)}" + ) + return False + async def send_custom_message(self, node_hotkey: str, message: str) -> None: """ Send a custom message to a specific miner. From 5c4cef65abaac97c0bb9a80148d63a8b63ce4286 Mon Sep 17 00:00:00 2001 From: Grant Foster Date: Tue, 3 Mar 2026 14:22:35 -0800 Subject: [PATCH 3/5] chore: timeout if API does go down --- validator/node_manager.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/validator/node_manager.py b/validator/node_manager.py index fb812e9..131c550 100644 --- a/validator/node_manager.py +++ b/validator/node_manager.py @@ -287,7 +287,8 @@ async def _remove_tee_worker_from_api(self, address: str, hotkey: str) -> bool: f"Removing TEE worker from MASA API: {address} (hotkey: {hotkey})" ) - async with aiohttp.ClientSession() as session: + timeout = aiohttp.ClientTimeout(total=10) # 10 second timeout + async with aiohttp.ClientSession(timeout=timeout) as session: async with session.post( api_endpoint, json=payload, headers=headers ) as response: From 29642f9639e041c37f70c4346820a087ed098956 Mon Sep 17 00:00:00 2001 From: Grant Foster Date: Tue, 3 Mar 2026 14:26:13 -0800 Subject: [PATCH 4/5] chore: address copilot review comments - Remove unused List import from typing - Use _ for unused worker_id in loop Made-with: Cursor --- validator/node_manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/validator/node_manager.py b/validator/node_manager.py index 131c550..5c61182 100644 --- a/validator/node_manager.py +++ b/validator/node_manager.py @@ -1,5 +1,5 @@ import random -from typing import Dict, Optional, List +from typing import Dict, Optional from fiber.networking.models import NodeWithFernet as Node from fiber.encrypted.validator import handshake, client as vali_client from cryptography.fernet import Fernet @@ -242,7 +242,7 @@ async def remove_disconnected_nodes(self): tee_addresses = self.validator.routing_table.get_miner_addresses(hotkey) # Remove each TEE address from the masa-tee-api - for address, worker_id in tee_addresses: + for address, _ in tee_addresses: await self._remove_tee_worker_from_api(address, hotkey) del self.connected_nodes[hotkey] From 14e9093121c410bf1556ce3481f1ddae9226182d Mon Sep 17 00:00:00 2001 From: Grant Foster Date: Tue, 3 Mar 2026 14:33:15 -0800 Subject: [PATCH 5/5] refactor: use existing httpx client for MASA API calls - Switch from aiohttp to existing http_client_manager (httpx) - Removes new dependency, reuses connection pool - Re-raise CancelledError for clean shutdown Made-with: Cursor --- validator/node_manager.py | 52 ++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 28 deletions(-) diff --git a/validator/node_manager.py b/validator/node_manager.py index 5c61182..2fa919c 100644 --- a/validator/node_manager.py +++ b/validator/node_manager.py @@ -11,7 +11,6 @@ from validator.telemetry import TEETelemetryClient from validator.errors_storage import ErrorsStorage import asyncio -import aiohttp from datetime import datetime import weakref @@ -278,38 +277,35 @@ async def _remove_tee_worker_from_api(self, address: str, hotkey: str) -> bool: base_url = masa_tee_api.rstrip("/") api_endpoint = f"{base_url}/remove-tee-worker" payload = {"address": address} - headers = { - "X-API-Key": masa_tee_api_key, - "Content-Type": "application/json", - } + headers = {"X-API-Key": masa_tee_api_key} logger.info( f"Removing TEE worker from MASA API: {address} (hotkey: {hotkey})" ) - timeout = aiohttp.ClientTimeout(total=10) # 10 second timeout - async with aiohttp.ClientSession(timeout=timeout) as session: - async with session.post( - api_endpoint, json=payload, headers=headers - ) as response: - if response.status == 200: - logger.info( - f"Successfully removed TEE worker from MASA API: {address}" - ) - return True - elif response.status == 404: - logger.debug( - f"TEE worker not found in MASA API (already removed?): {address}" - ) - return True - else: - response_text = await response.text() - logger.warning( - f"Failed to remove TEE worker from MASA API: " - f"{response.status} - {response_text}" - ) - return False - + response = await self.validator.http_client_manager.client.post( + api_endpoint, json=payload, headers=headers, timeout=10.0 + ) + + if response.status_code == 200: + logger.info( + f"Successfully removed TEE worker from MASA API: {address}" + ) + return True + elif response.status_code == 404: + logger.debug( + f"TEE worker not found in MASA API (already removed?): {address}" + ) + return True + else: + logger.warning( + f"Failed to remove TEE worker from MASA API: " + f"{response.status_code} - {response.text}" + ) + return False + + except asyncio.CancelledError: + raise except Exception as e: logger.error( f"Error removing TEE worker from MASA API: {address} - {str(e)}"