From 4a4ee363de790ed1225adc1e96858101acf44be5 Mon Sep 17 00:00:00 2001 From: anitamnd Date: Wed, 11 Mar 2026 14:50:35 +0100 Subject: [PATCH 1/4] add validation script for protected fields in biotools.json files --- .../runbiotools/validate-biotools-fields.sh | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 scripts/runbiotools/validate-biotools-fields.sh diff --git a/scripts/runbiotools/validate-biotools-fields.sh b/scripts/runbiotools/validate-biotools-fields.sh new file mode 100644 index 0000000..ba39b39 --- /dev/null +++ b/scripts/runbiotools/validate-biotools-fields.sh @@ -0,0 +1,64 @@ +#!/bin/bash +set -e + +# Validate protected fields in biotools.json files +# Usage: ./validate-biotools-fields.sh [file2] [file3] ... +# Environment: GITHUB_BEFORE_SHA - the commit SHA to compare against + +PROTECTED_FIELDS=( + "biotoolsID" + "biotoolsCURIE" + "additionDate" + "collectionID" + "elixirPlatform" + "elixirNode" + "elixirCommunity" + "lastUpdate" + "owner" + "editPermission" + "validated" + "homepage_status" + "elixir_badge" + "confidence_flag" +) + +failed=false + +for file in "$@"; do + echo "Validating $file..." + + # Check if this is a new file or existing file + if ! git show "${GITHUB_BEFORE_SHA}:${file}" > old.json 2>/dev/null; then + # NEW FILE + echo "ℹ️ New file detected: $file" + for field in "${PROTECTED_FIELDS[@]}"; do + val=$(jq -r ".$field // empty" "$file" 2>/dev/null) + if [ -n "$val" ] && [ "$val" != "null" ]; then + echo "::error file=$file::Protected field '$field' must not be present in new files (found: '$val')" + failed=true + fi + done + else + # EXISTING FILE + echo "ℹ️ Existing file: $file" + cat "$file" > new.json + + for field in "${PROTECTED_FIELDS[@]}"; do + old_val=$(jq -r ".$field // empty" old.json 2>/dev/null) + new_val=$(jq -r ".$field // empty" new.json 2>/dev/null) + if [ "$old_val" != "$new_val" ]; then + echo "::error file=$file::Protected field '$field' was modified (old: '$old_val', new: '$new_val')" + failed=true + fi + done + + rm -f old.json new.json + fi +done + +if [ "$failed" = true ]; then + echo "::error::Validation failed - protected fields were modified or present in new files" + exit 1 +else + echo "✅ All validation checks passed" +fi From b79ae60347d35da82a7dac9b808ccae61813a236 Mon Sep 17 00:00:00 2001 From: anitamnd Date: Thu, 26 Mar 2026 14:30:05 +0100 Subject: [PATCH 2/4] refactor: create BioToolsClient and improve tool validation process --- scripts/runbiotools/gh2biotools.py | 469 +++++++++++++++++++++-------- 1 file changed, 351 insertions(+), 118 deletions(-) diff --git a/scripts/runbiotools/gh2biotools.py b/scripts/runbiotools/gh2biotools.py index 8bc1763..6bc01b8 100644 --- a/scripts/runbiotools/gh2biotools.py +++ b/scripts/runbiotools/gh2biotools.py @@ -3,109 +3,326 @@ import json import logging import argparse +import time +from functools import wraps import requests from boltons.iterutils import remap HEADERS = {"Content-Type": "application/json", "Accept": "application/json"} -HOST = "https://bio.tools" -TOOL_API_URL = f"{HOST}/api/tool/" - -logging.basicConfig(level=logging.INFO) - - -def validate_upload_tool(tool, headers): - url = f"{HOST}/api/tool/validate/" - response = requests.post(url, headers=headers, data=json.dumps(tool)) - - if not response.ok: - logging.error( - f"Error validating upload for {tool['biotoolsID']}: {response.status_code} {response.text}" - ) - return response.ok +DEFAULT_HOST = "https://bio.tools" +REQUEST_TIMEOUT = 30 +RATE_LIMIT_DELAY = 0.5 +MAX_RETRIES = 3 +RETRY_BACKOFF_FACTOR = 2 -def upload_tool(tool, headers): - url = TOOL_API_URL - - response = requests.post(url, headers=headers, data=json.dumps(tool)) - return response.ok - - -def validate_update_tool(tool, tool_id, headers): - url = f"{HOST}/api/{tool_id}/validate/" - response = requests.put(url, headers=headers, data=json.dumps(tool)) - - if not response.ok: - logging.error( - f"Error validating update for {tool['biotoolsID']}: {response.status_code} {response.text}" - ) - return response.ok - - -def update_tool(tool, headers): - """Updates an existing tool on bio.tools.""" - url = f"{TOOL_API_URL}{tool['biotoolsID']}/" - - response = requests.put(url, headers=headers, data=json.dumps(tool)) - return response.ok +logging.basicConfig(level=logging.INFO) -def process_single_file(file, headers): - """ - Process a single tool file. - returns tool_id, status - status can be "uploaded", "updated", "unchanged", "failed", "failed_validation", "failed_upload" or "failed_update" +def retry_on_failure(max_retries=MAX_RETRIES, backoff_factor=RETRY_BACKOFF_FACTOR): + """Decorator to retry failed requests with exponential backoff. + + Args: + max_retries: Maximum number of retry attempts. + backoff_factor: Multiplier for exponential backoff delay. """ - payload_dict = json.load(file) - tool_id = payload_dict.get("biotoolsID") - - if not tool_id: - logging.error(f"'biotoolsID' not found in {file}") - return "UNKNOWN", "failed" - - # check if tool exists - tool_url = f"{HOST}/api/tool/{tool_id}/" - response = requests.get(tool_url, headers=headers) - - if response.status_code == 200: - # remove empty fields - existing_tool = remap(response.json(), lambda p, k, v: bool(v)) - payload_dict = remap(payload_dict, lambda p, k, v: bool(v)) - - if existing_tool == payload_dict: - return tool_id, "unchanged" - - valid = validate_update_tool(payload_dict, tool_id, headers) - if not valid: - return tool_id, "failed_validation" - - success = update_tool(payload_dict, headers) - - return tool_id, "updated" if success else "failed_update" - - elif response.status_code == 404: - # tool not registered, proceed with upload - logging.info(f"Tool {tool_id} not registered, proceeding with upload") - valid = validate_upload_tool(payload_dict, headers) - - if not valid: - return tool_id, "failed_validation" - - success = upload_tool(payload_dict, headers) - - return tool_id, "uploaded" if success else "failed_upload" - - else: - logging.error( - f"Error retrieving tool {tool_id}: {response.status_code} {response.text}" - ) - return tool_id, "failed" + def decorator(func): + @wraps(func) + def wrapper(*args, **kwargs): + for attempt in range(max_retries): + try: + return func(*args, **kwargs) + except requests.exceptions.RequestException as e: + if attempt == max_retries - 1: + raise + + delay = backoff_factor ** attempt + logging.warning( + f"Request failed (attempt {attempt + 1}/{max_retries}): {e}. " + f"Retrying in {delay}s..." + ) + time.sleep(delay) + + return wrapper + return decorator + + +class BioToolsClient: + """Client for interacting with the bio.tools API.""" + + def __init__(self, token=None, host=None, rate_limit_delay=RATE_LIMIT_DELAY): + """Initialize the bio.tools client. + + Args: + token: API token for bio.tools. If not provided, reads from + BIOTOOLS_API_TOKEN environment variable. + host: Base URL for the bio.tools API. Defaults to production. + Use 'https://bio-tools-dev.sdu.dk' for dev server. + rate_limit_delay: Delay in seconds between API operations. + + Raises: + ValueError: If no token is provided or found in environment. + """ + token = token or os.environ.get("BIOTOOLS_API_TOKEN") + if not token: + raise ValueError( + "BIOTOOLS_API_TOKEN is required. Set environment variable or pass token parameter." + ) + + self.host = (host or DEFAULT_HOST).rstrip('/') + self.headers = {**HEADERS, "Authorization": f"Token {token}"} + self.rate_limit_delay = rate_limit_delay + + logging.info(f"Using bio.tools API at: {self.host}") + + @retry_on_failure() + def get_tool(self, tool_id): + """Retrieve a tool from bio.tools. + + Args: + tool_id: The biotoolsID of the tool to retrieve. + + Returns: + dict: Tool metadata if found, None otherwise. + + Raises: + requests.RequestException + """ + url = f"{self.host}/api/tool/{tool_id}/" + + try: + response = requests.get(url, headers=self.headers, timeout=REQUEST_TIMEOUT) + + if response.status_code == 200: + return response.json() + elif response.status_code == 404: + return None + else: + logging.error( + f"Error retrieving tool {tool_id}: {response.status_code} {response.text}" + ) + return None + except requests.RequestException as e: + logging.error(f"Request failed for tool {tool_id}: {e}") + raise + + def validate_upload(self, tool): + """Validate a tool before uploading to bio.tools. + + Args: + tool: Dictionary containing tool metadata with biotoolsID. + + Returns: + bool: True if validation succeeds, False otherwise. + """ + url = f"{self.host}/api/tool/validate/" + + try: + response = requests.post( + url, headers=self.headers, data=json.dumps(tool), timeout=REQUEST_TIMEOUT + ) + + if not response.ok: + logging.error( + f"Error validating upload for {tool.get('biotoolsID', 'UNKNOWN')}: " + f"{response.status_code} {response.text}" + ) + + return response.ok + except requests.RequestException as e: + logging.error(f"Validation request failed: {e}") + return False + + def validate_update(self, tool_id, tool): + """Validate a tool before updating on bio.tools. + + Args: + tool_id: The biotoolsID of the tool to update. + tool: Dictionary containing updated tool metadata. + + Returns: + bool: True if validation succeeds, False otherwise. + """ + url = f"{self.host}/api/{tool_id}/validate/" + + try: + response = requests.put( + url, headers=self.headers, data=json.dumps(tool), timeout=REQUEST_TIMEOUT + ) + + if not response.ok: + logging.error( + f"Error validating update for {tool.get('biotoolsID', tool_id)}: " + f"{response.status_code} {response.text}" + ) + + return response.ok + except requests.RequestException as e: + logging.error(f"Validation request failed for {tool_id}: {e}") + return False + + def upload_tool(self, tool): + """Upload a new tool to bio.tools. + + Args: + tool: Dictionary containing tool metadata. + + Returns: + bool: True if upload succeeds, False otherwise. + """ + url = f"{self.host}/api/tool/" + + try: + response = requests.post( + url, headers=self.headers, data=json.dumps(tool), timeout=REQUEST_TIMEOUT + ) + + if not response.ok: + logging.error( + f"Error uploading tool {tool.get('biotoolsID', 'UNKNOWN')}: " + f"{response.status_code} {response.text}" + ) + + return response.ok + except requests.RequestException as e: + logging.error(f"Upload request failed: {e}") + return False + + def update_tool(self, tool): + """Update an existing tool on bio.tools. + + Args: + tool: Dictionary containing updated tool metadata with biotoolsID. + + Returns: + bool: True if update succeeds, False otherwise. + """ + tool_id = tool.get('biotoolsID') + if not tool_id: + logging.error("Cannot update tool without biotoolsID") + return False + + url = f"{self.host}/api/tool/{tool_id}/" + + try: + response = requests.put( + url, headers=self.headers, data=json.dumps(tool), timeout=REQUEST_TIMEOUT + ) + + if not response.ok: + logging.error( + f"Error updating tool {tool_id}: {response.status_code} {response.text}" + ) + + return response.ok + except requests.RequestException as e: + logging.error(f"Update request failed for {tool_id}: {e}") + return False + + def process_tool_file(self, file_path): + """Process a single tool file. + + Args: + file_path: Path to a JSON file containing tool metadata. + + Returns: + tuple: (tool_id, status) where status can be: + "uploaded", "updated", "unchanged", "failed", + "failed_validation", "failed_upload", or "failed_update" + """ + try: + with open(file_path, 'r') as file: + payload_dict = json.load(file) + except (IOError, json.JSONDecodeError) as e: + logging.error(f"Error reading file {file_path}: {e}") + return "UNKNOWN", "failed" + + tool_id = payload_dict.get("biotoolsID") + if not tool_id: + logging.error(f"'biotoolsID' not found in {file_path}") + return "UNKNOWN", "failed" + + # Check if tool exists + try: + existing_tool = self.get_tool(tool_id) + except requests.RequestException: + return tool_id, "failed" + + if existing_tool: + # Tool exists, check if update is needed + # Remove empty fields for comparison + existing_tool_clean = remap(existing_tool, lambda p, k, v: bool(v)) + payload_dict_clean = remap(payload_dict, lambda p, k, v: bool(v)) + + if existing_tool_clean == payload_dict_clean: + logging.info(f"Tool {tool_id} is unchanged") + return tool_id, "unchanged" + + # Validate and update + if not self.validate_update(tool_id, payload_dict): + return tool_id, "failed_validation" + + success = self.update_tool(payload_dict) + if success: + logging.info(f"Tool {tool_id} updated successfully") + return tool_id, "updated" + else: + return tool_id, "failed_update" + else: + # Tool doesn't exist, proceed with upload + logging.info(f"Tool {tool_id} not registered, proceeding with upload") + + if not self.validate_upload(payload_dict): + return tool_id, "failed_validation" + + success = self.upload_tool(payload_dict) + if success: + logging.info(f"Tool {tool_id} uploaded successfully") + return tool_id, "uploaded" + else: + return tool_id, "failed_upload" + + def sync_tools(self, file_paths): + """Process multiple tool files and sync them to bio.tools. + + Args: + file_paths: List of paths to JSON files containing tool metadata. + + Returns: + dict: Results dictionary with lists of tool IDs categorized by status. + """ + results = { + "uploaded": [], + "updated": [], + "unchanged": [], + "failed": [], + "failed_validation": [], + "failed_upload": [], + "failed_update": [], + } + + total_files = len(file_paths) + logging.info(f"Starting sync of {total_files} file(s)") + + for index, file_path in enumerate(file_paths, start=1): + logging.info(f"[{index}/{total_files}] Processing {file_path}") + + tool_id, status = self.process_tool_file(file_path) + results[status].append(tool_id) + time.sleep(self.rate_limit_delay) + + return results def print_summary(results): - """Print a summary of the upload results.""" - logging.info("---------------------------") + """Print a summary of the sync results. + + Args: + results: Dictionary containing lists of tool IDs categorized by status. + """ + logging.info("="*50) logging.info("SUMMARY") + logging.info("="*50) logging.info(f"Tools uploaded: {len(results['uploaded'])}") logging.info(f"Tools updated: {len(results['updated'])}") logging.info(f"Tools unchanged: {len(results['unchanged'])}") @@ -117,6 +334,7 @@ def print_summary(results): logging.info( f"Tools failed update after validation: {len(results['failed_update'])}" ) + logging.info("="*50) if results["uploaded"]: logging.info(f"Uploaded tools: {', '.join(results['uploaded'])}") @@ -134,32 +352,8 @@ def print_summary(results): logging.error(f"Failed update tools: {', '.join(results['failed_update'])}") -def run_upload(files): - token = os.environ.get("BIOTOOLS_API_TOKEN") - if not token: - logging.error("Missing BIOTOOLS_API_TOKEN. Aborting upload.") - raise SystemExit(1) - - headers = {**HEADERS, "Authorization": f"Token {token}"} - results = { - "uploaded": [], - "updated": [], - "unchanged": [], - "failed": [], - "failed_validation": [], - "failed_upload": [], - "failed_update": [], - } - - for json_file in files: - with open(json_file, "r") as file: - tool_id, status = process_single_file(file, headers) - results[status].append(tool_id) - - print_summary(results) - - -if __name__ == "__main__": +def main(): + """Main entry point for the script.""" parser = argparse.ArgumentParser( description="Sync changed .biotools.json files with bio.tools server" ) @@ -169,10 +363,49 @@ def run_upload(files): metavar="F", type=str, nargs="+", + required=True, help="List of changed/created .biotools.json files to process", ) + + parser.add_argument( + "--host", + type=str, + default=None, + help=f"Bio.tools API host URL (default: {DEFAULT_HOST}). " + "Use https://bio-tools-dev.sdu.dk for dev server", + ) args = parser.parse_args() - if args.files: - run_upload(args.files) + try: + client = BioToolsClient(host=args.host) + + results = client.sync_tools(args.files) + + print_summary(results) + + # Exit with error code if any operations failed + total_failures = ( + len(results["failed"]) + + len(results["failed_validation"]) + + len(results["failed_upload"]) + + len(results["failed_update"]) + ) + + if total_failures > 0: + logging.error(f"Completed with {total_failures} failure(s)") + raise SystemExit(1) + else: + logging.info("All operations completed successfully") + raise SystemExit(0) + + except ValueError as e: + logging.error(str(e)) + raise SystemExit(1) + except Exception as e: + logging.error(f"Unexpected error: {e}") + raise SystemExit(1) + + +if __name__ == "__main__": + main() From 26259d25b9ef30448a8ba0d0a66f040800c2d68e Mon Sep 17 00:00:00 2001 From: anitamnd Date: Thu, 26 Mar 2026 15:31:42 +0100 Subject: [PATCH 3/4] fix: skip protected fields check for new biotools.json files --- scripts/runbiotools/validate-biotools-fields.sh | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/scripts/runbiotools/validate-biotools-fields.sh b/scripts/runbiotools/validate-biotools-fields.sh index ba39b39..4a59b99 100644 --- a/scripts/runbiotools/validate-biotools-fields.sh +++ b/scripts/runbiotools/validate-biotools-fields.sh @@ -30,17 +30,11 @@ for file in "$@"; do # Check if this is a new file or existing file if ! git show "${GITHUB_BEFORE_SHA}:${file}" > old.json 2>/dev/null; then # NEW FILE - echo "ℹ️ New file detected: $file" - for field in "${PROTECTED_FIELDS[@]}"; do - val=$(jq -r ".$field // empty" "$file" 2>/dev/null) - if [ -n "$val" ] && [ "$val" != "null" ]; then - echo "::error file=$file::Protected field '$field' must not be present in new files (found: '$val')" - failed=true - fi - done + echo "ℹ New file detected: $file" + echo "ℹ Skipping validation for protected fields..." else # EXISTING FILE - echo "ℹ️ Existing file: $file" + echo "ℹ Existing file: $file" cat "$file" > new.json for field in "${PROTECTED_FIELDS[@]}"; do @@ -57,7 +51,7 @@ for file in "$@"; do done if [ "$failed" = true ]; then - echo "::error::Validation failed - protected fields were modified or present in new files" + echo "::error::Validation failed - protected fields were modified." exit 1 else echo "✅ All validation checks passed" From 23d29f5d67d3255d9f1adfd30880c7a1a16ee9ec Mon Sep 17 00:00:00 2001 From: anitamnd Date: Thu, 26 Mar 2026 15:43:30 +0100 Subject: [PATCH 4/4] apply ruff formatting --- scripts/runbiotools/gh2biotools.py | 160 ++++++++++++++++------------- 1 file changed, 87 insertions(+), 73 deletions(-) diff --git a/scripts/runbiotools/gh2biotools.py b/scripts/runbiotools/gh2biotools.py index 6bc01b8..2d41fe2 100644 --- a/scripts/runbiotools/gh2biotools.py +++ b/scripts/runbiotools/gh2biotools.py @@ -21,11 +21,12 @@ def retry_on_failure(max_retries=MAX_RETRIES, backoff_factor=RETRY_BACKOFF_FACTOR): """Decorator to retry failed requests with exponential backoff. - + Args: max_retries: Maximum number of retry attempts. backoff_factor: Multiplier for exponential backoff delay. """ + def decorator(func): @wraps(func) def wrapper(*args, **kwargs): @@ -35,15 +36,16 @@ def wrapper(*args, **kwargs): except requests.exceptions.RequestException as e: if attempt == max_retries - 1: raise - - delay = backoff_factor ** attempt + + delay = backoff_factor**attempt logging.warning( f"Request failed (attempt {attempt + 1}/{max_retries}): {e}. " f"Retrying in {delay}s..." ) time.sleep(delay) - + return wrapper + return decorator @@ -52,14 +54,14 @@ class BioToolsClient: def __init__(self, token=None, host=None, rate_limit_delay=RATE_LIMIT_DELAY): """Initialize the bio.tools client. - + Args: - token: API token for bio.tools. If not provided, reads from + token: API token for bio.tools. If not provided, reads from BIOTOOLS_API_TOKEN environment variable. host: Base URL for the bio.tools API. Defaults to production. Use 'https://bio-tools-dev.sdu.dk' for dev server. rate_limit_delay: Delay in seconds between API operations. - + Raises: ValueError: If no token is provided or found in environment. """ @@ -68,31 +70,31 @@ def __init__(self, token=None, host=None, rate_limit_delay=RATE_LIMIT_DELAY): raise ValueError( "BIOTOOLS_API_TOKEN is required. Set environment variable or pass token parameter." ) - - self.host = (host or DEFAULT_HOST).rstrip('/') + + self.host = (host or DEFAULT_HOST).rstrip("/") self.headers = {**HEADERS, "Authorization": f"Token {token}"} self.rate_limit_delay = rate_limit_delay - + logging.info(f"Using bio.tools API at: {self.host}") @retry_on_failure() def get_tool(self, tool_id): """Retrieve a tool from bio.tools. - + Args: tool_id: The biotoolsID of the tool to retrieve. - + Returns: dict: Tool metadata if found, None otherwise. - + Raises: requests.RequestException """ url = f"{self.host}/api/tool/{tool_id}/" - + try: response = requests.get(url, headers=self.headers, timeout=REQUEST_TIMEOUT) - + if response.status_code == 200: return response.json() elif response.status_code == 404: @@ -108,26 +110,29 @@ def get_tool(self, tool_id): def validate_upload(self, tool): """Validate a tool before uploading to bio.tools. - + Args: tool: Dictionary containing tool metadata with biotoolsID. - + Returns: bool: True if validation succeeds, False otherwise. """ url = f"{self.host}/api/tool/validate/" - + try: response = requests.post( - url, headers=self.headers, data=json.dumps(tool), timeout=REQUEST_TIMEOUT + url, + headers=self.headers, + data=json.dumps(tool), + timeout=REQUEST_TIMEOUT, ) - + if not response.ok: logging.error( f"Error validating upload for {tool.get('biotoolsID', 'UNKNOWN')}: " f"{response.status_code} {response.text}" ) - + return response.ok except requests.RequestException as e: logging.error(f"Validation request failed: {e}") @@ -135,27 +140,30 @@ def validate_upload(self, tool): def validate_update(self, tool_id, tool): """Validate a tool before updating on bio.tools. - + Args: tool_id: The biotoolsID of the tool to update. tool: Dictionary containing updated tool metadata. - + Returns: bool: True if validation succeeds, False otherwise. """ url = f"{self.host}/api/{tool_id}/validate/" - + try: response = requests.put( - url, headers=self.headers, data=json.dumps(tool), timeout=REQUEST_TIMEOUT + url, + headers=self.headers, + data=json.dumps(tool), + timeout=REQUEST_TIMEOUT, ) - + if not response.ok: logging.error( f"Error validating update for {tool.get('biotoolsID', tool_id)}: " f"{response.status_code} {response.text}" ) - + return response.ok except requests.RequestException as e: logging.error(f"Validation request failed for {tool_id}: {e}") @@ -163,26 +171,29 @@ def validate_update(self, tool_id, tool): def upload_tool(self, tool): """Upload a new tool to bio.tools. - + Args: tool: Dictionary containing tool metadata. - + Returns: bool: True if upload succeeds, False otherwise. """ url = f"{self.host}/api/tool/" - + try: response = requests.post( - url, headers=self.headers, data=json.dumps(tool), timeout=REQUEST_TIMEOUT + url, + headers=self.headers, + data=json.dumps(tool), + timeout=REQUEST_TIMEOUT, ) - + if not response.ok: logging.error( f"Error uploading tool {tool.get('biotoolsID', 'UNKNOWN')}: " f"{response.status_code} {response.text}" ) - + return response.ok except requests.RequestException as e: logging.error(f"Upload request failed: {e}") @@ -190,30 +201,33 @@ def upload_tool(self, tool): def update_tool(self, tool): """Update an existing tool on bio.tools. - + Args: tool: Dictionary containing updated tool metadata with biotoolsID. - + Returns: bool: True if update succeeds, False otherwise. """ - tool_id = tool.get('biotoolsID') + tool_id = tool.get("biotoolsID") if not tool_id: logging.error("Cannot update tool without biotoolsID") return False - + url = f"{self.host}/api/tool/{tool_id}/" - + try: response = requests.put( - url, headers=self.headers, data=json.dumps(tool), timeout=REQUEST_TIMEOUT + url, + headers=self.headers, + data=json.dumps(tool), + timeout=REQUEST_TIMEOUT, ) - + if not response.ok: logging.error( f"Error updating tool {tool_id}: {response.status_code} {response.text}" ) - + return response.ok except requests.RequestException as e: logging.error(f"Update request failed for {tool_id}: {e}") @@ -221,47 +235,47 @@ def update_tool(self, tool): def process_tool_file(self, file_path): """Process a single tool file. - + Args: file_path: Path to a JSON file containing tool metadata. - + Returns: tuple: (tool_id, status) where status can be: "uploaded", "updated", "unchanged", "failed", "failed_validation", "failed_upload", or "failed_update" """ try: - with open(file_path, 'r') as file: + with open(file_path, "r") as file: payload_dict = json.load(file) except (IOError, json.JSONDecodeError) as e: logging.error(f"Error reading file {file_path}: {e}") return "UNKNOWN", "failed" - + tool_id = payload_dict.get("biotoolsID") if not tool_id: logging.error(f"'biotoolsID' not found in {file_path}") return "UNKNOWN", "failed" - + # Check if tool exists try: existing_tool = self.get_tool(tool_id) except requests.RequestException: return tool_id, "failed" - + if existing_tool: # Tool exists, check if update is needed # Remove empty fields for comparison existing_tool_clean = remap(existing_tool, lambda p, k, v: bool(v)) payload_dict_clean = remap(payload_dict, lambda p, k, v: bool(v)) - + if existing_tool_clean == payload_dict_clean: logging.info(f"Tool {tool_id} is unchanged") return tool_id, "unchanged" - + # Validate and update if not self.validate_update(tool_id, payload_dict): return tool_id, "failed_validation" - + success = self.update_tool(payload_dict) if success: logging.info(f"Tool {tool_id} updated successfully") @@ -271,10 +285,10 @@ def process_tool_file(self, file_path): else: # Tool doesn't exist, proceed with upload logging.info(f"Tool {tool_id} not registered, proceeding with upload") - + if not self.validate_upload(payload_dict): return tool_id, "failed_validation" - + success = self.upload_tool(payload_dict) if success: logging.info(f"Tool {tool_id} uploaded successfully") @@ -284,10 +298,10 @@ def process_tool_file(self, file_path): def sync_tools(self, file_paths): """Process multiple tool files and sync them to bio.tools. - + Args: file_paths: List of paths to JSON files containing tool metadata. - + Returns: dict: Results dictionary with lists of tool IDs categorized by status. """ @@ -300,29 +314,29 @@ def sync_tools(self, file_paths): "failed_upload": [], "failed_update": [], } - + total_files = len(file_paths) logging.info(f"Starting sync of {total_files} file(s)") - + for index, file_path in enumerate(file_paths, start=1): logging.info(f"[{index}/{total_files}] Processing {file_path}") - + tool_id, status = self.process_tool_file(file_path) results[status].append(tool_id) time.sleep(self.rate_limit_delay) - + return results def print_summary(results): """Print a summary of the sync results. - + Args: results: Dictionary containing lists of tool IDs categorized by status. """ - logging.info("="*50) + logging.info("=" * 50) logging.info("SUMMARY") - logging.info("="*50) + logging.info("=" * 50) logging.info(f"Tools uploaded: {len(results['uploaded'])}") logging.info(f"Tools updated: {len(results['updated'])}") logging.info(f"Tools unchanged: {len(results['unchanged'])}") @@ -334,7 +348,7 @@ def print_summary(results): logging.info( f"Tools failed update after validation: {len(results['failed_update'])}" ) - logging.info("="*50) + logging.info("=" * 50) if results["uploaded"]: logging.info(f"Uploaded tools: {', '.join(results['uploaded'])}") @@ -366,39 +380,39 @@ def main(): required=True, help="List of changed/created .biotools.json files to process", ) - + parser.add_argument( "--host", type=str, default=None, help=f"Bio.tools API host URL (default: {DEFAULT_HOST}). " - "Use https://bio-tools-dev.sdu.dk for dev server", + "Use https://bio-tools-dev.sdu.dk for dev server", ) args = parser.parse_args() try: client = BioToolsClient(host=args.host) - + results = client.sync_tools(args.files) - + print_summary(results) - + # Exit with error code if any operations failed total_failures = ( - len(results["failed"]) + - len(results["failed_validation"]) + - len(results["failed_upload"]) + - len(results["failed_update"]) + len(results["failed"]) + + len(results["failed_validation"]) + + len(results["failed_upload"]) + + len(results["failed_update"]) ) - + if total_failures > 0: logging.error(f"Completed with {total_failures} failure(s)") raise SystemExit(1) else: logging.info("All operations completed successfully") raise SystemExit(0) - + except ValueError as e: logging.error(str(e)) raise SystemExit(1)