diff --git a/bioconda-import/bioconda_importer.py b/bioconda-import/bioconda_importer.py index d2aa19f..ba9fbc1 100644 --- a/bioconda-import/bioconda_importer.py +++ b/bioconda-import/bioconda_importer.py @@ -1,11 +1,15 @@ #!/usr/bin/env python import os +import sys import yaml import argparse from pathlib import Path import jinja2 +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from common.metadata import normalize_version_fields + def clean(content_path): import_directory = os.path.join(content_path, "imports", "bioconda") @@ -67,6 +71,7 @@ def merge(conda, content_path): biotools_data_path = os.path.join(content_path, "data") for name, data in conda.items(): try: + data = normalize_version_fields(data, ["package.version"]) package_name = data["package"]["name"] import_file_path = os.path.join( bioconda_import_path, f"bioconda_{package_name}.yaml" diff --git a/bioconductor-import/import.py b/bioconductor-import/import.py index 13b6568..b32f837 100644 --- a/bioconductor-import/import.py +++ b/bioconductor-import/import.py @@ -2,10 +2,14 @@ import glob import json import os +import sys import requests import logging import yaml +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from common.metadata import normalize_version_fields + # Set up logging logging.basicConfig( level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" @@ -99,6 +103,7 @@ def retrieve(version, filters=None): ) try: + pack = normalize_version_fields(pack, ["Version"]) with open(path, "w") as write_file: json.dump( pack, write_file, sort_keys=True, indent=4, separators=(",", ": ") diff --git a/biotools-import/import.py b/biotools-import/import.py index 199d677..5cf8598 100644 --- a/biotools-import/import.py +++ b/biotools-import/import.py @@ -1,11 +1,15 @@ import json import os +import sys import glob import argparse import requests from boltons.iterutils import remap +sys.path.insert(0, 
import logging

# Named logger: avoids the implicit root-logger configuration that the
# module-level ``logging.debug`` helper can trigger.
logger = logging.getLogger(__name__)


def normalize_version_to_string(value):
    """
    Recursively convert numeric version values to strings.

    Args:
        value: The value to normalize. Can be any type.

    Returns:
        - None and bool values are returned unchanged
        - int and float values are converted with ``str()``
        - Lists are processed recursively, returning a new list
        - Dicts are processed recursively (values only), returning a new dict
        - Any other type is returned unchanged

    Examples:
        >>> normalize_version_to_string(1)
        '1'
        >>> normalize_version_to_string([1, 2, 3])
        ['1', '2', '3']
        >>> normalize_version_to_string({'version': 1.5})
        {'version': '1.5'}
    """
    # bool is a subclass of int, so it must be ruled out before the numeric
    # check or True/False would become 'True'/'False'.
    if value is None or isinstance(value, bool):
        return value
    if isinstance(value, (int, float)):
        return str(value)
    if isinstance(value, list):
        return [normalize_version_to_string(v) for v in value]
    if isinstance(value, dict):
        return {k: normalize_version_to_string(v) for k, v in value.items()}
    return value


def _normalize_at_path(node, segments, index=0):
    """Normalize, in place, the value reached from ``node`` via ``segments[index:]``.

    Each segment is a dict key, optionally suffixed with ``[]`` to state that
    the value under that key must be a list. Anything that does not match the
    expected shape (missing key, non-dict container, non-list under a ``[]``
    segment) is left untouched.
    """
    if not isinstance(node, dict):
        return

    segment = segments[index]
    expects_list = segment.endswith("[]")
    key = segment[:-2] if expects_list else segment
    if key not in node:
        return

    value = node[key]
    if expects_list and not isinstance(value, list):
        return

    if index == len(segments) - 1:
        # Final segment: normalize whatever is stored here (scalar, list or
        # dict); normalize_version_to_string handles the recursion.
        node[key] = normalize_version_to_string(value)
    elif expects_list:
        # Intermediate list: apply the remaining path to every element;
        # non-dict elements are skipped by the guard at the top.
        for item in value:
            _normalize_at_path(item, segments, index + 1)
    else:
        _normalize_at_path(value, segments, index + 1)


def normalize_version_fields(data, field_paths):
    """
    Normalize version fields to strings in a data dictionary.

    The values found at each path are replaced by the result of
    normalize_version_to_string. ``data`` is modified in place and the same
    object is returned.

    Args:
        data (dict): The dictionary to process.
        field_paths (iterable or str): Field path strings; a single string is
            treated as one path. Path segments are separated by "." and a
            segment may end with "[]" to step into a list. Supports:
            - Simple fields: "version"
            - Nested fields: "tool.version"
            - List fields: "versions[]"
            - List item fields: "versions[].version"
            - Any combination: "tool.releases[].meta.version"

    Returns:
        dict: The same ``data`` dictionary with normalized version fields.
        Paths that do not match the data are skipped silently.

    Raises:
        TypeError: If data is not a dictionary.

    Examples:
        >>> data = {"version": 1, "versions": [{"version": 2}]}
        >>> normalize_version_fields(data, ["version", "versions[].version"])
        {'version': '1', 'versions': [{'version': '2'}]}
    """
    if not isinstance(data, dict):
        raise TypeError(f"Expected dict, got {type(data).__name__}")

    # A bare string would otherwise be iterated character by character and
    # silently normalize nothing.
    if isinstance(field_paths, str):
        field_paths = [field_paths]

    for field_path in field_paths:
        try:
            _normalize_at_path(data, field_path.split("."))
        except (KeyError, TypeError, IndexError, AttributeError) as e:
            # Best effort: a malformed path (e.g. a non-string entry) must not
            # abort normalization of the remaining paths.
            logger.debug("Skipping field path '%s': %s", field_path, e)
            continue

    return data