From 1ed57b609a6ad642eb417a137c2101ab95ac9c01 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Wed, 24 Dec 2025 09:34:54 -0500 Subject: [PATCH 1/4] feat: add publication format and DOI validation checks Add two new inspector checks for related_publications metadata: - check_publication_list_format: detects comma-separated DOIs/URLs that should be separate list entries - check_publication_doi_resolves: verifies DOI URLs resolve to valid publications via network requests Includes documentation updates for best practices guidance. Closes #419 --- CHANGELOG.md | 2 + docs/best_practices/nwbfile_metadata.rst | 11 +- src/nwbinspector/checks/__init__.py | 4 + src/nwbinspector/checks/_nwbfile_metadata.py | 117 +++++++++++ tests/unit_tests/test_nwbfile_metadata.py | 192 +++++++++++++++++++ 5 files changed, 325 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e4219cf3..094e77bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ * Added `check_units_table_duration` to detect if the duration of spike times in a Units table exceeds a threshold (default: 1 year), which may indicate spike_times are in the wrong units or there is a data quality issue. * Added `check_time_intervals_duration`, which makes sure that `TimeInterval` objects do not have a duration greater than 1 year. [#635](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/635) +* Added `check_publication_list_format` to detect comma-separated DOIs/URLs in `related_publications` entries that should be separate list entries. [#419](https://github.com/NeurodataWithoutBorders/nwbinspector/issues/419) +* Added `check_publication_doi_resolves` to verify that DOI URLs in `related_publications` actually resolve to valid publications. [#419](https://github.com/NeurodataWithoutBorders/nwbinspector/issues/419) ### Improvements * Added documentation to API and CLI docs on how to use the dandi config option. [#624](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/624) diff --git a/docs/best_practices/nwbfile_metadata.rst b/docs/best_practices/nwbfile_metadata.rst index 8d74b1cc..f8407551 100644 --- a/docs/best_practices/nwbfile_metadata.rst +++ b/docs/best_practices/nwbfile_metadata.rst @@ -181,7 +181,16 @@ of the form ``'doi: ###'`` or as an external link of the form ``'http://dx.doi.o This allows metadata collection programs, such as those on the :dandi-archive:`DANDI archive <>` to easily form direct hyperlinks to the publications. -Check function: :py:meth:`~nwbinspector.checks._nwbfile_metadata.check_doi_publications` +Each publication should be a separate entry in the list. Do not combine multiple DOIs or URLs into a single +comma-separated string. For example, use ``["https://doi.org/10.1234/abc", "https://doi.org/10.5678/def"]`` instead of +``["https://doi.org/10.1234/abc,https://doi.org/10.5678/def"]``. + +DOIs should be valid and resolvable. The inspector can verify that DOI URLs actually resolve to valid publications +by making network requests to the DOI resolver service. + +Check functions: :py:meth:`~nwbinspector.checks._nwbfile_metadata.check_doi_publications`, +:py:meth:`~nwbinspector.checks._nwbfile_metadata.check_publication_list_format`, and +:py:meth:`~nwbinspector.checks._nwbfile_metadata.check_publication_doi_resolves` diff --git a/src/nwbinspector/checks/__init__.py b/src/nwbinspector/checks/__init__.py index 2ad96fbc..a424078e 100644 --- a/src/nwbinspector/checks/__init__.py +++ b/src/nwbinspector/checks/__init__.py @@ -44,6 +44,8 @@ check_institution, check_keywords, check_processing_module_name, + check_publication_doi_resolves, + check_publication_list_format, check_session_id_no_slashes, check_session_start_time_future_date, check_session_start_time_old_date, @@ -128,6 +130,8 @@ "check_session_start_time_future_date", "check_processing_module_name", "check_session_start_time_old_date", + "check_publication_list_format", + "check_publication_doi_resolves", "check_optogenetic_stimulus_site_has_optogenetic_series", "check_excitation_lambda_in_nm", "check_plane_segmentation_image_mask_shape_against_ref_images", diff --git a/src/nwbinspector/checks/_nwbfile_metadata.py b/src/nwbinspector/checks/_nwbfile_metadata.py index 1b7f7bb9..275daef3 100644 --- a/src/nwbinspector/checks/_nwbfile_metadata.py +++ b/src/nwbinspector/checks/_nwbfile_metadata.py @@ -4,6 +4,8 @@ from datetime import datetime from pathlib import Path from typing import Iterable, Optional +from urllib.error import HTTPError, URLError +from urllib.request import Request, urlopen from hdmf_zarr import NWBZarrIO from isodate import Duration, parse_duration @@ -164,6 +166,121 @@ def check_doi_publications(nwbfile: NWBFile) -> Optional[Iterable[InspectorMessa return None +@register_check(importance=Importance.BEST_PRACTICE_SUGGESTION, neurodata_type=NWBFile) +def check_publication_list_format(nwbfile: NWBFile) -> Optional[Iterable[InspectorMessage]]: + """ + Check if related_publications entries contain comma-separated values that should be separate list entries. + + Best Practice: :ref:`best_practice_doi_publications` + """ + if not nwbfile.related_publications: + return None + for publication in nwbfile.related_publications: + publication = publication.decode() if isinstance(publication, bytes) else publication + # Check for comma-separated DOIs or URLs within a single entry + # Look for patterns like "doi:xxx,doi:yyy" or "https://doi.org/xxx,https://doi.org/yyy" + if "," in publication: + # Check if the comma appears to separate multiple DOIs/URLs + parts = [p.strip() for p in publication.split(",")] + doi_indicators = ["doi:", "doi.org/", "dx.doi.org/"] + doi_like_parts = [ + part for part in parts if any(indicator in part.lower() for indicator in doi_indicators) + ] + if len(doi_like_parts) > 1: + yield InspectorMessage( + message=( + f"Metadata /general/related_publications contains a comma-separated list '{publication}'. " + "Each publication should be a separate entry in the list, not combined in a single string." + ) + ) + + return None + + +def _convert_doi_to_url(doi_string: str) -> Optional[str]: + """ + Convert a DOI string to a resolvable URL. + + Handles formats like: + - "doi:10.1234/abc" -> "https://doi.org/10.1234/abc" + - "https://doi.org/10.1234/abc" -> "https://doi.org/10.1234/abc" + - "http://dx.doi.org/10.1234/abc" -> "http://dx.doi.org/10.1234/abc" + + Returns None if the string is not a recognizable DOI format. + """ + doi_string = doi_string.strip() + + if doi_string.startswith("https://doi.org/") or doi_string.startswith("http://dx.doi.org/"): + return doi_string + elif doi_string.startswith("doi:"): + # Extract the DOI identifier after "doi:" + doi_id = doi_string[4:].strip() + return f"https://doi.org/{doi_id}" + + return None + + +def _check_url_resolves(url: str, timeout: int = 10) -> tuple[bool, Optional[str]]: + """ + Check if a URL resolves by making a HEAD request. + + Returns a tuple of (success, error_message). + """ + try: + request = Request(url, method="HEAD") + request.add_header("User-Agent", "NWBInspector/1.0") + with urlopen(request, timeout=timeout) as response: + # 2xx and 3xx status codes are considered successful + if response.status < 400: + return True, None + return False, f"HTTP {response.status}" + except HTTPError as e: + return False, f"HTTP {e.code}" + except URLError as e: + return False, str(e.reason) + except TimeoutError: + return False, "Request timed out" + except Exception as e: + return False, str(e) + + +@register_check(importance=Importance.BEST_PRACTICE_SUGGESTION, neurodata_type=NWBFile) +def check_publication_doi_resolves(nwbfile: NWBFile) -> Optional[Iterable[InspectorMessage]]: + """ + Check if DOI URLs in related_publications actually resolve. + + This check makes network requests to verify that DOI URLs are valid and accessible. + + Best Practice: :ref:`best_practice_doi_publications` + """ + if not nwbfile.related_publications: + return None + + valid_starts = ["doi:", "http://dx.doi.org/", "https://doi.org/"] + + for publication in nwbfile.related_publications: + publication = publication.decode() if isinstance(publication, bytes) else publication + + # Only check entries that look like DOIs + if not any(publication.startswith(valid_start) for valid_start in valid_starts): + continue + + url = _convert_doi_to_url(publication) + if url is None: + continue + + resolves, error = _check_url_resolves(url) + if not resolves: + yield InspectorMessage( + message=( + f"Metadata /general/related_publications DOI '{publication}' does not resolve. " + f"Error: {error}. Please verify the DOI is correct." + ) + ) + + return None + + @register_check(importance=Importance.BEST_PRACTICE_SUGGESTION, neurodata_type=Subject) def check_subject_age(subject: Subject) -> Optional[InspectorMessage]: """Check if the Subject age is in ISO 8601 or our extension of it for ranges.""" diff --git a/tests/unit_tests/test_nwbfile_metadata.py b/tests/unit_tests/test_nwbfile_metadata.py index 1607af9e..b1ed5830 100644 --- a/tests/unit_tests/test_nwbfile_metadata.py +++ b/tests/unit_tests/test_nwbfile_metadata.py @@ -7,6 +7,10 @@ from pynwb.file import Subject from nwbinspector import Importance, InspectorMessage +import os + +import pytest + from nwbinspector.checks import ( check_doi_publications, check_experiment_description, @@ -16,6 +20,8 @@ check_institution, check_keywords, check_processing_module_name, + check_publication_doi_resolves, + check_publication_list_format, check_session_id_no_slashes, check_session_start_time_future_date, check_session_start_time_old_date, @@ -268,6 +274,192 @@ def test_check_doi_publications_multiple_fail(): ] +def test_check_publication_list_format_pass(): + """Test that properly formatted publications pass the check.""" + nwbfile = NWBFile( + session_description="", + identifier=str(uuid4()), + session_start_time=datetime.now().astimezone(), + related_publications=["https://doi.org/10.1234/abc", "https://doi.org/10.5678/def"], + ) + assert check_publication_list_format(nwbfile) is None + + +def test_check_publication_list_format_pass_single_comma_in_title(): + """Test that a single publication with a comma in the title passes (not multiple DOIs).""" + nwbfile = NWBFile( + session_description="", + identifier=str(uuid4()), + session_start_time=datetime.now().astimezone(), + related_publications=["Some publication title, with comma"], + ) + assert check_publication_list_format(nwbfile) is None + + +def test_check_publication_list_format_pass_no_publications(): + """Test that no related_publications passes the check.""" + nwbfile = NWBFile( + session_description="", + identifier=str(uuid4()), + session_start_time=datetime.now().astimezone(), + ) + assert check_publication_list_format(nwbfile) is None + + +def test_check_publication_list_format_fail_comma_separated_doi_urls(): + """Test detection of comma-separated DOI URLs in a single entry.""" + nwbfile = NWBFile( + session_description="", + identifier=str(uuid4()), + session_start_time=datetime.now().astimezone(), + related_publications=["https://doi.org/10.1234/abc,https://doi.org/10.5678/def"], + ) + assert check_publication_list_format(nwbfile) == [ + InspectorMessage( + message=( + "Metadata /general/related_publications contains a comma-separated list " + "'https://doi.org/10.1234/abc,https://doi.org/10.5678/def'. " + "Each publication should be a separate entry in the list, not combined in a single string." + ), + importance=Importance.BEST_PRACTICE_SUGGESTION, + check_function_name="check_publication_list_format", + object_type="NWBFile", + object_name="root", + location="/", + ) + ] + + +def test_check_publication_list_format_fail_comma_separated_doi_prefix(): + """Test detection of comma-separated DOI prefixes in a single entry.""" + nwbfile = NWBFile( + session_description="", + identifier=str(uuid4()), + session_start_time=datetime.now().astimezone(), + related_publications=["doi:10.1234/abc, doi:10.5678/def"], + ) + assert check_publication_list_format(nwbfile) == [ + InspectorMessage( + message=( + "Metadata /general/related_publications contains a comma-separated list " + "'doi:10.1234/abc, doi:10.5678/def'. " + "Each publication should be a separate entry in the list, not combined in a single string." + ), + importance=Importance.BEST_PRACTICE_SUGGESTION, + check_function_name="check_publication_list_format", + object_type="NWBFile", + object_name="root", + location="/", + ) + ] + + +def test_check_publication_list_format_bytestring_fail(): + """Test that bytestrings are properly decoded and checked.""" + nwbfile = NWBFile( + session_description="", + identifier=str(uuid4()), + session_start_time=datetime.now().astimezone(), + related_publications=[b"https://doi.org/10.1234/abc,https://doi.org/10.5678/def"], + ) + assert check_publication_list_format(nwbfile) == [ + InspectorMessage( + message=( + "Metadata /general/related_publications contains a comma-separated list " + "'https://doi.org/10.1234/abc,https://doi.org/10.5678/def'. " + "Each publication should be a separate entry in the list, not combined in a single string." + ), + importance=Importance.BEST_PRACTICE_SUGGESTION, + check_function_name="check_publication_list_format", + object_type="NWBFile", + object_name="root", + location="/", + ) + ] + + +# Network-dependent tests for DOI resolution +@pytest.mark.skipif( + os.environ.get("NWBI_SKIP_NETWORK_TESTS", "").lower() in ("1", "true", "yes"), + reason="Skipping network-dependent tests", +) +def test_check_publication_doi_resolves_pass(): + """Test that a valid DOI resolves successfully.""" + nwbfile = NWBFile( + session_description="", + identifier=str(uuid4()), + session_start_time=datetime.now().astimezone(), + # Use a well-known, stable DOI (the DOI handbook) + related_publications=["https://doi.org/10.1000/182"], + ) + assert check_publication_doi_resolves(nwbfile) is None + + +@pytest.mark.skipif( + os.environ.get("NWBI_SKIP_NETWORK_TESTS", "").lower() in ("1", "true", "yes"), + reason="Skipping network-dependent tests", +) +def test_check_publication_doi_resolves_pass_no_publications(): + """Test that no related_publications passes the check.""" + nwbfile = NWBFile( + session_description="", + identifier=str(uuid4()), + session_start_time=datetime.now().astimezone(), + ) + assert check_publication_doi_resolves(nwbfile) is None + + +@pytest.mark.skipif( + os.environ.get("NWBI_SKIP_NETWORK_TESTS", "").lower() in ("1", "true", "yes"), + reason="Skipping network-dependent tests", +) +def test_check_publication_doi_resolves_fail_invalid_doi(): + """Test that an invalid DOI fails to resolve.""" + nwbfile = NWBFile( + session_description="", + identifier=str(uuid4()), + session_start_time=datetime.now().astimezone(), + related_publications=["https://doi.org/10.1234/this-doi-does-not-exist-abc123xyz"], + ) + results = list(check_publication_doi_resolves(nwbfile)) + assert len(results) == 1 + assert "does not resolve" in results[0].message + assert results[0].importance == Importance.BEST_PRACTICE_SUGGESTION + assert results[0].check_function_name == "check_publication_doi_resolves" + + +@pytest.mark.skipif( + os.environ.get("NWBI_SKIP_NETWORK_TESTS", "").lower() in ("1", "true", "yes"), + reason="Skipping network-dependent tests", +) +def test_check_publication_doi_resolves_pass_non_doi_skipped(): + """Test that non-DOI entries are skipped (not checked for resolution).""" + nwbfile = NWBFile( + session_description="", + identifier=str(uuid4()), + session_start_time=datetime.now().astimezone(), + related_publications=["Some random text that is not a DOI"], + ) + # Non-DOI entries should be skipped, so this should return None + assert check_publication_doi_resolves(nwbfile) is None + + +@pytest.mark.skipif( + os.environ.get("NWBI_SKIP_NETWORK_TESTS", "").lower() in ("1", "true", "yes"), + reason="Skipping network-dependent tests", +) +def test_check_publication_doi_resolves_doi_prefix_format(): + """Test that DOIs with 'doi:' prefix are resolved correctly.""" + nwbfile = NWBFile( + session_description="", + identifier=str(uuid4()), + session_start_time=datetime.now().astimezone(), + # Use a well-known, stable DOI + related_publications=["doi:10.1000/182"], + ) + assert check_publication_doi_resolves(nwbfile) is None + + def test_check_subject_sex(): nwbfile = NWBFile(session_description="", identifier=str(uuid4()), session_start_time=datetime.now().astimezone()) nwbfile.subject = Subject(subject_id="001") From 452621c5b5c8e001265ba9a65a0960c7ad2f3e33 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 24 Dec 2025 14:35:41 +0000 Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/nwbinspector/checks/_nwbfile_metadata.py | 4 +--- tests/unit_tests/test_nwbfile_metadata.py | 6 ++---- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/nwbinspector/checks/_nwbfile_metadata.py b/src/nwbinspector/checks/_nwbfile_metadata.py index 275daef3..004d10b1 100644 --- a/src/nwbinspector/checks/_nwbfile_metadata.py +++ b/src/nwbinspector/checks/_nwbfile_metadata.py @@ -183,9 +183,7 @@ def check_publication_list_format(nwbfile: NWBFile) -> Optional[Iterable[Inspect # Check if the comma appears to separate multiple DOIs/URLs parts = [p.strip() for p in publication.split(",")] doi_indicators = ["doi:", "doi.org/", "dx.doi.org/"] - doi_like_parts = [ - part for part in parts if any(indicator in part.lower() for indicator in doi_indicators) - ] + doi_like_parts = [part for part in parts if any(indicator in part.lower() for indicator in doi_indicators)] if len(doi_like_parts) > 1: yield InspectorMessage( message=( diff --git a/tests/unit_tests/test_nwbfile_metadata.py b/tests/unit_tests/test_nwbfile_metadata.py index b1ed5830..08cd1aec 100644 --- a/tests/unit_tests/test_nwbfile_metadata.py +++ b/tests/unit_tests/test_nwbfile_metadata.py @@ -1,16 +1,14 @@ +import os import tempfile from datetime import datetime, timezone from uuid import uuid4 +import pytest from hdmf_zarr import NWBZarrIO from pynwb import NWBHDF5IO, NWBFile, ProcessingModule from pynwb.file import Subject from nwbinspector import Importance, InspectorMessage -import os - -import pytest - from nwbinspector.checks import ( check_doi_publications, check_experiment_description, From 7e24fb57b6589b7278600e000b5056a8589f6aad Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Wed, 25 Mar 2026 09:39:46 -0400 Subject: [PATCH 3/4] Replace network-dependent DOI tests with mocked versions The original tests hit real DOI endpoints, making them flaky in CI and offline environments. Replace with mock-based tests that cover the same logic paths without network access. Add unit tests for _convert_doi_to_url and _check_url_resolves helpers to cover previously missing lines (HTTP error, URL error, timeout branches). Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/unit_tests/test_nwbfile_metadata.py | 125 +++++++++++++++------- 1 file changed, 88 insertions(+), 37 deletions(-) diff --git a/tests/unit_tests/test_nwbfile_metadata.py b/tests/unit_tests/test_nwbfile_metadata.py index 9b21e2fc..264d2314 100644 --- a/tests/unit_tests/test_nwbfile_metadata.py +++ b/tests/unit_tests/test_nwbfile_metadata.py @@ -1,6 +1,7 @@ import os import tempfile from datetime import datetime, timezone +from unittest.mock import patch from uuid import uuid4 import pytest @@ -33,6 +34,7 @@ check_subject_species_form, check_subject_weight, ) +from nwbinspector.checks._nwbfile_metadata import _check_url_resolves, _convert_doi_to_url from nwbinspector.checks._nwbfile_metadata import PROCESSING_MODULE_CONFIG from nwbinspector.testing import make_minimal_nwbfile @@ -377,27 +379,74 @@ def test_check_publication_list_format_bytestring_fail(): ] -# Network-dependent tests for DOI resolution -@pytest.mark.skipif( - os.environ.get("NWBI_SKIP_NETWORK_TESTS", "").lower() in ("1", "true", "yes"), - reason="Skipping network-dependent tests", -) -def test_check_publication_doi_resolves_pass(): - """Test that a valid DOI resolves successfully.""" +# Unit tests for _convert_doi_to_url helper +def test_convert_doi_to_url_https(): + assert _convert_doi_to_url("https://doi.org/10.1234/abc") == "https://doi.org/10.1234/abc" + + +def test_convert_doi_to_url_dx(): + assert _convert_doi_to_url("http://dx.doi.org/10.1234/abc") == "http://dx.doi.org/10.1234/abc" + + +def test_convert_doi_to_url_doi_prefix(): + assert _convert_doi_to_url("doi:10.1234/abc") == "https://doi.org/10.1234/abc" + + +def test_convert_doi_to_url_not_a_doi(): + assert _convert_doi_to_url("not a doi") is None + + +# Unit tests for _check_url_resolves helper (mocked) +def test_check_url_resolves_success(): + with patch("nwbinspector.checks._nwbfile_metadata.urlopen") as mock_urlopen: + mock_urlopen.return_value.__enter__ = lambda s: type("Response", (), {"status": 200})() + mock_urlopen.return_value.__exit__ = lambda s, *a: None + success, error = _check_url_resolves("https://doi.org/10.1234/abc") + assert success is True + assert error is None + + +def test_check_url_resolves_http_error(): + from urllib.error import HTTPError + + with patch("nwbinspector.checks._nwbfile_metadata.urlopen") as mock_urlopen: + mock_urlopen.side_effect = HTTPError(url=None, code=404, msg="Not Found", hdrs=None, fp=None) + success, error = _check_url_resolves("https://doi.org/10.1234/abc") + assert success is False + assert "404" in error + + +def test_check_url_resolves_url_error(): + from urllib.error import URLError + + with patch("nwbinspector.checks._nwbfile_metadata.urlopen") as mock_urlopen: + mock_urlopen.side_effect = URLError(reason="Name or service not known") + success, error = _check_url_resolves("https://doi.org/10.1234/abc") + assert success is False + assert "Name or service not known" in error + + +def test_check_url_resolves_timeout(): + with patch("nwbinspector.checks._nwbfile_metadata.urlopen") as mock_urlopen: + mock_urlopen.side_effect = TimeoutError() + success, error = _check_url_resolves("https://doi.org/10.1234/abc") + assert success is False + assert "timed out" in error + + +# Mock-based tests for check_publication_doi_resolves (no network) +def test_check_publication_doi_resolves_pass_mocked(): + """Test that a valid DOI resolves successfully (mocked).""" nwbfile = NWBFile( session_description="", identifier=str(uuid4()), session_start_time=datetime.now().astimezone(), - # Use a well-known, stable DOI (the DOI handbook) related_publications=["https://doi.org/10.1000/182"], ) - assert check_publication_doi_resolves(nwbfile) is None + with patch("nwbinspector.checks._nwbfile_metadata._check_url_resolves", return_value=(True, None)): + assert check_publication_doi_resolves(nwbfile) is None -@pytest.mark.skipif( - os.environ.get("NWBI_SKIP_NETWORK_TESTS", "").lower() in ("1", "true", "yes"), - reason="Skipping network-dependent tests", -) def test_check_publication_doi_resolves_pass_no_publications(): """Test that no related_publications passes the check.""" nwbfile = NWBFile( @@ -408,29 +457,24 @@ def test_check_publication_doi_resolves_pass_no_publications(): assert check_publication_doi_resolves(nwbfile) is None -@pytest.mark.skipif( - os.environ.get("NWBI_SKIP_NETWORK_TESTS", "").lower() in ("1", "true", "yes"), - reason="Skipping network-dependent tests", -) -def test_check_publication_doi_resolves_fail_invalid_doi(): - """Test that an invalid DOI fails to resolve.""" +def test_check_publication_doi_resolves_fail_mocked(): + """Test that an invalid DOI fails to resolve (mocked).""" nwbfile = NWBFile( session_description="", identifier=str(uuid4()), session_start_time=datetime.now().astimezone(), related_publications=["https://doi.org/10.1234/this-doi-does-not-exist-abc123xyz"], ) - results = list(check_publication_doi_resolves(nwbfile)) - assert len(results) == 1 - assert "does not resolve" in results[0].message - assert results[0].importance == Importance.BEST_PRACTICE_SUGGESTION - assert results[0].check_function_name == "check_publication_doi_resolves" + with patch( + "nwbinspector.checks._nwbfile_metadata._check_url_resolves", return_value=(False, "HTTP 404") + ): + results = list(check_publication_doi_resolves(nwbfile)) + assert len(results) == 1 + assert "does not resolve" in results[0].message + assert results[0].importance == Importance.BEST_PRACTICE_SUGGESTION + assert results[0].check_function_name == "check_publication_doi_resolves" -@pytest.mark.skipif( - os.environ.get("NWBI_SKIP_NETWORK_TESTS", "").lower() in ("1", "true", "yes"), - reason="Skipping network-dependent tests", -) def test_check_publication_doi_resolves_pass_non_doi_skipped(): """Test that non-DOI entries are skipped (not checked for resolution).""" nwbfile = NWBFile( @@ -439,24 +483,31 @@ def test_check_publication_doi_resolves_pass_non_doi_skipped(): session_start_time=datetime.now().astimezone(), related_publications=["Some random text that is not a DOI"], ) - # Non-DOI entries should be skipped, so this should return None assert check_publication_doi_resolves(nwbfile) is None -@pytest.mark.skipif( - os.environ.get("NWBI_SKIP_NETWORK_TESTS", "").lower() in ("1", "true", "yes"), - reason="Skipping network-dependent tests", -) -def test_check_publication_doi_resolves_doi_prefix_format(): - """Test that DOIs with 'doi:' prefix are resolved correctly.""" +def test_check_publication_doi_resolves_doi_prefix_format_mocked(): + """Test that DOIs with 'doi:' prefix are resolved correctly (mocked).""" nwbfile = NWBFile( session_description="", identifier=str(uuid4()), session_start_time=datetime.now().astimezone(), - # Use a well-known, stable DOI related_publications=["doi:10.1000/182"], ) - assert check_publication_doi_resolves(nwbfile) is None + with patch("nwbinspector.checks._nwbfile_metadata._check_url_resolves", return_value=(True, None)): + assert check_publication_doi_resolves(nwbfile) is None + + +def test_check_publication_doi_resolves_bytestring_mocked(): + """Test that bytestrings are properly decoded.""" + nwbfile = NWBFile( + session_description="", + identifier=str(uuid4()), + session_start_time=datetime.now().astimezone(), + related_publications=[b"https://doi.org/10.1000/182"], + ) + with patch("nwbinspector.checks._nwbfile_metadata._check_url_resolves", return_value=(True, None)): + assert check_publication_doi_resolves(nwbfile) is None def test_check_subject_sex(): From c141df2ca4b3f73359a91eb2611438336f433491 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 25 Mar 2026 13:39:58 +0000 Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/unit_tests/test_nwbfile_metadata.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tests/unit_tests/test_nwbfile_metadata.py b/tests/unit_tests/test_nwbfile_metadata.py index 264d2314..8c4c2e06 100644 --- a/tests/unit_tests/test_nwbfile_metadata.py +++ b/tests/unit_tests/test_nwbfile_metadata.py @@ -1,10 +1,8 @@ -import os import tempfile from datetime import datetime, timezone from unittest.mock import patch from uuid import uuid4 -import pytest from hdmf_zarr import NWBZarrIO from pynwb import NWBHDF5IO, NWBFile, ProcessingModule from pynwb.file import Subject @@ -34,8 +32,11 @@ check_subject_species_form, check_subject_weight, ) -from nwbinspector.checks._nwbfile_metadata import _check_url_resolves, _convert_doi_to_url -from nwbinspector.checks._nwbfile_metadata import PROCESSING_MODULE_CONFIG +from nwbinspector.checks._nwbfile_metadata import ( + PROCESSING_MODULE_CONFIG, + _check_url_resolves, + _convert_doi_to_url, +) from nwbinspector.testing import make_minimal_nwbfile minimal_nwbfile = make_minimal_nwbfile() @@ -465,9 +466,7 @@ def test_check_publication_doi_resolves_fail_mocked(): session_start_time=datetime.now().astimezone(), related_publications=["https://doi.org/10.1234/this-doi-does-not-exist-abc123xyz"], ) - with patch( - "nwbinspector.checks._nwbfile_metadata._check_url_resolves", return_value=(False, "HTTP 404") - ): + with patch("nwbinspector.checks._nwbfile_metadata._check_url_resolves", return_value=(False, "HTTP 404")): results = list(check_publication_doi_resolves(nwbfile)) assert len(results) == 1 assert "does not resolve" in results[0].message