Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
* Added `check_image_series_starting_frame_without_external_file` to verify that `starting_frame` is not set when `external_file` is not used in an `ImageSeries`. [#235](https://github.com/NeurodataWithoutBorders/nwbinspector/issues/235)
* Added `check_sweeptable_deprecated` to detect usage of the deprecated `SweepTable` in NWB files with schema version >= 2.4.0, which should use `IntracellularRecordingsTable` instead. [#657](https://github.com/NeurodataWithoutBorders/nwbinspector/issues/657)
* Added `check_time_series_data_is_not_empty` to detect empty `.data` fields in `TimeSeries` containers, which often indicate incomplete data entry or conversion errors. Skips `ImageSeries` with `external_file` set, where empty data is intentional. [#668](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/668)
* Added `check_publication_list_format` to detect comma-separated DOIs/URLs in `related_publications` entries that should be separate list entries. [#419](https://github.com/NeurodataWithoutBorders/nwbinspector/issues/419)
* Added `check_publication_doi_resolves` to verify that DOI URLs in `related_publications` actually resolve to valid publications. [#419](https://github.com/NeurodataWithoutBorders/nwbinspector/issues/419)

### Improvements
* Added documentation to API and CLI docs on how to use the dandi config option. [#624](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/624)
Expand Down
11 changes: 10 additions & 1 deletion docs/best_practices/nwbfile_metadata.rst
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,16 @@ of the form ``'doi: ###'`` or as an external link of the form ``'http://dx.doi.o
This allows metadata collection programs, such as those on the :dandi-archive:`DANDI archive <>` to easily form direct
hyperlinks to the publications.

Check function: :py:meth:`~nwbinspector.checks._nwbfile_metadata.check_doi_publications`
Each publication should be a separate entry in the list. Do not combine multiple DOIs or URLs into a single
comma-separated string. For example, use ``["https://doi.org/10.1234/abc", "https://doi.org/10.5678/def"]`` instead of
``["https://doi.org/10.1234/abc,https://doi.org/10.5678/def"]``.

DOIs should be valid and resolvable. The inspector can verify that DOI URLs actually resolve to valid publications
by making network requests to the DOI resolver service.

Check functions: :py:meth:`~nwbinspector.checks._nwbfile_metadata.check_doi_publications`,
:py:meth:`~nwbinspector.checks._nwbfile_metadata.check_publication_list_format`, and
:py:meth:`~nwbinspector.checks._nwbfile_metadata.check_publication_doi_resolves`



Expand Down
4 changes: 4 additions & 0 deletions src/nwbinspector/checks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@
check_institution,
check_keywords,
check_processing_module_name,
check_publication_doi_resolves,
check_publication_list_format,
check_session_id_no_slashes,
check_session_start_time_future_date,
check_session_start_time_old_date,
Expand Down Expand Up @@ -146,6 +148,8 @@
"check_session_start_time_future_date",
"check_processing_module_name",
"check_session_start_time_old_date",
"check_publication_list_format",
"check_publication_doi_resolves",
"check_optogenetic_stimulus_site_has_optogenetic_series",
"check_excitation_lambda_in_nm",
"check_plane_segmentation_image_mask_shape_against_ref_images",
Expand Down
115 changes: 115 additions & 0 deletions src/nwbinspector/checks/_nwbfile_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from datetime import datetime
from pathlib import Path
from typing import Iterable, Optional
from urllib.error import HTTPError, URLError
from urllib.request import Request, urlopen

from hdmf_zarr import NWBZarrIO
from isodate import Duration, parse_duration
Expand Down Expand Up @@ -165,6 +167,119 @@ def check_doi_publications(nwbfile: NWBFile) -> Optional[Iterable[InspectorMessa
return None


@register_check(importance=Importance.BEST_PRACTICE_SUGGESTION, neurodata_type=NWBFile)
def check_publication_list_format(nwbfile: NWBFile) -> Optional[Iterable[InspectorMessage]]:
    """
    Flag related_publications entries that pack several DOIs/URLs into one comma-separated string.

    An entry is reported only when splitting it on commas produces more than one piece that looks like a DOI
    (contains ``doi:``, ``doi.org/`` or ``dx.doi.org/``, case-insensitively). Commas inside an otherwise
    ordinary single entry are therefore not reported.

    Best Practice: :ref:`best_practice_doi_publications`
    """
    doi_markers = ("doi:", "doi.org/", "dx.doi.org/")

    entries = nwbfile.related_publications
    if not entries:
        return None

    for entry in entries:
        # Entries may come back as bytes; normalize to str before inspecting them.
        publication = entry.decode() if isinstance(entry, bytes) else entry
        if "," not in publication:
            continue

        # Count the comma-delimited pieces that carry a DOI marker, e.g. "doi:xxx,doi:yyy"
        # or "https://doi.org/xxx,https://doi.org/yyy".
        doi_like_count = sum(
            1
            for piece in publication.split(",")
            if any(marker in piece.strip().lower() for marker in doi_markers)
        )
        if doi_like_count <= 1:
            continue

        yield InspectorMessage(
            message=(
                f"Metadata /general/related_publications contains a comma-separated list '{publication}'. "
                "Each publication should be a separate entry in the list, not combined in a single string."
            )
        )

    return None


def _convert_doi_to_url(doi_string: str) -> Optional[str]:
"""
Convert a DOI string to a resolvable URL.

Handles formats like:
- "doi:10.1234/abc" -> "https://doi.org/10.1234/abc"
- "https://doi.org/10.1234/abc" -> "https://doi.org/10.1234/abc"
- "http://dx.doi.org/10.1234/abc" -> "http://dx.doi.org/10.1234/abc"

Returns None if the string is not a recognizable DOI format.
"""
doi_string = doi_string.strip()

if doi_string.startswith("https://doi.org/") or doi_string.startswith("http://dx.doi.org/"):
return doi_string
elif doi_string.startswith("doi:"):
# Extract the DOI identifier after "doi:"
doi_id = doi_string[4:].strip()
return f"https://doi.org/{doi_id}"

return None


def _check_url_resolves(url: str, timeout: int = 10) -> tuple[bool, Optional[str]]:
"""
Check if a URL resolves by making a HEAD request.

Returns a tuple of (success, error_message).
"""
try:
request = Request(url, method="HEAD")
request.add_header("User-Agent", "NWBInspector/1.0")
with urlopen(request, timeout=timeout) as response:
# 2xx and 3xx status codes are considered successful
if response.status < 400:
return True, None
return False, f"HTTP {response.status}"
except HTTPError as e:
return False, f"HTTP {e.code}"
except URLError as e:
return False, str(e.reason)
except TimeoutError:
return False, "Request timed out"
except Exception as e:
return False, str(e)


@register_check(importance=Importance.BEST_PRACTICE_SUGGESTION, neurodata_type=NWBFile)
def check_publication_doi_resolves(nwbfile: NWBFile) -> Optional[Iterable[InspectorMessage]]:
    """
    Verify that the DOI entries of related_publications can actually be resolved.

    This check makes network requests. Entries that do not start with a recognized DOI prefix
    (``doi:``, ``http://dx.doi.org/`` or ``https://doi.org/``) are skipped. One message is yielded for
    every remaining entry whose URL fails to resolve.

    Best Practice: :ref:`best_practice_doi_publications`
    """
    doi_prefixes = ("doi:", "http://dx.doi.org/", "https://doi.org/")

    entries = nwbfile.related_publications
    if not entries:
        return None

    for entry in entries:
        # Entries may come back as bytes; normalize to str before inspecting them.
        publication = entry.decode() if isinstance(entry, bytes) else entry

        # Only entries that look like DOIs are worth a network round trip.
        if not publication.startswith(doi_prefixes):
            continue

        url = _convert_doi_to_url(publication)
        if url is None:
            continue

        resolves, error = _check_url_resolves(url)
        if resolves:
            continue

        yield InspectorMessage(
            message=(
                f"Metadata /general/related_publications DOI '{publication}' does not resolve. "
                f"Error: {error}. Please verify the DOI is correct."
            )
        )

    return None


@register_check(importance=Importance.CRITICAL, neurodata_type=Subject)
def check_subject_age(subject: Subject) -> Optional[InspectorMessage]:
"""Check if the Subject age is in ISO 8601 or our extension of it for ranges."""
Expand Down
Loading
Loading